/* {{{ php_friso_globals_construct */
/*
 * Fill in the module globals.
 *
 * Allocates a friso instance and a config object, stores both in
 * friso_globals, and then initialises them from the file named by the
 * "friso.ini_file" INI setting.
 *
 * NOTE(review): the value returned by friso_init_from_ifile() is discarded
 * here, so a failed initialisation is not reported by this function.
 */
static void php_friso_globals_construct(zend_friso_globals *friso_globals)
{
    friso_globals->friso  = friso_new();
    friso_globals->config = friso_new_config();

    friso_init_from_ifile(
        friso_globals->friso,
        friso_globals->config,
        INI_STR("friso.ini_file"));
}
/*
 * Create the friso, config and task objects used by this file.
 *
 * friso, config and task are declared outside this function. friso and
 * config are allocated first and then initialised from the configuration
 * file at `path`; the task is only created when that initialisation
 * succeeds.
 *
 * Returns 0 on success, or -1 (after printing a message to stdout) when
 * friso_init_from_ifile() returns anything other than 1.
 */
int init_friso(fstring path)
{
    friso = friso_new();
    config = friso_new_config();

    if (friso_init_from_ifile(friso, config, path) == 1) {
        /* Initialisation worked: set up the task object. */
        task = friso_new_task();
        return 0;
    }

    printf("fail to initialize friso and config.");
    return -1;
}
int main(int argc, char **argv) { lex_entry_t e; int lex = __LEX_CJK_WORDS__; char _line[__LENGTH__]; clock_t s_time, e_time; friso_t friso; s_time = clock(); friso = friso_new(); friso->dic = friso_dic_new(); //__CJK_WORDS__ friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-main.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-admin.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-chars.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-cn-mz.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-cn-place.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-company.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-festival.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-flname.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-food.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-lang.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-nation.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-net.lex", __LENGTH__ ); friso_dic_load( friso, __LEX_CJK_WORDS__, "../dict/lex-org.lex", __LENGTH__ ); //__CJK_UNITS__ friso_dic_load( friso, __LEX_CJK_UNITS__, "../dict/lex-units.lex", __LENGTH__ ); //__MIX_WORDS__ friso_dic_load( friso, __LEX_ECM_WORDS__, "../dict/lex-mixed.lex", __LENGTH__ ); //__CN_LNAME__ friso_dic_load( friso, __LEX_CN_LNAME__, "../dict/lex-lname.lex", __LENGTH__ ); //__CN_SNAME__ friso_dic_load( friso, __LEX_CN_SNAME__, "../dict/lex-sname.lex", __LENGTH__ ); //__CN_DNAME1__ friso_dic_load( friso, __LEX_CN_DNAME1__, "../dict/lex-dname-1.lex", __LENGTH__ ); //__CN_DNAME2__ friso_dic_load( friso, __LEX_CN_DNAME2__, "../dict/lex-dname-2.lex", __LENGTH__ ); //__CN_LNA__ friso_dic_load( friso, __LEX_CN_LNA__, "../dict/lex-lna.lex", __LENGTH__ ); e_time = clock(); printf("Done, cost: %f sec, 
size=%d\n", ( double ) ( e_time - s_time ) / CLOCKS_PER_SEC, \ friso_all_dic_size( friso->dic ) ); while ( 1 ) { printf("friso-%d>> ", lex); scanf("%s", _line); if ( strcmp( _line, "quit" ) == 0 ) { break; } else if ( strcmp( _line, "help" ) == 0 ) { ___PRINT_HELP_INFO___ } else if ( strcmp( _line, "#set" ) == 0 ) { printf("lex_t>> "); scanf("%d", &lex); } else { s_time = clock(); e = friso_dic_get( friso->dic, lex, _line ); e_time = clock(); if ( e != NULL ) { printf("word=%s, syn=%s, fre=%d, cost:%fsec\n", e->word, e->syn==NULL? "NULL" : (char *)e->syn->items[0], e->fre, (double) ( e_time - s_time ) / CLOCKS_PER_SEC ); } else { printf("%s was not found.\n", _line); } } } //friso_dic_free( friso->dic ); friso_free(friso); return 0; }
//creat a new friso with initialize item from a configuration file. __EXTERN_API__ friso_t friso_new_from_ifile( string __ifile ) { FILE *__stream; char __chars__[256], __key__[128], *__line__, __lexi__[128]; uint_t i, t, __hit__ = 0, __length__; friso_t e = friso_new(); if ( ( __stream = fopen( __ifile, "rb" ) ) != NULL ) { //initialize the entry with the value from the ifile. while ( ( __line__ = file_get_line( __chars__, __stream ) ) != NULL ) { //comments filter. if ( __line__[0] == '#' ) continue; if ( __line__[0] == '\t' ) continue; if ( __line__[0] == ' ' || __line__[0] == '\0' ) continue; __length__ = strlen( __line__ ); for ( i = 0; i < __length__; i++ ) { if ( __line__[i] == ' ' || __line__[i] == '\t' || __line__[i] == '=' ) break; __key__[i] = __line__[i]; } __key__[i] = '\0'; //position the euqals char '='. if ( __line__[i] == ' ' || __line__[i] == '\t' ) { for ( i++ ; i < __length__; i++ ) if ( __line__[i] == '=' ) break; } //clear the left whitespace of the value. for ( i++; i < __length__ && ( __line__[i] == ' ' || __line__[i] == '\t' ); i++ ); for ( t = 0; i < __length__; i++, t++ ) { if ( __line__[i] == ' ' || __line__[i] == '\t' ) break; __line__[t] = __line__[i]; } __line__[t] = '\0'; //printf("key=%s, value=%s\n", __key__, __line__ ); if ( strcmp( __key__, "friso.lex_dir" ) == 0 ) { /* * here copy the value of the lex_dir. * cause we need the value of friso.max_len to finish all * the work when we call function friso_dic_load_from_ifile to * initiliaze the friso dictionary. 
*/ if ( __hit__ == 0 ) { __hit__ = t; for ( t = 0; t < __hit__; t++ ) { __lexi__[t] = __line__[t]; } __lexi__[t] = '\0'; } } else if ( strcmp( __key__, "friso.max_len" ) == 0 ) { e->max_len = atoi( __line__ ); } else if ( strcmp( __key__, "friso.r_name" ) == 0 ) { e->r_name = atoi( __line__ ); } else if ( strcmp( __key__, "friso.mix_len" ) == 0 ) { e->mix_len = atoi( __line__ ); } else if ( strcmp( __key__, "friso.lna_len" ) == 0 ) { e->lna_len = atoi( __line__ ); } else if ( strcmp( __key__, "friso.nthreshold" ) == 0 ) { e->nthreshold = atoi( __line__ ); } else if ( strcmp( __key__, "friso.mode" ) == 0 ) { e->mode = ( friso_mode_t ) atoi( __line__ ); } } /* * intialize the friso dictionary here. * use the setting from the ifile parse above. * we copied the value in the __lexi__. */ if ( __hit__ != 0 ) { e->dic = friso_dic_new(); friso_dic_load_from_ifile( e->dic, __lexi__, e->max_len * 3 ); } fclose( __stream ); } return e; }
int main(int argc, char **argv) { clock_t s_time, e_time; char line[__INPUT_LENGTH__] = {0}; int i; fstring __path__ = NULL, mode = NULL; friso_t friso; friso_config_t config; friso_task_t task; //get the lexicon directory for ( i = 0; i < argc; i++ ) { if ( strcasecmp( "-init", argv[i] ) == 0 ) { __path__ = argv[i+1]; } } if ( __path__ == NULL ) { println("Usage: friso -init lexicon path"); exit(0); } s_time = clock(); //initialize friso = friso_new(); config = friso_new_config(); /*friso_dic_t dic = friso_dic_new(); friso_dic_load_from_ifile( dic, __path__, __LENGTH__ ); friso_set_dic( friso, dic ); friso_set_mode( friso, __FRISO_COMPLEX_MODE__ );*/ if ( friso_init_from_ifile(friso, config, __path__) != 1 ) { printf("fail to initialize friso and config."); goto err; } switch ( config->mode ) { case __FRISO_SIMPLE_MODE__: mode = "Simple"; break; case __FRISO_COMPLEX_MODE__: mode = "Complex"; break; case __FRISO_DETECT_MODE__: mode = "Detect"; break; } //friso_set_mode( config, __FRISO_DETECT_MODE__ ); //printf("clr_stw=%d\n", friso->clr_stw); //printf("match c++?%d\n", friso_dic_match( friso->dic, __LEX_ENPUN_WORDS__, "c++" )); //printf("match(研究)?%d\n", friso_dic_match( friso->dic, __LEX_CJK_WORDS__, "研究")); e_time = clock(); printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC ); printf("Mode: %s\n", mode); printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" ); ___ABOUT___; //set the task. task = friso_new_task(); while ( 1 ) { print("friso>> "); getLine( stdin, line ); //exit the programe if ( strcasecmp( line, "quit" ) == 0 ) { ___EXIT_INFO___ } //for ( i = 0; i < 1000000; i++ ) { //set the task text. 
friso_set_text( task, line ); println("分词结果:"); s_time = clock(); while ( ( config->next_token( friso, config, task ) ) != NULL ) { //printf("%s[%d, %d, %d] ", task->token->word, // task->token->offset, task->token->length, task->token->rlen ); printf("%s ", task->token->word ); } //} e_time = clock(); printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC ); } friso_free_task( task ); //error block. err: friso_free_config(config); friso_free(friso); return 0; }