Пример #1
0
/* {{{ php_friso_globals_construct
 *
 * Constructor for the extension's globals structure.
 *
 * Stores a fresh friso instance and a fresh friso configuration in the
 * globals, then initializes both from the file named by the
 * "friso.ini_file" INI setting.
 *
 * NOTE(review): the value returned by friso_init_from_ifile() is not
 * inspected here, so a failed initialization goes unnoticed.
 */
static void php_friso_globals_construct(zend_friso_globals *friso_globals)
{
    zend_friso_globals *g = friso_globals;

    /* Allocate both objects first; the init call below needs the pair. */
    g->friso  = friso_new();
    g->config = friso_new_config();

    friso_init_from_ifile(g->friso, g->config, INI_STR("friso.ini_file"));
}
Пример #2
0
/*
 * Set up the file-level friso, config and task objects.
 *
 * path: configuration file handed to friso_init_from_ifile().
 *
 * Returns 0 once friso and config are initialized and a task has been
 * created. Returns -1 (after printing a message) when
 * friso_init_from_ifile() reports anything other than 1; in that case no
 * task is created and the friso/config objects allocated here are left
 * in place.
 */
int init_friso(fstring path) {
	friso = friso_new();
	config = friso_new_config();

	if ( friso_init_from_ifile(friso, config, path) == 1 ) {
		/* Initialization succeeded: create the task that will be reused. */
		task = friso_new_task();
		return 0;
	}

	printf("fail to initialize friso and config.");
	return -1;
}
Пример #3
0
/*
 * Interactive lexicon lookup tool.
 *
 * Loads the lexicon files listed below from ../dict/ into a new
 * dictionary, prints the load time and the total dictionary size, then
 * runs a prompt loop on stdin:
 *
 *   quit    leave the loop
 *   help    print the help text (___PRINT_HELP_INFO___)
 *   #set    read an integer and use it as the lexicon type for lookups
 *   <word>  look the word up in the current lexicon and print the entry
 *
 * The loop also ends on end-of-file or a read error on stdin.
 * argc/argv are unused. Always returns 0.
 */
int main(int argc, char **argv)
{
    /* Lexicon files, loaded in exactly this order. */
    static const struct {
        int lex;
        char *path;
    } lexicons[] = {
        //__CJK_WORDS__
        { __LEX_CJK_WORDS__, "../dict/lex-main.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-admin.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-chars.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-cn-mz.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-cn-place.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-company.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-festival.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-flname.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-food.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-lang.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-nation.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-net.lex" },
        { __LEX_CJK_WORDS__, "../dict/lex-org.lex" },
        //__CJK_UNITS__
        { __LEX_CJK_UNITS__, "../dict/lex-units.lex" },
        //__MIX_WORDS__
        { __LEX_ECM_WORDS__, "../dict/lex-mixed.lex" },
        //__CN_LNAME__
        { __LEX_CN_LNAME__,  "../dict/lex-lname.lex" },
        //__CN_SNAME__
        { __LEX_CN_SNAME__,  "../dict/lex-sname.lex" },
        //__CN_DNAME1__
        { __LEX_CN_DNAME1__, "../dict/lex-dname-1.lex" },
        //__CN_DNAME2__
        { __LEX_CN_DNAME2__, "../dict/lex-dname-2.lex" },
        //__CN_LNA__
        { __LEX_CN_LNA__,    "../dict/lex-lna.lex" }
    };

    lex_entry_t e;
    int lex = __LEX_CJK_WORDS__;
    char _line[__LENGTH__];
    char word_fmt[32];
    clock_t s_time, e_time;
    friso_t friso;
    size_t k;
    int got;

    /*
     * Build "%<N>s" with N = sizeof(_line) - 1 so that scanf can never
     * write past the end of _line. A longer token is consumed in several
     * pieces instead of overflowing the buffer.
     */
    snprintf( word_fmt, sizeof( word_fmt ), "%%%ds", (int) sizeof( _line ) - 1 );

    s_time = clock();

    friso = friso_new();
    friso->dic = friso_dic_new();
    for ( k = 0; k < sizeof( lexicons ) / sizeof( lexicons[0] ); k++ ) {
        friso_dic_load( friso, lexicons[k].lex, lexicons[k].path, __LENGTH__ );
    }

    e_time = clock();

    printf("Done, cost: %f sec, size=%d\n", ( double ) ( e_time - s_time ) / CLOCKS_PER_SEC, \
        friso_all_dic_size( friso->dic ) );

    while ( 1 ) {
        printf("friso-%d>> ", lex);

        /* "%s" yields 1 on success and EOF otherwise; stop instead of
         * re-processing a stale buffer forever once stdin is exhausted. */
        if ( scanf( word_fmt, _line ) != 1 ) {
            break;
        }

        if ( strcmp( _line, "quit" ) == 0 ) {
            break;
        } else if ( strcmp( _line, "help" ) == 0 ) {
            ___PRINT_HELP_INFO___
        } else if ( strcmp( _line, "#set" ) == 0 ) {
            printf("lex_t>> ");
            got = scanf("%d", &lex);
            if ( got == EOF ) {
                break;
            }
            if ( got != 1 ) {
                /* Not a number: lex is unchanged. Drop the offending token
                 * so it is not mistaken for a word on the next pass. */
                printf("invalid lex_t value, keeping %d.\n", lex);
                if ( scanf("%*s") == EOF ) {
                    break;
                }
            }
            /* NOTE(review): lex is passed to friso_dic_get() unvalidated;
             * the valid range is not visible here - confirm and clamp. */
        } else {
            s_time = clock();
            e = friso_dic_get( friso->dic, lex, _line );
            e_time = clock();
            if ( e != NULL ) {
                printf("word=%s, syn=%s, fre=%d, cost:%fsec\n", 
                    e->word, e->syn==NULL? "NULL" : (char *)e->syn->items[0], e->fre, 
                    (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
            } else {
                printf("%s was not found.\n", _line);
            }
        }
    }

    /* NOTE(review): friso->dic is not freed separately; presumably
     * friso_free() releases it as well - confirm. */
    friso_free(friso);

    return 0;
}
Пример #4
0
/*
 * Create a new friso instance and initialize it from a configuration file.
 *
 * The file is read line by line through file_get_line(). Lines that start
 * with '#', a tab or a space, and empty lines, are skipped. Every other
 * line is parsed as "key = value" (whitespace around '=' is optional; the
 * value ends at the first blank). Recognized keys:
 *
 *   friso.lex_dir     lexicon location handed to friso_dic_load_from_ifile()
 *   friso.max_len     -> e->max_len
 *   friso.r_name      -> e->r_name
 *   friso.mix_len     -> e->mix_len
 *   friso.lna_len     -> e->lna_len
 *   friso.nthreshold  -> e->nthreshold
 *   friso.mode        -> e->mode
 *
 * Numeric values are converted with atoi(). Unknown keys are ignored.
 * Only the first non-empty friso.lex_dir value is used. A value that does
 * not fit the internal buffer is rejected with a warning on stderr.
 *
 * __ifile: path of the configuration file.
 *
 * Returns the new instance. When the file cannot be opened the instance
 * is returned exactly as friso_new() produced it. The dictionary is only
 * created when a usable friso.lex_dir was found.
 */
__EXTERN_API__ friso_t friso_new_from_ifile( string __ifile ) {
	
	FILE *stream;
	char buffer[256], key[128], lex_dir[128], *line;
	uint_t i, t, key_len, lex_dir_len = 0, length;
	friso_t e = friso_new();

	if ( ( stream = fopen( __ifile, "rb" ) ) == NULL ) {
		return e;
	}

	//initialize the entry with the value from the ifile.
	while ( ( line = file_get_line( buffer, stream ) ) != NULL ) {
		//comments, indented lines and empty lines are skipped.
		if ( line[0] == '#' || line[0] == '\t' 
				|| line[0] == ' ' || line[0] == '\0' ) continue;

		length = strlen( line );

		/*
		 * extract the key.
		 *		a line may be longer than key[], so never store past its
		 *	end. A truncated key is harmless: every recognized key is
		 *	far shorter than the buffer, so it simply matches nothing.
		 */
		key_len = 0;
		for ( i = 0; i < length; i++ ) {
			if ( line[i] == ' ' || line[i] == '\t' || line[i] == '=' ) break;
			if ( key_len < sizeof( key ) - 1 ) key[key_len++] = line[i];
		}
		key[key_len] = '\0';

		//position the equals char '='.
		if ( line[i] == ' ' || line[i] == '\t' ) {
			for ( i++; i < length; i++ ) {
				if ( line[i] == '=' ) break;
			}
		}

		//step over '=' and clear the left whitespace of the value.
		for ( i++; i < length 
					&& ( line[i] == ' ' || line[i] == '\t' ); i++ );

		//move the value to the front of the line (t <= i, so this is safe).
		for ( t = 0; i < length; i++, t++ ) {
			if ( line[i] == ' ' || line[i] == '\t' ) break;
			line[t] = line[i];
		}
		line[t] = '\0';

		if ( strcmp( key, "friso.lex_dir" ) == 0 ) {
			/*
			 * only remember the value of the lex_dir here.
			 *		the dictionary is loaded after the whole file has been
			 *	parsed, because the load needs the final value of
			 *	friso.max_len.
			 */
			if ( lex_dir_len == 0 && t > 0 ) {
				if ( t < sizeof( lex_dir ) ) {
					memcpy( lex_dir, line, t );
					lex_dir[t] = '\0';
					lex_dir_len = t;
				} else {
					//a truncated path would point somewhere else: reject it.
					fprintf( stderr, 
						"friso: value of friso.lex_dir is too long (%u bytes), ignored.\n", 
						( unsigned int ) t );
				}
			}
		} else if ( strcmp( key, "friso.max_len" ) == 0 ) {
			e->max_len = atoi( line );
		} else if ( strcmp( key, "friso.r_name" ) == 0 ) {
			e->r_name = atoi( line );
		} else if ( strcmp( key, "friso.mix_len" ) == 0 ) {
			e->mix_len = atoi( line );
		} else if ( strcmp( key, "friso.lna_len" ) == 0 ) {
			e->lna_len = atoi( line );
		} else if ( strcmp( key, "friso.nthreshold" ) == 0 ) {
			e->nthreshold = atoi( line );
		} else if ( strcmp( key, "friso.mode" ) == 0 ) {
			e->mode = ( friso_mode_t ) atoi( line );
		}
	}

	//the configuration stream is no longer needed once parsing is done.
	fclose( stream );

	/*
	 * initialize the friso dictionary here.
	 *		use the settings parsed above; the lexicon location was
	 *	copied into lex_dir.
	 */
	if ( lex_dir_len != 0 ) {
		e->dic = friso_dic_new();
		friso_dic_load_from_ifile( e->dic, lex_dir, e->max_len * 3 );
	}

	return e;
}
Пример #5
0
/*
 * Interactive tokenizer shell.
 *
 * Usage: friso -init <configuration file>
 *
 * Initializes friso and its configuration from the file given after
 * "-init", prints the start-up banner, then reads lines from stdin and
 * prints the tokens produced by config->next_token() for each line.
 * Entering "quit" (case-insensitive) runs ___EXIT_INFO___; end-of-file or
 * a read error on stdin also ends the session.
 *
 * Returns 0 on a normal exit and 1 when initialization fails. When no
 * "-init" argument is given the usage line is printed and the process
 * exits with status 0.
 */
int main(int argc, char **argv) 
{

    clock_t s_time, e_time;
    char line[__INPUT_LENGTH__] = {0};
    int i, at_eof, status = 0;
    fstring __path__ = NULL, mode = NULL;

    friso_t friso;
    friso_config_t config;
    friso_task_t task;

    //get the configuration file path.
    //argv[argc] is a null pointer, so a trailing "-init" leaves __path__ NULL.
    for ( i = 0; i < argc; i++ ) {
        if ( strcasecmp( "-init", argv[i] ) == 0 ) {
            __path__ = argv[i+1];
        }
    }
    if ( __path__ == NULL ) {
        println("Usage: friso -init lexicon path");
        exit(0);
    }

    s_time = clock();

    //initialize
    friso = friso_new();
    config = friso_new_config();
    if ( friso_init_from_ifile(friso, config, __path__) != 1 ) {
        printf("fail to initialize friso and config.");
        status = 1;     //report the failure to the caller of the program.
        goto err;
    }

    switch ( config->mode ) 
    {
        case __FRISO_SIMPLE_MODE__:
            mode = "Simple";
            break;
        case __FRISO_COMPLEX_MODE__:
            mode = "Complex";
            break;
        case __FRISO_DETECT_MODE__:
            mode = "Detect";
            break;
        default:
            //never hand a NULL pointer to printf("%s") below.
            mode = "Unknown";
            break;
    }

    e_time = clock();

    printf("Initialized in %fsec\n", (double) ( e_time - s_time ) / CLOCKS_PER_SEC );
    printf("Mode: %s\n", mode);
    printf("+-Version: %s (%s)\n", friso_version(), friso->charset == FRISO_UTF8 ? "UTF-8" : "GBK" );
    ___ABOUT___;

    //set the task.
    task = friso_new_task();

    while ( 1 ) 
    {
        print("friso>> ");

        //clear the buffer first so that stale text is never tokenized
        //again when nothing more could be read.
        line[0] = '\0';
        //NOTE(review): getLine() gets no buffer size; presumably a line
        //longer than __INPUT_LENGTH__ overruns line[] - confirm in helper.
        getLine( stdin, line );

        //stop at end of input instead of prompting forever.
        at_eof = feof( stdin ) || ferror( stdin );
        if ( at_eof && line[0] == '\0' ) {
            break;
        }

        //exit the program
        if ( strcasecmp( line, "quit" ) == 0 ) {
            ___EXIT_INFO___
        }

        //set the task text.
        friso_set_text( task, line );
        println("分词结果:");

        s_time = clock();
        while ( ( config->next_token( friso, config, task ) ) != NULL ) 
        {
            printf("%s ", task->token->word );
        }
        e_time = clock();
        printf("\nDone, cost < %fsec\n", ( (double)(e_time - s_time) ) / CLOCKS_PER_SEC );

        //a last line without a trailing newline has been handled; leave now.
        if ( at_eof ) {
            break;
        }
    }

    friso_free_task( task );

    //error block: also the common cleanup path.
err:
    friso_free_config(config);
    friso_free(friso);
    

    return status;
}