Ejemplo n.º 1
0
/**
 * Smoke test for the string_t helpers: new, at, cmp, eq, cat, slice,
 * split and join. Each section prints a banner and then asserts on the
 * helper's result, so the first failing assertion stops the run.
 *
 * @return 0 when every assertion holds
 */
int main(void) {

    string_t temp;

    string_t string1 = string_new("hello world");
    string_t string2 = string_new("another");
    string_t string3 = string_new("a third");

    printf("Testing new()...\n");
    assert(strcmp("hello world", string1.bytes) == 0);


    printf("Testing at()...\n");

    /* Negative indices count back from the end of the string. */
    assert(*string_at(string1, 4) == 'o');
    assert(*string_at(string1, -1) == 'd');

    /* Out-of-range indices, in either direction, yield NULL. */
    assert(string_at(string1, 100) == NULL);
    assert(string_at(string1, -100) == NULL);


    printf("Testing cmp()...\n");

    assert(string_cmp(string1, string1) == 0);
    assert(string_cmp(string1, string2) > 0);
    assert(string_cmp(string2, string1) < 0);


    printf("Testing eq()...\n");

    assert(string_eq(string1, string1));
    assert(! string_eq(string1, string2));


    printf("Testing cat()...\n");

    /* The result must be string1's bytes followed by all of string2. */
    temp = string_cat(string1, string2);
    assert(temp.length == string1.length + string2.length);
    assert(strncmp(string1.bytes, temp.bytes, string1.length) == 0);
    assert(strcmp(string2.bytes, temp.bytes + string1.length) == 0);
    string_free(temp);


    printf("Testing slice()...\n");

    /*
     * The remaining slice checks below treat the end index as exclusive,
     * so [1, 6) of "hello world" is "ello " including the trailing space.
     * The comparison must be "== 0": a bare strcmp() is truthy whenever
     * the strings DIFFER, which would make the assertion meaningless.
     */
    temp = string_slice(string1, 1, 6);
    assert(strcmp(temp.bytes, "ello ") == 0);
    string_free(temp);

    temp = string_slice(string1, 1, -1);
    assert(strcmp(temp.bytes, "ello worl") == 0);
    string_free(temp);

    temp = string_slice(string1, -5, -2);
    assert(strcmp(temp.bytes, "wor") == 0);
    string_free(temp);

    /* Bounds far outside the string are expected to give the whole string. */
    temp = string_slice(string1, -100, 100);
    assert(strcmp(temp.bytes, "hello world") == 0);
    string_free(temp);


    printf("Testing split()...\n");

    string_split_t split = string_split_init(string1, 'o');

    temp = string_split_next(&split);
    assert(strcmp(temp.bytes, "hell") == 0);
    string_free(temp);

    temp = string_split_next(&split);
    assert(strcmp(temp.bytes, " w") == 0);
    string_free(temp);

    temp = string_split_next(&split);
    assert(strcmp(temp.bytes, "rld") == 0);
    string_free(temp);

    /* Once the pieces run out, the returned string has a NULL bytes pointer. */
    temp = string_split_next(&split);
    assert(! temp.bytes);
    string_free(temp);


    printf("Testing join()...\n");

    string_t parts[] = {string1, string2, string3};
    temp = string_join(parts, 3, ' ');
    assert(strcmp(temp.bytes, "hello world another a third") == 0);
    string_free(temp);

    string_free(string1);
    string_free(string2);
    string_free(string3);


    printf("All tests passed.\n");
    return 0;
}
Ejemplo n.º 2
0
/**
 * Load the entries of a lexicon file into the dictionary of a friso instance.
 *
 * The file is read line by line. A line that starts with '#' and is longer
 * than one character is skipped as a comment. For __LEX_STOPWORDS__ the
 * whole line is the word. For every other lexicon type the line is split on
 * '/' into: the word, the synonyms ("null" for none, otherwise separated
 * by ','), and an optional frequency.
 *
 * If the file cannot be opened a warning is printed and nothing is loaded.
 *
 * @param friso     friso instance; words are added to friso->dic
 * @param config    configuration; synonyms are stored only if config->add_syn is set
 * @param lex       the lexicon type the words are registered under
 * @param lex_file  the path of the lexicon file
 * @param length    the maximum length (in bytes, via strlen) of a word item
 */
FRISO_API void friso_dic_load( 
        friso_t friso,
        friso_config_t config,
        friso_lex_t lex,
        fstring lex_file,
        uint_t length ) 
{
    FILE *stream;
    /*
     * NOTE(review): the line buffer is 1024 bytes while the token buffers
     * are 512 bytes - confirm that string_split_next() / string_copy()
     * cannot write more than the destination holds.
     */
    char line_buf[1024], token[512];
    char syn_buf[512];
    fstring line;
    fstring word;
    fstring syn;
    string_split_entry sse;
    friso_array_t sywords;
    uint_t fre;

    stream = fopen( lex_file, "rb" );
    if ( stream == NULL ) 
    {
        printf("Warning: Fail to open lexicon file %s\n", lex_file);
        return;
    }

    while ( ( line = file_get_line( line_buf, stream ) ) != NULL ) 
    {
        //skip the comment lines.
        //a line that is exactly "#" is kept, since the single '#' mark
        //is a valid entry in the stopwords dictionary.
        if ( line[0] == '#' && strlen( line ) > 1 ) 
        {
            continue;
        }

        //handle the stopwords: the whole line is the word.
        if ( lex == __LEX_STOPWORDS__ )
        {
            size_t line_len = strlen( line );

            //drop the non-ASCII (e.g. chinese) words longer than the limit.
            //the high bit is tested through unsigned char so the check
            //does not depend on whether plain char is signed.
            if ( (unsigned char) line[0] >= 0x80 && line_len > length ) 
            {
                continue;
            }

            friso_dic_add( friso->dic, __LEX_STOPWORDS__, 
                    string_copy_heap( line, line_len ), NULL ); 
            continue;
        }

        //split the line with '/'.
        string_split_reset( &sse, "/", line ); 
        if ( string_split_next( &sse, token ) == NULL ) 
        {
            continue;
        }

        //1. get the word (heap copy handed over to the dictionary).
        word = string_copy_heap( token, strlen( token ) );

        if ( string_split_next( &sse, token ) == NULL ) 
        {
            //normal lexicon type without extra fields,
            //add it to the dictionary directly.
            friso_dic_add( friso->dic, lex, word, NULL ); 
            continue;
        }

        /*
         * filter out the words whose length is larger
         *     than the specified limit,
         * but not for __LEX_ECM_WORDS__ and __LEX_CEM_WORDS__.
         * the heap copy is released here because it is not stored.
         */
        if ( lex != __LEX_ECM_WORDS__ && lex != __LEX_CEM_WORDS__ 
                && strlen( word ) > length ) 
        {
            FRISO_FREE(word);
            continue;
        }

        //2. get the synonyms words ("null" means there are none).
        syn = NULL;
        if ( strcmp( token, "null" ) != 0 ) 
        {
            syn = string_copy( token, syn_buf, strlen( token ) );
        }

        //3. get the word frequency if it is available.
        fre = 0;
        if ( string_split_next( &sse, token ) != NULL ) 
        {
            fre = atoi( token );
        }

        /*
         * split the synonyms words with the mark ","
         *     and put them in an array list if the synonyms is not NULL
         *     and the configuration asks for them.
         * the splitter is re-targeted at syn_buf here, which is fine
         *     because all the '/' fields have been consumed by now.
         */
        sywords = NULL;
        if ( config->add_syn && syn != NULL ) 
        {
            string_split_reset( &sse, ",", syn_buf );
            sywords = new_array_list_with_opacity(5);
            while ( string_split_next( &sse, token ) != NULL ) 
            {
                //synonyms longer than the limit are dropped.
                if ( strlen( token ) > length ) 
                {
                    continue;
                }
                array_list_add( sywords, 
                        string_copy_heap( token, strlen( token ) ) );
            }
            sywords = array_list_trim( sywords );
        }

        //4. add the word item.
        friso_dic_add_with_fre( 
                friso->dic, lex, word, sywords, fre );
    } 

    fclose( stream );
}