int main(void) { string_t temp; string_t string1 = string_new("hello world"); string_t string2 = string_new("another"); string_t string3 = string_new("a third"); printf("Testing new()...\n"); assert(strcmp("hello world", string1.bytes) == 0); printf("Testing at()...\n"); assert(*string_at(string1, 4) == 'o'); assert(*string_at(string1, -1) == 'd'); assert(string_at(string1, 100) == NULL); assert(string_at(string1, -100) == NULL); printf("Testing cmp()...\n"); assert(string_cmp(string1, string1) == 0); assert(string_cmp(string1, string2) > 0); assert(string_cmp(string2, string1) < 0); printf("Testing eq()...\n"); assert(string_eq(string1, string1)); assert(! string_eq(string1, string2)); printf("Testing cat()...\n"); temp = string_cat(string1, string2); assert(temp.length == string1.length + string2.length); assert(strncmp(string1.bytes, temp.bytes, string1.length) == 0); assert(strcmp(string2.bytes, temp.bytes + string1.length) == 0); string_free(temp); printf("Testing slice()...\n"); temp = string_slice(string1, 1, 6); assert(strcmp(temp.bytes, "ello")); string_free(temp); temp = string_slice(string1, 1, -1); assert(strcmp(temp.bytes, "ello worl") == 0); string_free(temp); temp = string_slice(string1, -5, -2); assert(strcmp(temp.bytes, "wor") == 0); string_free(temp); temp = string_slice(string1, -100, 100); assert(strcmp(temp.bytes, "hello world") == 0); string_free(temp); printf("Testing split()...\n"); string_split_t split = string_split_init(string1, 'o'); temp = string_split_next(&split); assert(strcmp(temp.bytes, "hell") == 0); string_free(temp); temp = string_split_next(&split); assert(strcmp(temp.bytes, " w") == 0); string_free(temp); temp = string_split_next(&split); assert(strcmp(temp.bytes, "rld") == 0); string_free(temp); temp = string_split_next(&split); assert(! temp.bytes); string_free(temp); printf("Testing join()...\n"); string_t parts[] = {string1, string2, string3}; temp = string_join(parts, 3, ' '); assert(strcmp(temp.bytes, "hello world another a third") == 0); string_free(temp); // string_t[] parts = {s1, s2, s3}; // int joined = join(parts, sizeof(parts) / sizeof(*parts)); string_free(string1); string_free(string2); string_free(string3); printf("All tests passed.\n"); return 0; }
/** * load all the valid wors from a specified lexicon file . * * @param dic friso dictionary instance (A hash array) * @param lex the lexicon type * @param lex_file the path of the lexicon file * @param length the maximum length of the word item */ FRISO_API void friso_dic_load( friso_t friso, friso_config_t config, friso_lex_t lex, fstring lex_file, uint_t length ) { FILE * _stream; char __char[1024], _buffer[512]; fstring _line; string_split_entry sse; fstring _word; char _sbuffer[512]; fstring _syn; friso_array_t sywords; uint_t _fre; if ( ( _stream = fopen( lex_file, "rb" ) ) != NULL ) { while ( ( _line = file_get_line( __char, _stream ) ) != NULL ) { //clear up the notes //make sure the length of the line is greater than 1. //like the single '#' mark in stopwords dictionary. if ( _line[0] == '#' && strlen(_line) > 1 ) continue; //handle the stopwords. if ( lex == __LEX_STOPWORDS__ ) { //clean the chinese words that its length is greater than max length. if ( ((int)_line[0]) < 0 && strlen( _line ) > length ) continue; friso_dic_add( friso->dic, __LEX_STOPWORDS__, string_copy_heap( _line, strlen(_line) ), NULL ); continue; } //split the fstring with '/'. string_split_reset( &sse, "/", _line); if ( string_split_next( &sse, _buffer ) == NULL ) continue; //1. get the word. _word = string_copy_heap( _buffer, strlen(_buffer) ); if ( string_split_next( &sse, _buffer ) == NULL ) { //normal lexicon type, //add them to the dictionary directly friso_dic_add( friso->dic, lex, _word, NULL ); continue; } /* * filter out the words that its length is larger * than the specified limit. * but not for __LEX_ECM_WORDS__ and english __LEX_STOPWORDS__ * and __LEX_CEM_WORDS__. */ if ( ! ( lex == __LEX_ECM_WORDS__ || lex == __LEX_CEM_WORDS__ ) && strlen( _word ) > length ) { FRISO_FREE(_word); continue; } //2. get the synonyms words. _syn = NULL; if ( strcmp( _buffer, "null" ) != 0 ) _syn = string_copy( _buffer, _sbuffer, strlen(_buffer) ); //3. get the word frequency if it available. _fre = 0; if ( string_split_next( &sse, _buffer ) != NULL ) _fre = atoi( _buffer ); /** * Here: * split the synonyms words with mark "," * and put them in a array list if the synonyms is not NULL */ sywords = NULL; if ( config->add_syn && _syn != NULL ) { string_split_reset( &sse, ",", _sbuffer ); sywords = new_array_list_with_opacity(5); while ( string_split_next( &sse, _buffer ) != NULL ) { if ( strlen(_buffer) > length ) continue; array_list_add( sywords, string_copy_heap(_buffer, strlen(_buffer)) ); } sywords = array_list_trim( sywords ); } //4. add the word item friso_dic_add_with_fre( friso->dic, lex, _word, sywords, _fre ); } fclose( _stream ); } else { printf("Warning: Fail to open lexicon file %s\n", lex_file); } }