/*
 * Load the Hanyu Pinyin key maps from the file PINYIN_TAB_NAME found
 * under the directory 'prefix'.
 *
 * The file is read as: a count, that many "pinyin zuin" pairs (initials),
 * then a second count followed by that many pairs (finals). Each table is
 * allocated with one more slot than the count read from the file; the
 * extra slot is left as ALC() returned it.
 *
 * Returns 1 on success and 0 on any failure (path too long, file missing,
 * malformed data or allocation failure). The file handle is always closed
 * before returning. Tables allocated before a failure stay attached to
 * 'pgdata' and are not released here.
 */
int InitPinyin( ChewingData *pgdata, const char *prefix )
{
    char filename[ PATH_MAX ];
    int i;
    int len;
    int ret;
    int ok = 0;
    FILE *fd;

    /* Bounded formatting: refuse a path that does not fit in 'filename'. */
    len = snprintf( filename, sizeof( filename ),
                    "%s" PLAT_SEPARATOR "%s", prefix, PINYIN_TAB_NAME );
    if ( len < 0 || (size_t) len >= sizeof( filename ) )
        return 0;

    fd = fopen( filename, "r" );
    if ( ! fd )
        return 0;

    /* --- initials table --- */
    ret = fscanf( fd, "%d", &pgdata->static_data.HANYU_INITIALS );
    if ( ret != 1 || pgdata->static_data.HANYU_INITIALS < 0 )
        goto out;
    ++pgdata->static_data.HANYU_INITIALS;
    pgdata->static_data.hanyuInitialsMap =
        ALC( keymap, pgdata->static_data.HANYU_INITIALS );
    if ( ! pgdata->static_data.hanyuInitialsMap )
        goto out;
    for ( i = 0; i < pgdata->static_data.HANYU_INITIALS - 1; i++ ) {
        /*
         * NOTE(review): "%s" has no field width; the sizes of the
         * 'pinyin' and 'zuin' buffers are not visible here, so a width
         * could not be added safely. Confirm against the keymap type.
         */
        ret = fscanf( fd, "%s %s",
                      pgdata->static_data.hanyuInitialsMap[ i ].pinyin,
                      pgdata->static_data.hanyuInitialsMap[ i ].zuin );
        if ( ret != 2 )
            goto out;
    }

    /* --- finals table --- */
    ret = fscanf( fd, "%d", &pgdata->static_data.HANYU_FINALS );
    if ( ret != 1 || pgdata->static_data.HANYU_FINALS < 0 )
        goto out;
    ++pgdata->static_data.HANYU_FINALS;
    pgdata->static_data.hanyuFinalsMap =
        ALC( keymap, pgdata->static_data.HANYU_FINALS );
    if ( ! pgdata->static_data.hanyuFinalsMap )
        goto out;
    for ( i = 0; i < pgdata->static_data.HANYU_FINALS - 1; i++ ) {
        ret = fscanf( fd, "%s %s",
                      pgdata->static_data.hanyuFinalsMap[ i ].pinyin,
                      pgdata->static_data.hanyuFinalsMap[ i ].zuin );
        if ( ret != 2 )
            goto out;
    }

    ok = 1;

out:
    fclose( fd );
    return ok;
}
/*
 * Add 'record' (an array of 'nInter' ints) to the list ptd->phList.
 *
 * - If some node already in the list contains 'record' (as decided by
 *   IsRecContain), the list is left unchanged and 'record' is dropped.
 * - Every node that 'record' contains is unlinked and freed.
 * - Otherwise a copy of 'record' is pushed at the head of the list.
 */
static void SaveRecord( int *record, int nInter, TreeDataType *ptd )
{
    /* 'link' always points at the pointer that refers to the current node,
     * so unlinking needs no special case for the list head. */
    RecordNode **link = &ptd->phList;
    RecordNode *cur;
    RecordNode *fresh;

    while ( ( cur = *link ) != NULL ) {
        /* An existing node covers 'record': nothing to add. */
        if ( IsRecContain( cur->arrIndex, cur->nInter, record, nInter, ptd ) )
            return;

        if ( IsRecContain( record, nInter, cur->arrIndex, cur->nInter, ptd ) ) {
            /* 'record' covers this node: remove it from the list. */
            *link = cur->next;
            free( cur->arrIndex );
            free( cur );
        } else {
            link = &cur->next;
        }
    }

    fresh = ALC( RecordNode, 1 );
    assert( fresh );
    fresh->next = ptd->phList;
    fresh->arrIndex = ALC( int, nInter );
    assert( fresh->arrIndex );
    fresh->nInter = nInter;
    memcpy( fresh->arrIndex, record, nInter * sizeof( int ) );
    ptd->phList = fresh;
}
/* find the maximum frequency of the same phrase */ static int LoadMaxFreq( const uint16 phoneSeq[], int len ) { int pho_id; Phrase *phrase = ALC( Phrase, 1 ); int maxFreq = FREQ_INIT_VALUE; UserPhraseData *uphrase; pho_id = TreeFindPhrase( 0, len - 1, phoneSeq ); if ( pho_id != -1 ) { GetPhraseFirst( phrase, pho_id ); do { if ( phrase->freq > maxFreq ) maxFreq = phrase->freq; } while( GetPhraseNext( phrase ) ); } free( phrase ); uphrase = UserGetPhraseFirst( phoneSeq ); while ( uphrase ) { if ( uphrase->userfreq > maxFreq ) maxFreq = uphrase->userfreq; uphrase = UserGetPhraseNext( phoneSeq ); } return maxFreq; }
/*
 * Find the maximum frequency among the phrases that share 'phoneSeq'.
 *
 * Starts from FREQ_INIT_VALUE, then takes the largest 'freq' of the
 * entries enumerated by GetPhraseFirst/GetVocabNext at the tree position
 * returned by TreeFindPhrase (when it is non-NULL), and the largest
 * 'userfreq' of the entries enumerated by
 * UserGetPhraseFirst/UserGetPhraseNext.
 *
 * If the scratch Phrase cannot be allocated, the dictionary scan is
 * skipped and only the user phrases are considered.
 */
static int LoadMaxFreq( ChewingData *pgdata, const uint16_t phoneSeq[], int len )
{
    const TreeType *tree_pos;
    Phrase *phrase;
    int maxFreq = FREQ_INIT_VALUE;
    UserPhraseData *uphrase;

    tree_pos = TreeFindPhrase( pgdata, 0, len - 1, phoneSeq );
    if ( tree_pos ) {
        /* Allocate only when there is something to enumerate. */
        phrase = ALC( Phrase, 1 );
        if ( phrase ) {
            GetPhraseFirst( pgdata, phrase, tree_pos );
            do {
                if ( phrase->freq > maxFreq )
                    maxFreq = phrase->freq;
            } while ( GetVocabNext( pgdata, phrase ) );
            free( phrase );
        }
    }

    uphrase = UserGetPhraseFirst( pgdata, phoneSeq );
    while ( uphrase ) {
        if ( uphrase->userfreq > maxFreq )
            maxFreq = uphrase->userfreq;
        uphrase = UserGetPhraseNext( pgdata, phoneSeq );
    }

    return maxFreq;
}
/*
 * Return a newly allocated copy of the context's phone sequence
 * (ctx->data->phoneSeq, ctx->data->nPhoneSeq elements).
 *
 * Returns NULL when 'ctx' or its data is NULL, or when the allocation
 * fails. The buffer comes from ALC(); the caller is responsible for
 * releasing it.
 */
CHEWING_API uint16 *chewing_get_phoneSeq( ChewingContext *ctx )
{
    uint16 *seq;

    if ( !ctx || !ctx->data )
        return NULL;

    seq = ALC( uint16, ctx->data->nPhoneSeq );
    /* Never copy into a failed (or zero-sized, possibly NULL) allocation. */
    if ( seq && ctx->data->nPhoneSeq > 0 )
        memcpy( seq, ctx->data->phoneSeq,
                sizeof( uint16 ) * ctx->data->nPhoneSeq );

    return seq;
}
/*
 * Load the phone tree stored in PHONE_TREE_FILE under 'prefix' into
 * pgdata->static_data.tree.
 *
 * With USE_BINARY_DATA the file is memory-mapped through the plat_mmap_*
 * helpers; otherwise it is parsed as text, four numbers per tree node,
 * into an array of TREE_SIZE nodes.
 *
 * Returns 0 on success and -1 on failure (path does not fit in the
 * buffer, file cannot be opened/mapped, or allocation failure).
 */
int InitTree( ChewingData *pgdata, const char * prefix )
{
#ifdef USE_BINARY_DATA
    char filename[ PATH_MAX ];
    int len;
    size_t offset;

    len = snprintf( filename, sizeof( filename ),
                    "%s" PLAT_SEPARATOR "%s", prefix, PHONE_TREE_FILE );
    /* Reject both an snprintf error (negative) and a truncated path. */
    if ( len < 0 || (size_t) len >= sizeof( filename ) )
        return -1;

    plat_mmap_set_invalid( &pgdata->static_data.tree_mmap );
    pgdata->static_data.tree_size = plat_mmap_create(
        &pgdata->static_data.tree_mmap, filename, FLAG_ATTRIBUTE_READ );
    if ( pgdata->static_data.tree_size <= 0 )
        return -1;

    offset = 0;
    pgdata->static_data.tree = (TreeType *) plat_mmap_set_view(
        &pgdata->static_data.tree_mmap, &offset,
        &pgdata->static_data.tree_size );
    if ( !pgdata->static_data.tree )
        return -1;

    return 0;
#else
    char filename[ PATH_MAX ];
    int len;
    FILE *infile = NULL;
    int i;

    len = snprintf( filename, sizeof( filename ),
                    "%s" PLAT_SEPARATOR "%s", prefix, PHONE_TREE_FILE );
    /* Reject both an snprintf error (negative) and a truncated path. */
    if ( len < 0 || (size_t) len >= sizeof( filename ) )
        return -1;

    infile = fopen( filename, "r" );
    if ( !infile )
        return -1;

    pgdata->static_data.tree = ALC( TreeType, TREE_SIZE );
    if ( !pgdata->static_data.tree ) {
        fclose( infile );
        return -1;
    }

    /* XXX: What happens if infile contains more than TREE_SIZE entries?
     * Reading stops at TREE_SIZE nodes or at the first incomplete record;
     * either case is still reported as success. */
    for ( i = 0; i < TREE_SIZE; i++ ) {
        if ( fscanf( infile, "%hu%d%d%d",
                     &pgdata->static_data.tree[ i ].phone_id,
                     &pgdata->static_data.tree[ i ].phrase_id,
                     &pgdata->static_data.tree[ i ].child_begin,
                     &pgdata->static_data.tree[ i ].child_end ) != 4 )
            break;
    }

    fclose( infile );
    return 0;
#endif
}
/*
 * Create a new ChewingContext with freshly allocated data and output
 * structures, then reset it via chewing_Reset().
 *
 * Returns the new context, or NULL if any allocation fails. On failure
 * every partial allocation is released, so nothing is leaked.
 */
CHEWING_API ChewingContext *chewing_new()
{
    ChewingContext *ctx;
    ChewingData *internal_data = ALC( ChewingData, 1 );
    ChewingOutput *internal_output = ALC( ChewingOutput, 1 );

    ctx = ALC( ChewingContext, 1 );
    if ( !ctx || !internal_data || !internal_output ) {
        /* free(NULL) is a no-op, so all three can be released blindly. */
        free( ctx );
        free( internal_output );
        free( internal_data );
        return NULL;
    }

    ctx->data = internal_data;
    ctx->output = internal_output;
    ctx->cand_no = 0;

    /* handle configuration */
    chewing_Reset( ctx );

    return ctx;
}
/*
 * Allocate a ChewingData with ALC() and fill in its default settings:
 * candPerPage = MAX_SELKEY, maxChiSymbolLen = MAX_CHI_SYMBOL_LEN, the
 * NullLogger logger, and the selection keys '1'..'9','0'.
 *
 * Returns the new object, or NULL when the allocation fails.
 */
static ChewingData * allocate_ChewingData()
{
    static const int DEFAULT_SELKEY[] = {
        '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'
    };
    ChewingData *fresh = ALC( ChewingData, 1 );

    if ( !fresh )
        return NULL;

    fresh->logger = NullLogger;
    fresh->config.candPerPage = MAX_SELKEY;
    fresh->config.maxChiSymbolLen = MAX_CHI_SYMBOL_LEN;
    /* The copy length is the size of the destination selKey array. */
    memcpy( fresh->config.selKey, DEFAULT_SELKEY,
            sizeof( fresh->config.selKey ) );

    return fresh;
}
/*
 * Look for a phrase of 'ph_id' that agrees with every user selection
 * falling inside the interval [from, to].
 *
 * For each candidate phrase (GetPhraseFirst/GetPhraseNext) every entry of
 * selectInterval[] is examined:
 *   - contained in [from, to]: the matching part of the phrase text must
 *     equal selectStr[] for that entry, otherwise the candidate is
 *     rejected and the next phrase is tried;
 *   - merely intersecting [from, to]: the interval cannot be satisfied
 *     at all and 0 is returned immediately.
 *
 * Returns 1 and stores the matching phrase in *pp_phr (the allocation is
 * handed to the caller), or returns 0 with *pp_phr == NULL.
 */
static int CheckChoose(
        ChewingData *pgdata,
        int ph_id, int from, int to, Phrase **pp_phr,
        char selectStr[][ MAX_PHONE_SEQ_LEN * MAX_UTF8_SIZE + 1 ],
        IntervalType selectInterval[], int nSelect )
{
    IntervalType span, sel;
    int idx, selLen;
    Phrase *cand = ALC( Phrase, 1 );

    assert( cand );
    span.from = from;
    span.to = to;
    *pp_phr = NULL;

    GetPhraseFirst( pgdata, cand, ph_id );
    do {
        idx = 0;
        while ( idx < nSelect ) {
            sel = selectInterval[ idx ];

            if ( IsContain( span, sel ) ) {
                /* Compare the selected text with the same slice of the
                 * candidate; a mismatch rejects this candidate only. */
                selLen = sel.to - sel.from;
                if ( memcmp(
                        ueStrSeek( cand->phrase, sel.from - from ),
                        selectStr[ idx ],
                        ueStrNBytes( selectStr[ idx ], selLen ) ) != 0 )
                    break;
            } else if ( IsIntersect( span, sel ) ) {
                /* Partial overlap: no phrase can satisfy this interval. */
                free( cand );
                return 0;
            }

            idx++;
        }

        /* Every selection was checked without a mismatch. */
        if ( idx == nSelect ) {
            *pp_phr = cand;
            return 1;
        }
    } while ( GetPhraseNext( pgdata, cand ) );

    free( cand );
    return 0;
}
/*
 * Find the maximum frequency among the phrases that share 'phoneSeq'.
 *
 * Starts from FREQ_INIT_VALUE, takes the largest 'freq' of the entries
 * enumerated by GetPhraseFirst/GetVocabNext at the tree position returned
 * by TreeFindPhrase, then compares against the value produced by the
 * prepared statement STMT_USERPHRASE_GET_MAX_FREQ.
 *
 * If the scratch Phrase cannot be allocated, the dictionary scan is
 * skipped. If binding, stepping or resetting the statement fails, the
 * maximum found so far is returned.
 */
static int LoadMaxFreq(ChewingData *pgdata, const uint16_t phoneSeq[], int len)
{
    const TreeType *tree_pos;
    Phrase *phrase;
    int maxFreq = FREQ_INIT_VALUE;
    int max_userphrase_freq;
    int ret;

    tree_pos = TreeFindPhrase(pgdata, 0, len - 1, phoneSeq);
    if (tree_pos) {
        /* Allocate only when there is something to enumerate. */
        phrase = ALC(Phrase, 1);
        if (phrase) {
            GetPhraseFirst(pgdata, phrase, tree_pos);
            do {
                if (phrase->freq > maxFreq)
                    maxFreq = phrase->freq;
            } while (GetVocabNext(pgdata, phrase));
            free(phrase);
        }
    }

    assert(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ]);

    ret = UserBindPhone(pgdata, STMT_USERPHRASE_GET_MAX_FREQ, phoneSeq, len);
    if (ret != SQLITE_OK) {
        LOG_ERROR("UserBindPhone returns %d", ret);
        return maxFreq;
    }

    ret = sqlite3_step(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ]);
    if (ret != SQLITE_ROW) {
        /* Leave the statement ready for its next use even without a row. */
        (void) sqlite3_reset(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ]);
        return maxFreq;
    }

    /* The column must be read while the row is current, i.e. before the
     * statement is reset. */
    max_userphrase_freq =
        sqlite3_column_int(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ],
                           SQL_STMT_USERPHRASE[STMT_USERPHRASE_GET_MAX_FREQ].column
                           [COLUMN_USERPHRASE_USER_FREQ]);

    ret = sqlite3_reset(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ]);
    if (ret != SQLITE_OK) {
        LOG_ERROR("sqlite3_reset returns %d", ret);
        return maxFreq;
    }

    if (max_userphrase_freq > maxFreq)
        maxFreq = max_userphrase_freq;

    return maxFreq;
}
/*
 * Load the original frequency of 'wordSeq' from the static dictionary.
 *
 * Enumerates the phrases at the tree position returned by TreeFindPhrase
 * for 'phoneSeq' and returns the 'freq' of the first one whose text equals
 * 'wordSeq'. Returns FREQ_INIT_VALUE when the phone sequence is not found,
 * no phrase matches, or the scratch Phrase cannot be allocated.
 */
static int LoadOriginalFreq(ChewingData *pgdata, const uint16_t phoneSeq[], const char wordSeq[], int len)
{
    const TreeType *tree_pos;
    int retval = FREQ_INIT_VALUE;
    Phrase *phrase;

    tree_pos = TreeFindPhrase(pgdata, 0, len - 1, phoneSeq);
    if (!tree_pos)
        return FREQ_INIT_VALUE;

    phrase = ALC(Phrase, 1);
    if (!phrase)
        return FREQ_INIT_VALUE;

    GetPhraseFirst(pgdata, phrase, tree_pos);
    do {
        /* find the same phrase */
        if (!strcmp(phrase->phrase, wordSeq)) {
            retval = phrase->freq;
            break;
        }
    } while (GetVocabNext(pgdata, phrase));

    free(phrase);
    return retval;
}
/* load the orginal frequency from the static dict */ static int LoadOriginalFreq( const uint16 phoneSeq[], const char wordSeq[], int len ) { int pho_id; int retval; Phrase *phrase = ALC( Phrase, 1 ); pho_id = TreeFindPhrase( 0, len - 1, phoneSeq ); if ( pho_id != -1 ) { GetPhraseFirst( phrase, pho_id ); do { /* find the same phrase */ if ( ! strcmp( phrase->phrase, wordSeq ) ) { retval = phrase->freq; free( phrase ); return retval; } } while ( GetPhraseNext( phrase ) ); } free( phrase ); return FREQ_INIT_VALUE; }
/*
 * Create and fully initialise a ChewingContext.
 *
 * Steps, in order: allocate the context, its output and its data; reset
 * the context; resolve the search path; then locate and load the
 * dictionary, phone tree, user hash, symbol table, easy-symbol table and
 * pinyin table.
 *
 * Note the mixed return conventions of the helpers as used here:
 * InitHash() and InitPinyin() are treated as failing when they return 0,
 * every other helper as failing when it returns non-zero.
 *
 * Returns the context, or NULL after chewing_delete( ctx ) on any failure.
 * NOTE(review): the first failure path calls chewing_delete() with a NULL
 * ctx; confirm chewing_delete() accepts that.
 */
CHEWING_API ChewingContext *chewing_new()
{
    char search_path[PATH_MAX];
    char path[PATH_MAX];
    ChewingContext *ctx = ALC( ChewingContext, 1 );

    if ( ctx == NULL )
        goto error;

    ctx->output = ALC ( ChewingOutput, 1 );
    if ( ctx->output == NULL )
        goto error;

    ctx->data = allocate_ChewingData();
    if ( ctx->data == NULL )
        goto error;

    chewing_Reset( ctx );

    if ( get_search_path( search_path, sizeof( search_path ) ) != 0 )
        goto error;

    /* Static dictionary and phone tree share one directory. */
    if ( find_path_by_files( search_path, DICT_FILES, path, sizeof( path ) ) != 0 )
        goto error;
    if ( InitDict( ctx->data, path ) != 0 )
        goto error;
    if ( InitTree( ctx->data, path ) != 0 )
        goto error;

    /* InitHash reports failure with 0. */
    if ( InitHash( ctx->data ) == 0 )
        goto error;

    ctx->cand_no = 0;

    if ( find_path_by_files( search_path, SYMBOL_TABLE_FILES, path, sizeof( path ) ) != 0 )
        goto error;
    if ( InitSymbolTable( ctx->data, path ) != 0 )
        goto error;

    if ( find_path_by_files( search_path, EASY_SYMBOL_FILES, path, sizeof( path ) ) != 0 )
        goto error;
    if ( InitEasySymbolInput( ctx->data, path ) != 0 )
        goto error;

    if ( find_path_by_files( search_path, PINYIN_FILES, path, sizeof( path ) ) != 0 )
        goto error;
    /* InitPinyin reports failure with 0. */
    if ( InitPinyin( ctx->data, path ) == 0 )
        goto error;

    return ctx;

error:
    chewing_delete( ctx );
    return NULL;
}
/*
 * Look for a user phrase of 'new_phoneSeq' that agrees with every user
 * selection falling inside the interval [from, to], keeping the one with
 * the highest user frequency.
 *
 * Returns 1 and stores the chosen phrase in *pp_phr (the allocation is
 * handed to the caller), or returns 0 with *pp_phr == NULL when the
 * interval is unacceptable, there are no user phrases for the sequence,
 * or none of them matches.
 */
static int CheckUserChoose(
        ChewingData *pgdata,
        uint16_t *new_phoneSeq, int from , int to,
        Phrase **pp_phr,
        char selectStr[][ MAX_PHONE_SEQ_LEN * MAX_UTF8_SIZE + 1 ],
        IntervalType selectInterval[], int nSelect )
{
    IntervalType inte, c;
    int chno, len;
    int user_alloc;
    UserPhraseData *pUserPhraseData;
    Phrase *p_phr = ALC( Phrase, 1 );

    assert( p_phr );
    inte.from = from;
    inte.to = to;
    *pp_phr = NULL;

    /* pass 1
     * if there exists one selected interval which is not contained by inte
     * but has an intersection with inte, then inte is an unacceptable
     * interval
     */
    for ( chno = 0; chno < nSelect; chno++ ) {
        c = selectInterval[ chno ];
        if ( IsIntersect( inte, c ) && ! IsContain( inte, c ) ) {
            free( p_phr );
            return 0;
        }
    }

    /* pass 2
     * if there exists one phrase satisfying all selectStr then return 1,
     * else return 0. Also store the phrase with the highest freq.
     */
    pUserPhraseData = UserGetPhraseFirst( pgdata, new_phoneSeq );
    if ( pUserPhraseData == NULL ) {
        /* No user phrase at all: the loop below must not dereference NULL. */
        free( p_phr );
        return 0;
    }

    p_phr->freq = -1;
    do {
        for ( chno = 0; chno < nSelect; chno++ ) {
            c = selectInterval[ chno ];

            if ( IsContain( inte, c ) ) {
                /*
                 * The part of the user phrase covered by this selection
                 * must equal 'selectStr[chno]'; on mismatch this phrase is
                 * rejected and the next one is tried.
                 */
                len = c.to - c.from;
                if ( memcmp(
                        ueStrSeek( pUserPhraseData->wordSeq, c.from - from ),
                        selectStr[ chno ],
                        ueStrNBytes( selectStr[ chno ], len ) ) )
                    break;
            }
        }

        if ( chno == nSelect ) {
            /* save phrase data to "pp_phr" */
            if ( pUserPhraseData->userfreq > p_phr->freq ) {
                if ( ( user_alloc = ( to - from ) ) > 0 ) {
                    ueStrNCpy( p_phr->phrase, pUserPhraseData->wordSeq,
                               user_alloc, 1 );
                }
                p_phr->freq = pUserPhraseData->userfreq;
                *pp_phr = p_phr;
            }
        }
    } while ( ( pUserPhraseData =
                UserGetPhraseNext( pgdata, new_phoneSeq ) ) != NULL );

    if ( p_phr->freq != -1 )
        return 1;

    free( p_phr );
    return 0;
}