/**
 * @brief Find the maximum frequency among phrases sharing a phone sequence.
 *
 * Looks up phoneSeq[0 .. len-1] in the static dictionary tree and scans every
 * phrase found there, then scans the user phrases returned for phoneSeq.
 *
 * @param pgdata   context handed to the dictionary / user-phrase helpers
 * @param phoneSeq phone sequence to look up
 * @param len      number of phones used for the static tree lookup
 * @return the largest freq / userfreq encountered, or FREQ_INIT_VALUE when
 *         no phrase exceeds it
 */
static int LoadMaxFreq( ChewingData *pgdata, const uint16_t phoneSeq[], int len )
{
	const TreeType *tree_pos;
	/*
	 * Scratch record lives on the stack: no allocation that can fail and
	 * nothing to free. Zero-initialised so that reading .freq is always
	 * well defined.
	 */
	Phrase phrase = { 0 };
	int maxFreq = FREQ_INIT_VALUE;
	UserPhraseData *uphrase;

	/* static dictionary */
	tree_pos = TreeFindPhrase( pgdata, 0, len - 1, phoneSeq );
	if ( tree_pos ) {
		GetPhraseFirst( pgdata, &phrase, tree_pos );
		do {
			if ( phrase.freq > maxFreq )
				maxFreq = phrase.freq;
		} while ( GetVocabNext( pgdata, &phrase ) );
	}

	/* user phrases */
	uphrase = UserGetPhraseFirst( pgdata, phoneSeq );
	while ( uphrase ) {
		if ( uphrase->userfreq > maxFreq )
			maxFreq = uphrase->userfreq;
		uphrase = UserGetPhraseNext( pgdata, phoneSeq );
	}

	return maxFreq;
}
/* find the maximum frequency of the same phrase */ static int LoadMaxFreq( const uint16 phoneSeq[], int len ) { int pho_id; Phrase *phrase = ALC( Phrase, 1 ); int maxFreq = FREQ_INIT_VALUE; UserPhraseData *uphrase; pho_id = TreeFindPhrase( 0, len - 1, phoneSeq ); if ( pho_id != -1 ) { GetPhraseFirst( phrase, pho_id ); do { if ( phrase->freq > maxFreq ) maxFreq = phrase->freq; } while( GetPhraseNext( phrase ) ); } free( phrase ); uphrase = UserGetPhraseFirst( phoneSeq ); while ( uphrase ) { if ( uphrase->userfreq > maxFreq ) maxFreq = uphrase->userfreq; uphrase = UserGetPhraseNext( phoneSeq ); } return maxFreq; }
/**
 * @brief Collect the phrase lengths available starting at @p begin.
 *
 * Tries every span phoneSeq[begin .. last] in order of increasing length and
 * records in pai->avail each span that exists in the static tree. Unless
 * FAFT_CHEWING is defined, spans missing from the tree are also looked up
 * among the user phrases and recorded with id -1. Scanning stops at the
 * first symbol break point found after @p begin.
 */
static void SetAvailInfo(
		AvailInfo *pai, const uint16 phoneSeq[], int nPhoneSeq,
		int begin, const int bSymbolArrBrkpt[] )
{
	int last;
	int phraseLen;
	int treeId;
#ifndef FAFT_CHEWING
	uint16 userPhoneSeq[ MAX_PHONE_SEQ_LEN ];
#endif

	pai->nAvail = 0;

	for ( last = begin; last < nPhoneSeq; last++ ) {
		phraseLen = last - begin + 1;

		/* a break point on any position past the first one ends the scan */
		if ( last > begin && bSymbolArrBrkpt[ last ] )
			break;

		treeId = TreeFindPhrase( begin, last, phoneSeq );
		if ( treeId != -1 ) {
			/* found in the static dictionary */
			pai->avail[ pai->nAvail ].len = phraseLen;
			pai->avail[ pai->nAvail ].id = treeId;
			pai->nAvail++;
			continue;
		}
#ifndef FAFT_CHEWING
		/* not in the tree: try the user phrases for the same span */
		memcpy( userPhoneSeq, &phoneSeq[ begin ],
			sizeof( uint16 ) * phraseLen );
		userPhoneSeq[ phraseLen ] = 0;

		if ( UserGetPhraseFirst( userPhoneSeq ) ) {
			pai->avail[ pai->nAvail ].len = phraseLen;
			pai->avail[ pai->nAvail ].id = -1;
			pai->nAvail++;
		}
		else {
			/* mark the unused slot as empty without consuming it */
			pai->avail[ pai->nAvail ].len = 0;
			pai->avail[ pai->nAvail ].id = -1;
		}
#endif
	}
}
/**
 * @brief Enumerate every admissible [begin, end] span and register at most
 *        one interval for it.
 *
 * For each span allowed by CheckBreakpoint, the user phrases and the static
 * dictionary are both consulted. When both yield a phrase, the user phrase
 * wins if the texts are identical or if its frequency is strictly higher;
 * otherwise the dictionary phrase is used.
 */
static void FindInterval( ChewingData *pgdata, TreeDataType *ptd )
{
	int from, last, dictId;
	Phrase *found, *userHit, *dictHit;
	UsedPhraseMode winner;
	uint16_t segment[ MAX_PHONE_SEQ_LEN ];

	for ( from = 0; from < pgdata->nPhoneSeq; from++ ) {
		for ( last = from; last < pgdata->nPhoneSeq; last++ ) {
			int segLen = last - from + 1;

			if ( ! CheckBreakpoint( from, last + 1, pgdata->bArrBrkpt ) )
				continue;

			/* zero-terminated copy of the span for the user-phrase lookup */
			memcpy( segment, &pgdata->phoneSeq[ from ],
				sizeof( uint16_t ) * segLen );
			segment[ segLen ] = 0;

			userHit = NULL;
			dictHit = NULL;

			/* user phrase candidate */
			if ( UserGetPhraseFirst( pgdata, segment ) &&
			     CheckUserChoose( pgdata, segment, from, last + 1,
					&found, pgdata->selectStr,
					pgdata->selectInterval, pgdata->nSelect ) ) {
				userHit = found;
			}

			/* static dictionary candidate */
			dictId = TreeFindPhrase( pgdata, from, last, pgdata->phoneSeq );
			if ( ( dictId != -1 ) &&
			     CheckChoose( pgdata, dictId, from, last + 1,
					&found, pgdata->selectStr,
					pgdata->selectInterval, pgdata->nSelect ) ) {
				dictHit = found;
			}

			/* choose which candidate (if any) becomes the interval */
			if ( userHit == NULL && dictHit == NULL ) {
				winner = USED_PHRASE_NONE;
			}
			else if ( dictHit == NULL ) {
				winner = USED_PHRASE_USER;
			}
			else if ( userHit == NULL ) {
				winner = USED_PHRASE_DICT;
			}
			else if ( strcmp( userHit->phrase, dictHit->phrase ) == 0 ||
				  userHit->freq > dictHit->freq ) {
				/* same text, or the user phrase is more frequent */
				winner = USED_PHRASE_USER;
			}
			else {
				winner = USED_PHRASE_DICT;
			}

			if ( winner == USED_PHRASE_USER ) {
				AddInterval( ptd, from, last, -1, userHit, IS_USER_PHRASE );
			}
			else if ( winner == USED_PHRASE_DICT ) {
				AddInterval( ptd, from, last, dictId, dictHit, IS_DICT_PHRASE );
			}

			internal_release_Phrase( winner, userHit, dictHit );
		}
	}
}
/**
 * @brief Pick the most frequent user phrase for [from, to) that agrees with
 *        every selection the user has already made.
 *
 * @param pgdata         context handed to the user-phrase helpers
 * @param new_phoneSeq   phone sequence of the span
 * @param from           first position of the span
 * @param to             one past the last position of the span
 * @param pp_phr         out: newly allocated Phrase holding the winner when 1
 *                       is returned (the caller then owns it), NULL otherwise
 * @param selectStr      text of each user selection
 * @param selectInterval interval of each user selection
 * @param nSelect        number of entries in selectStr / selectInterval
 * @return 1 if a matching user phrase was found, 0 otherwise
 */
static int CheckUserChoose(
		ChewingData *pgdata,
		uint16_t *new_phoneSeq, int from , int to,
		Phrase **pp_phr,
		char selectStr[][ MAX_PHONE_SEQ_LEN * MAX_UTF8_SIZE + 1 ],
		IntervalType selectInterval[], int nSelect )
{
	IntervalType inte, c;
	int chno, len;
	int nChars;
	UserPhraseData *pUserPhraseData;
	Phrase *p_phr;

	inte.from = from;
	inte.to = to;
	*pp_phr = NULL;

	/* pass 1
	 * if there exists a selected interval which is not contained by inte
	 * but intersects it, then inte is an unacceptable interval.
	 * Nothing has been allocated yet, so rejecting here is free.
	 */
	for ( chno = 0; chno < nSelect; chno++ ) {
		c = selectInterval[ chno ];
		if ( IsIntersect( inte, c ) && ! IsContain( inte, c ) ) {
			return 0;
		}
	}

	p_phr = ALC( Phrase, 1 );
	assert( p_phr );
	p_phr->freq = -1;	/* sentinel: no acceptable phrase seen yet */

	/* pass 2
	 * keep the phrase with the highest userfreq among those that satisfy
	 * every contained selection. The loop is NULL-guarded so an empty
	 * user-phrase list simply yields "not found".
	 */
	for ( pUserPhraseData = UserGetPhraseFirst( pgdata, new_phoneSeq );
	      pUserPhraseData != NULL;
	      pUserPhraseData = UserGetPhraseNext( pgdata, new_phoneSeq ) ) {
		for ( chno = 0; chno < nSelect; chno++ ) {
			c = selectInterval[ chno ];

			if ( IsContain( inte, c ) ) {
				/*
				 * the phrase text must contain selectStr[ chno ]
				 * at the selection's offset inside the span;
				 * the first mismatch disqualifies this phrase.
				 */
				len = c.to - c.from;
				if ( memcmp(
					ueStrSeek( pUserPhraseData->wordSeq, c.from - from ),
					selectStr[ chno ],
					ueStrNBytes( selectStr[ chno ], len ) ) )
					break;
			}
		}

		/* some selection did not match */
		if ( chno != nSelect )
			continue;

		if ( pUserPhraseData->userfreq > p_phr->freq ) {
			nChars = to - from;
			if ( nChars > 0 ) {
				ueStrNCpy( p_phr->phrase, pUserPhraseData->wordSeq, nChars, 1 );
			}
			p_phr->freq = pUserPhraseData->userfreq;
		}
	}

	if ( p_phr->freq != -1 ) {
		*pp_phr = p_phr;
		return 1;
	}

	free( p_phr );
	return 0;
}
/**
 * @brief Load all candidates of the currently selected length.
 *
 * Fills pci->totalChoiceStr / pci->nTotalChoice with the candidates whose
 * length is pai->avail[ pai->currentAvail ].len, skipping entries that
 * ChoiceTheSame() reports as already present, then resets the paging fields.
 * For length 1 the candidates come from GetCharFirst / GetCharNext; for
 * longer spans they come from the static phrase list (when the avail id is
 * not -1) followed by the user phrases.
 *
 * @param pci         choice list to fill
 * @param pai         available-length table; currentAvail selects the length
 * @param phoneSeq    phone sequence being edited
 * @param cursor      index in phoneSeq where the candidates start
 * @param candPerPage requested candidates per page (capped at MAX_SELKEY)
 */
static void SetChoiceInfo(
		ChoiceInfo *pci,AvailInfo *pai, uint16 *phoneSeq, int cursor,
		int candPerPage )
{
	Word tempWord;
	Phrase tempPhrase;
	int len;
	int nBytes;
	UserPhraseData *pUserPhraseData;
	uint16 userPhoneSeq[ MAX_PHONE_SEQ_LEN ];

	/* Clears previous candidates. */
	memset( pci->totalChoiceStr, '\0',
		sizeof(char) * MAX_CHOICE * MAX_PHRASE_LEN * MAX_UTF8_SIZE + 1);
	pci->nTotalChoice = 0;
	len = pai->avail[ pai->currentAvail ].len;
	assert(len);

	/* secondly, read tree phrase */
	if ( len == 1 ) { /* single character */
		GetCharFirst( &tempWord, phoneSeq[ cursor ] );
		do {
			nBytes = ueBytesFromChar( tempWord.word[0] );
			if ( ChoiceTheSame( pci, tempWord.word,
					nBytes * sizeof( char ) ) )
				continue;
			/*
			 * Check the slot before writing to it: the index must be
			 * strictly below MAX_CHOICE to be a valid row.
			 */
			assert( pci->nTotalChoice < MAX_CHOICE );
			memcpy(
				pci->totalChoiceStr[ pci->nTotalChoice ],
				tempWord.word, nBytes * sizeof( char ) );
			pci->totalChoiceStr[ pci->nTotalChoice ][ nBytes ] = '\0';
			pci->nTotalChoice++;
		} while( GetCharNext( &tempWord ) );
	}
	/* phrase */
	else {
		/* id -1 means the span exists only among the user phrases */
		if ( pai->avail[ pai->currentAvail ].id != -1 ) {
			GetPhraseFirst( &tempPhrase, pai->avail[ pai->currentAvail ].id );
			do {
				if ( ChoiceTheSame(
					pci,
					tempPhrase.phrase,
					len * ueBytesFromChar( tempPhrase.phrase[0] ) * sizeof( char ) ) ) {
					continue;
				}
				ueStrNCpy( pci->totalChoiceStr[ pci->nTotalChoice ],
						tempPhrase.phrase, len, 1);
				pci->nTotalChoice++;
			} while( GetPhraseNext( &tempPhrase ) );
		}

		/* zero-terminated copy of the span for the user-phrase lookup */
		memcpy( userPhoneSeq, &phoneSeq[ cursor ], sizeof( uint16 ) * len );
		userPhoneSeq[ len ] = 0;
		pUserPhraseData = UserGetPhraseFirst( userPhoneSeq );
		if ( pUserPhraseData ) {
			do {
				/* check if the phrase is already in the choice list */
				if ( ChoiceTheSame(
					pci,
					pUserPhraseData->wordSeq,
					len * ueBytesFromChar( pUserPhraseData->wordSeq[0] ) * sizeof( char ) ) )
					continue;
				/* otherwise store it */
				ueStrNCpy(
						pci->totalChoiceStr[ pci->nTotalChoice ],
						pUserPhraseData->wordSeq,
						len, 1);
				pci->nTotalChoice++;
			} while( ( pUserPhraseData =
				    UserGetPhraseNext( userPhoneSeq ) ) != NULL );
		}
	}

	/* paging: never show more candidates per page than MAX_SELKEY */
	pci->nChoicePerPage = candPerPage;
	if ( pci->nChoicePerPage > MAX_SELKEY )
		pci->nChoicePerPage = MAX_SELKEY;
	pci->nPage = CEIL_DIV( pci->nTotalChoice, pci->nChoicePerPage );
	pci->pageNo = 0;
}
/**
 * @brief Collect the phrase lengths available around the cursor span.
 *
 * Fills pgdata->availInfo with every span that exists in the static tree or,
 * failing that, among the user phrases (recorded with a NULL id). With
 * config.bPhraseChoiceRearward set, spans end at @p end and grow backwards;
 * otherwise they start at @p begin and grow forwards. In both modes spans
 * are tried in order of increasing length, bounded by symbol break points.
 */
static void SetAvailInfo(ChewingData *pgdata, int begin, int end)
{
    AvailInfo *pai = &(pgdata->availInfo);
    const uint16_t *phoneSeq = pgdata->phoneSeq;
    int nPhoneSeq = pgdata->nPhoneSeq;
    const int *bSymbolArrBrkpt = pgdata->bSymbolArrBrkpt;
    int symbolArrBrkpt[ARRAY_SIZE(pgdata->bSymbolArrBrkpt)] = { 0 };
    uint16_t userPhoneSeq[MAX_PHONE_SEQ_LEN];
    const TreeType *node;
    int idx;
    int pos;
    int spanLen;
    int head = 0, headCur;
    int tail = 0, tailCur;

    pai->nAvail = 0;

    /*
     * XXX: phoneSeq / nPhoneSeq skip the symbols in the preedit buffer while
     * bSymbolArrBrkpt does not, so break points are translated into
     * phoneSeq positions first.
     */
    for (idx = 0; idx < pgdata->chiSymbolBufLen; ++idx) {
        if (!bSymbolArrBrkpt[idx])
            continue;

        /*
         * XXX: when the preedit buffer starts with a symbol the translated
         * position is negative; such a symbol creates no break point and is
         * ignored.
         */
        pos = idx - CountSymbols(pgdata, idx + 1);
        if (pos >= 0)
            symbolArrBrkpt[pos] = 1;
    }

    if (pgdata->config.bPhraseChoiceRearward) {
        /* walk back from end; head is the last position before a break */
        for (idx = end; idx >= begin; idx--) {
            if (symbolArrBrkpt[idx])
                break;
            head = idx;
        }
        headCur = end;
        tail = end;
        tailCur = end;
    } else {
        head = begin;
        headCur = begin;
        /* walk forward from begin; tail includes the breaking position */
        for (idx = begin; idx < nPhoneSeq; idx++) {
            tail = idx;
            if (symbolArrBrkpt[idx])
                break;
        }
        tailCur = begin;
    }

    while (head <= headCur && tailCur <= tail) {
        spanLen = tailCur - headCur + 1;

        node = TreeFindPhrase(pgdata, headCur, tailCur, phoneSeq);
        if (node) {
            /* present in the static dictionary */
            pai->avail[pai->nAvail].len = spanLen;
            pai->avail[pai->nAvail].id = node;
            pai->nAvail++;
        } else {
            int inUserDict;

            memcpy(userPhoneSeq, &phoneSeq[headCur], sizeof(uint16_t) * spanLen);
            userPhoneSeq[spanLen] = 0;

            inUserDict = UserGetPhraseFirst(pgdata, userPhoneSeq) != NULL;

            /* a miss leaves the slot marked empty and does not consume it */
            pai->avail[pai->nAvail].len = inUserDict ? spanLen : 0;
            pai->avail[pai->nAvail].id = NULL;
            if (inUserDict)
                pai->nAvail++;

            UserGetPhraseEnd(pgdata, userPhoneSeq);
        }

        /* lengthen the span by one on the growing side */
        if (pgdata->config.bPhraseChoiceRearward)
            headCur--;
        else
            tailCur++;
    }
}
/**
 * @brief Load all candidates of the currently selected length.
 *
 * Rebuilds pgdata->choiceInfo for the length given by
 * availInfo.avail[currentAvail].len at the phone-sequence cursor. For a
 * single character the candidates of phoneSeq, of phoneSeqAlt when it
 * differs, and (on the HSU / Dvorak-HSU layouts) of the phones in the
 * alternative table are appended. For longer spans the static phrases and
 * then the user phrases are appended, skipping duplicates. Finally the
 * paging fields are reset.
 */
static void SetChoiceInfo(ChewingData *pgdata)
{
    /*
     * HSU layouts: extra phones whose characters are also offered for a
     * given phone. Alternatives are appended in array order; 0 marks an
     * unused second slot.
     */
    static const struct {
        uint16_t phone;
        uint16_t alt[2];
    } hsuAlt[] = {
        { 0x2800, { 0x30,   0    } },   /* 'ㄘ' -> 'ㄟ' */
        { 0x80,   { 0x20,   0    } },   /* 'ㄧ' -> 'ㄝ' */
        { 0x2A00, { 0x1,    0    } },   /* 'ㄙ' -> '˙' */
        { 0xA00,  { 0x2,    0    } },   /* 'ㄉ' -> 'ˊ' */
        { 0x800,  { 0x3,    0    } },   /* 'ㄈ' -> 'ˇ' */
        { 0x18,   { 0x1200, 0    } },   /* 'ㄜ' -> 'ㄍ' */
        { 0x10,   { 0x1600, 0    } },   /* 'ㄛ' -> 'ㄏ' */
        { 0x1E00, { 0x1800, 0x4  } },   /* 'ㄓ' -> 'ㄐ', 'ˋ' */
        { 0x58,   { 0x1400, 0    } },   /* 'ㄤ' -> 'ㄎ' */
        { 0x68,   { 0x1000, 0x60 } },   /* 'ㄦ' -> 'ㄌ', 'ㄥ' */
        { 0x2200, { 0x1C00, 0    } },   /* 'ㄕ' -> 'ㄒ' */
        { 0x2000, { 0x1A00, 0    } },   /* 'ㄔ' -> 'ㄑ' */
        { 0x50,   { 0xE00,  0    } },   /* 'ㄣ' -> 'ㄋ' */
        { 0x48,   { 0x600,  0    } },   /* 'ㄢ' -> 'ㄇ' */
    };

    ChoiceInfo *pci = &(pgdata->choiceInfo);
    AvailInfo *pai = &(pgdata->availInfo);
    uint16_t *phoneSeq = pgdata->phoneSeq;
    uint16_t *phoneSeqAlt = pgdata->phoneSeqAlt;
    int cursor = PhoneSeqCursor(pgdata);
    int candPerPage = pgdata->config.candPerPage;
    uint16_t userPhoneSeq[MAX_PHONE_SEQ_LEN];
    UserPhraseData *userEntry;
    Phrase dictPhrase;
    int len;

    /* Clears previous candidates. */
    memset(pci->totalChoiceStr, '\0', MAX_CHOICE * MAX_PHRASE_LEN * MAX_UTF8_SIZE + 1);
    pci->nTotalChoice = 0;

    len = pai->avail[pai->currentAvail].len;
    assert(len);

    if (len == 1) {
        /* single character */
        ChoiceInfoAppendChi(pgdata, pci, phoneSeq[cursor]);

        if (phoneSeq[cursor] != phoneSeqAlt[cursor]) {
            ChoiceInfoAppendChi(pgdata, pci, phoneSeqAlt[cursor]);
        }

        if (pgdata->bopomofoData.kbtype == KB_HSU ||
            pgdata->bopomofoData.kbtype == KB_DVORAK_HSU) {
            uint16_t phone = phoneSeq[cursor];
            size_t k;

            for (k = 0; k < sizeof(hsuAlt) / sizeof(hsuAlt[0]); ++k) {
                if (hsuAlt[k].phone != phone)
                    continue;

                ChoiceInfoAppendChi(pgdata, pci, hsuAlt[k].alt[0]);
                if (hsuAlt[k].alt[1])
                    ChoiceInfoAppendChi(pgdata, pci, hsuAlt[k].alt[1]);
                break;
            }
        }
    } else {
        /* phrase: static dictionary first (a null id means user-only) */
        if (pai->avail[pai->currentAvail].id) {
            GetPhraseFirst(pgdata, &dictPhrase, pai->avail[pai->currentAvail].id);
            do {
                if (!ChoiceTheSame(pci, dictPhrase.phrase,
                                   len * ueBytesFromChar(dictPhrase.phrase[0]))) {
                    ueStrNCpy(pci->totalChoiceStr[pci->nTotalChoice],
                              dictPhrase.phrase, len, 1);
                    pci->nTotalChoice++;
                }
            } while (GetVocabNext(pgdata, &dictPhrase));
        }

        /* then the user phrases for the same span */
        memcpy(userPhoneSeq, &phoneSeq[cursor], sizeof(uint16_t) * len);
        userPhoneSeq[len] = 0;

        for (userEntry = UserGetPhraseFirst(pgdata, userPhoneSeq);
             userEntry != NULL;
             userEntry = UserGetPhraseNext(pgdata, userPhoneSeq)) {
            /* skip phrases already in the choice list */
            if (ChoiceTheSame(pci, userEntry->wordSeq,
                              len * ueBytesFromChar(userEntry->wordSeq[0])))
                continue;

            ueStrNCpy(pci->totalChoiceStr[pci->nTotalChoice],
                      userEntry->wordSeq, len, 1);
            pci->nTotalChoice++;
        }
        UserGetPhraseEnd(pgdata, userPhoneSeq);
    }

    /* reset paging */
    pci->nChoicePerPage = candPerPage;
    assert(pci->nTotalChoice > 0);
    pci->nPage = CEIL_DIV(pci->nTotalChoice, pci->nChoicePerPage);
    pci->pageNo = 0;
    pci->isSymbol = WORD_CHOICE;
}
/**
 * @brief Collect the phrase lengths available around the cursor span.
 *
 * Fills pgdata->availInfo with every span that exists in the static tree or,
 * failing that, among the user phrases (recorded with id -1). With
 * config.bPhraseChoiceRearward set, spans end at @p end and grow backwards;
 * otherwise they start at @p begin and grow forwards. In both modes spans
 * are tried in order of increasing length, bounded by bSymbolArrBrkpt.
 */
static void SetAvailInfo( ChewingData *pgdata, int begin, int end)
{
	AvailInfo *pai = &( pgdata->availInfo );
	const uint16_t *phoneSeq = pgdata->phoneSeq;
	int nPhoneSeq = pgdata->nPhoneSeq;
	const int *bSymbolArrBrkpt = pgdata->bSymbolArrBrkpt;
	uint16_t userPhoneSeq[ MAX_PHONE_SEQ_LEN ];
	int treeId;
	int spanLen;
	int idx;
	int head = 0, headCur;
	int tail = 0, tailCur;

	pai->nAvail = 0;

	if ( pgdata->config.bPhraseChoiceRearward ) {
		/* walk back from end; head includes the breaking position */
		for ( idx = end; idx >= begin; idx-- ) {
			head = idx;
			if ( bSymbolArrBrkpt[ idx ] )
				break;
		}
		headCur = end;
		tail = end;
		tailCur = end;
	}
	else {
		head = begin;
		headCur = begin;
		/* walk forward from begin; tail stops before the break */
		for ( idx = begin; idx < nPhoneSeq; idx++ ) {
			if ( bSymbolArrBrkpt[ idx ] )
				break;
			tail = idx;
		}
		tailCur = begin;
	}

	while ( head <= headCur && tailCur <= tail ) {
		spanLen = tailCur - headCur + 1;

		treeId = TreeFindPhrase( pgdata, headCur, tailCur, phoneSeq );
		if ( treeId != -1 ) {
			/* present in the static dictionary */
			pai->avail[ pai->nAvail ].len = spanLen;
			pai->avail[ pai->nAvail ].id = treeId;
			pai->nAvail++;
		}
		else {
			int inUserDict;

			memcpy( userPhoneSeq, &phoneSeq[ headCur ],
				sizeof( uint16_t ) * spanLen );
			userPhoneSeq[ spanLen ] = 0;

			inUserDict = UserGetPhraseFirst( pgdata, userPhoneSeq ) != NULL;

			/* a miss leaves the slot marked empty and does not consume it */
			pai->avail[ pai->nAvail ].len = inUserDict ? spanLen : 0;
			pai->avail[ pai->nAvail ].id = -1;
			if ( inUserDict )
				pai->nAvail++;
		}

		/* lengthen the span by one on the growing side */
		if ( pgdata->config.bPhraseChoiceRearward )
			headCur--;
		else
			tailCur++;
	}
}