/* 向词的cache中插入新的词。 * 参数: * hz 词 * length 长度 * 返回:无 */ void InsertCiToCache(HZ *hz, int length, int syllable_length, int set_fix_top) { int pos; //Cache中的位置 int item_length; //本词条的长度 int used_count; //词汇使用度 int i; char item_save[WORDLIB_FEATURE_LENGTH + sizeof(HZ) * MAX_WORD_LENGTH]; //临时保存区 //如果以当前的位置确定词的顺序,则不能向词频中增加内容 if (pim_config->ci_option & CI_ADJUST_FREQ_NONE) return; if (length > MAX_WORD_LENGTH) return; //UCS4的词不加入Cache if (length != syllable_length) return; item_length = length * sizeof(HZ) + WORDLIB_FEATURE_LENGTH; //以下这个词是否出现在Cache中? pos = GetCiCacheInfo(hz, length); if (pos != -1) //找到 { used_count = *(int*)&share_segment->ci_cache.cache[pos] >> 8; //找出词的使用度 used_count++; }
/*
 * Build the complete single-character (zi) candidate list for a syllable.
 *
 * Processing steps:
 *   1. Retrieve the character candidates.
 *   2. Remove duplicate character candidates.
 *   3. Sort the candidates.
 * The lookups follow the current fuzzy-pinyin and output-set settings read
 * from pim_config.
 *
 * Parameters:
 *   syllable         the syllable to look up
 *   candidate_array  output array receiving the candidates
 *   array_length     capacity of candidate_array, in elements
 *   zi_level         character level passed to GetZiCandidates for the first
 *                    lookup attempt; if that attempt finds nothing the lookup
 *                    is retried with HZ_ALL_USED / HZ_OUTPUT_HANZI_ALL
 *
 * Returns:
 *   The number of candidates stored in candidate_array. Returns 0 when
 *   candidate_array is NULL or array_length is not positive.
 */
int ProcessZiCandidates(SYLLABLE syllable, CANDIDATE *candidate_array, int array_length, int zi_level)
{
	int zi_count = 0, normal_zi_count = 0, small_count = 0, small_ci_count = 0, small_zi_count = 0;
	SYLLABLE small_syllables[2];
	CANDIDATE small_word_candidates[0x10];

	/* Nothing can be stored without an output buffer. */
	if (!candidate_array || array_length <= 0)
		return 0;

	/*
	 * Look for word candidates such as typing "xian" for the two-syllable
	 * word "xi'an". Shuangpin mode does not need this.
	 */
	if (/*pim_config->special_parse_pin_yin && */pim_config->pinyin_mode != PINYIN_SHUANGPIN &&
		GetSmallSyllables(syllable, small_syllables))
	{
		int i;

		small_ci_count = ProcessCiCandidate(small_syllables, 2, 0, small_word_candidates,
											_SizeOf(small_word_candidates), 1);

		/*
		 * Never account for more words than the caller's array can hold.
		 * Without this clamp the offsets and remaining capacities computed
		 * below would point past the end of candidate_array (and the
		 * remaining capacity would go negative) whenever more words were
		 * found than array_length allows.
		 */
		if (small_ci_count > array_length)
			small_ci_count = array_length;

		/* Present each split-syllable word as a "zi" candidate that wraps a word item. */
		for (i = 0; i < small_ci_count; i++)
		{
			candidate_array[i].type = CAND_TYPE_ZI;
			candidate_array[i].hz.is_word = 1;
			candidate_array[i].hz.word_item = small_word_candidates[i].word.item;
			candidate_array[i].hz.origin_syllable = syllable;
			candidate_array[i].hz.top_pos = DEFAULT_TOP_POS;
			candidate_array[i].hz.cache_pos =
				GetCiCacheInfo(GetItemHZPtr(candidate_array[i].hz.word_item),
							   candidate_array[i].hz.word_item->ci_length);
		}

		/* Retrieve the characters for the first of the split syllables. */
		small_zi_count = GetZiCandidates(small_syllables[0],
										 candidate_array + small_ci_count,
										 array_length - small_ci_count,
										 pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
										 zi_level,
										 pim_config->hz_output_mode);

		/* If no character was found (e.g. "eng"), widen the character range and retry. */
		if (!small_zi_count)
			small_zi_count = GetZiCandidates(small_syllables[0],
											 candidate_array + small_ci_count,
											 array_length - small_ci_count,
											 pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
											 HZ_ALL_USED,
											 HZ_OUTPUT_HANZI_ALL);

		/* Tag the characters that came from the split syllable. */
		for (i = small_ci_count; i < small_ci_count + small_zi_count; i++)
		{
			candidate_array[i].hz.hz_type = ZI_TYPE_OTHER;
		}

		small_count = small_ci_count + small_zi_count;
	}

	/* Retrieve the ordinary characters for the full syllable. */
	normal_zi_count = GetZiCandidates(syllable,
									  candidate_array + small_count,
									  array_length - small_count,
									  pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
									  zi_level,
									  pim_config->hz_output_mode);

	/* If no character was found (e.g. "eng"), widen the character range and retry. */
	if (!normal_zi_count)
		normal_zi_count = GetZiCandidates(syllable,
										  candidate_array + small_count,
										  array_length - small_count,
										  pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
										  HZ_ALL_USED,
										  HZ_OUTPUT_HANZI_ALL);

	zi_count = small_zi_count + normal_zi_count;

	/* Drop characters that cannot be displayed. */
	zi_count = DeleteUnreadableZiCandidates(candidate_array + small_ci_count, zi_count);

	/* Remove duplicates (characters only; the leading word entries are skipped). */
	zi_count = UnifyZiCandidates(candidate_array + small_ci_count, zi_count);

	/* Sort everything: both the characters and the split-syllable words. */
	SortZiCandidates(candidate_array, small_ci_count + zi_count);

	return small_ci_count + zi_count;
}