Exemple #1
0
Fichier : ci.c Projet : 52M/unispim
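/*	Insert a new word into the word cache.
 *	Parameters:
 *		hz				the word (array of HZ characters)
 *		length			length of the word in HZ units; words longer than
 *						MAX_WORD_LENGTH are ignored
 *		syllable_length	syllable count of the word; when it differs from
 *						length (UCS4 words) the word is not cached
 *		set_fix_top		NOTE(review): not documented in the original
 *						comment -- confirm its meaning against the full
 *						function body
 *	Returns: nothing.
 *	Nothing is inserted when pim_config->ci_option has CI_ADJUST_FREQ_NONE set.
 */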
void InsertCiToCache(HZ *hz, int length, int syllable_length, int set_fix_top)
{
	int pos;						//Cache中的位置
	int item_length;				//本词条的长度
	int used_count;					//词汇使用度
	int i;
	char item_save[WORDLIB_FEATURE_LENGTH + sizeof(HZ) * MAX_WORD_LENGTH];		//临时保存区

	//如果以当前的位置确定词的顺序,则不能向词频中增加内容
	if (pim_config->ci_option & CI_ADJUST_FREQ_NONE)
		return;

	if (length > MAX_WORD_LENGTH)
		return;

	//UCS4的词不加入Cache
	if (length != syllable_length)
		return;

	item_length = length * sizeof(HZ) + WORDLIB_FEATURE_LENGTH;

	//以下这个词是否出现在Cache中?
	pos = GetCiCacheInfo(hz, length);

	if (pos != -1)		//找到
	{
		used_count = *(int*)&share_segment->ci_cache.cache[pos] >> 8;		//找出词的使用度
		used_count++;
	}
Exemple #2
0
/*	Build the complete single-character (Hanzi) candidate list for a syllable.
 *	Processing steps:
 *		1. Retrieve the Hanzi candidates.
 *		2. Remove duplicate Hanzi candidates.
 *		3. Sort the candidates.
 *	The lookup honours the fuzzy-pinyin setting (pim_config->use_fuzzy /
 *	fuzzy_mode) and the output-set setting (pim_config->hz_output_mode).
 *
 *	When the pinyin mode is not Shuangpin and GetSmallSyllables() can split
 *	the syllable into two smaller ones (e.g. typing "xian" for the word
 *	"xi'an"), the words found for that split are placed at the front of
 *	candidate_array, followed by the characters of the first small syllable,
 *	followed by the characters of the full syllable.
 *
 *	Parameters:
 *		syllable			the syllable to look up
 *		*candidate_array	output candidate array
 *		array_length		capacity of candidate_array, in elements
 *		zi_level			character-set level forwarded to GetZiCandidates();
 *							if that lookup yields nothing it is retried with
 *							HZ_ALL_USED / HZ_OUTPUT_HANZI_ALL
 *	Return value:
 *		Number of candidates stored in candidate_array.
 */
int ProcessZiCandidates(SYLLABLE syllable, CANDIDATE *candidate_array, int array_length, int zi_level)
{
	int zi_count = 0, normal_zi_count = 0, small_count = 0, small_ci_count = 0, small_zi_count = 0;
	SYLLABLE small_syllables[2];
	CANDIDATE small_word_candidates[0x10];

	//Look for candidates such as "xian" -> "xi'an"; not applicable to Shuangpin
	if (pim_config->pinyin_mode != PINYIN_SHUANGPIN && GetSmallSyllables(syllable, small_syllables))
	{
		int i;

		small_ci_count = ProcessCiCandidate(small_syllables,
										 2,
										 0,
										 small_word_candidates,
										 _SizeOf(small_word_candidates),
										 1);

		//The word lookup may return more entries than the caller's array can
		//hold. Clamp the count here so that the offsets and remaining
		//capacities computed from it below never leave candidate_array.
		if (small_ci_count > array_length)
		{
			small_ci_count = array_length > 0 ? array_length : 0;
		}

		//Expose each small-syllable word as a "zi" candidate flagged is_word
		for (i = 0; i < small_ci_count; i++)
		{
			candidate_array[i].type			= CAND_TYPE_ZI;
			candidate_array[i].hz.is_word	= 1;
			candidate_array[i].hz.word_item = small_word_candidates[i].word.item;
			candidate_array[i].hz.origin_syllable = syllable;
			candidate_array[i].hz.top_pos	= DEFAULT_TOP_POS;
			candidate_array[i].hz.cache_pos =
				GetCiCacheInfo(GetItemHZPtr(candidate_array[i].hz.word_item), candidate_array[i].hz.word_item->ci_length);
		}

		//Retrieve the characters of the first small syllable
		small_zi_count = GetZiCandidates(small_syllables[0],
			candidate_array + small_ci_count,
			array_length - small_ci_count,
			pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
			zi_level,
			pim_config->hz_output_mode);

		//If no character was found (e.g. "eng"), widen the character range and retry
		if (!small_zi_count)
		{
			small_zi_count = GetZiCandidates(small_syllables[0],
				candidate_array + small_ci_count,
				array_length - small_ci_count,
				pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
				HZ_ALL_USED,
				HZ_OUTPUT_HANZI_ALL);
		}

		//Tag the characters that came from the small-syllable split
		for (i = small_ci_count; i < small_ci_count + small_zi_count; i++)
		{
			candidate_array[i].hz.hz_type = ZI_TYPE_OTHER;
		}

		small_count = small_ci_count + small_zi_count;
	}

	//Retrieve the ordinary characters for the full syllable
	normal_zi_count = GetZiCandidates(syllable,
							candidate_array + small_count,
							array_length - small_count,
							pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
							zi_level,
							pim_config->hz_output_mode);

	//If no character was found (e.g. "eng"), widen the character range and retry
	if (!normal_zi_count)
	{
		normal_zi_count = GetZiCandidates(syllable,
								candidate_array + small_count,
								array_length - small_count,
								pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
								HZ_ALL_USED,
								HZ_OUTPUT_HANZI_ALL);
	}

	zi_count = small_zi_count + normal_zi_count;

	//Drop characters that cannot be displayed
	zi_count = DeleteUnreadableZiCandidates(candidate_array + small_ci_count, zi_count);

	//De-duplicate (characters only; the leading word entries are skipped)
	zi_count = UnifyZiCandidates(candidate_array + small_ci_count, zi_count);

	//Sort (both the characters and the small-syllable words)
	SortZiCandidates(candidate_array, small_ci_count + zi_count);

	return small_ci_count + zi_count;
}