Esempio n. 1
0
/**	Check whether the core engine can handle a phrase.
 *
 *	The phrase counts as handled when it is among the candidates that
 *	ProcessCiCandidate returns for these syllables, or -- only when that
 *	lookup returns no candidates at all -- when it equals the first
 *	candidate returned by GetIcwCandidates.
 *
 *	Parameters:
 *		syllables	syllables of the phrase, passed to the lookups together
 *					with ci_length
 *		ci			characters of the phrase; ci_length HZ elements are compared
 *		ci_length	number of characters; anything shorter than 2 is rejected
 *	Returns:
 *		1 if the phrase can be handled, otherwise 0.
 */
int HaveItem(SYLLABLE *syllables, HZ *ci, int ci_length)
{
	int i, count;
	CANDIDATE candidate[MAX_CANDIDATES];

	if (!syllables || !ci || ci_length < 2)
		return 0;

	count = ProcessCiCandidate(syllables, ci_length, 0, candidate, MAX_CANDIDATES, 0);
	if (count)		//the phrase lookup produced candidates
	{
		for (i = 0; i < count; i++)
		{
			//memcmp rather than strncmp: the buffers hold multi-byte HZ
			//elements, and strncmp would stop comparing at the first zero
			//byte, letting different phrases test equal.
			if (ci_length == candidate[i].word.item->ci_length &&
				!memcmp(ci, candidate[i].word.hz, sizeof(HZ) * ci_length))
				return 1;		//found the phrase
		}
		return 0;				//other candidates exist, so ICW is not consulted
	}

	count = GetIcwCandidates(syllables, ci_length, candidate);
	if (!count)
		return 0;

	//Only the first ICW candidate is considered.
	if (!memcmp(ci, candidate[0].icw.hz, sizeof(HZ) * ci_length))
		return 1;
	return 0;
}
Esempio n. 2
0
/*	Retrieve single-character candidates for one syllable into candidate_array.
 *	If nothing is found with the requested zi_level and the configured output
 *	mode (e.g. for "eng"), retry with HZ_ALL_USED / HZ_OUTPUT_HANZI_ALL.
 *	Returns the value reported by GetZiCandidates.
 */
static int GetZiCandidatesWithFallback(SYLLABLE syllable, CANDIDATE *candidate_array, int array_length, int zi_level)
{
	int count;

	count = GetZiCandidates(syllable,
							candidate_array,
							array_length,
							pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
							zi_level,
							pim_config->hz_output_mode);

	//Nothing found: widen the character range and try again
	if (!count)
		count = GetZiCandidates(syllable,
								candidate_array,
								array_length,
								pim_config->use_fuzzy ? pim_config->fuzzy_mode : 0,
								HZ_ALL_USED,
								HZ_OUTPUT_HANZI_ALL);

	return count;
}

/*	Full processing from a syllable to single-character (zi) candidates.
 *	Steps:
 *		1. If the syllable can be split into two small syllables (and the
 *		   pinyin mode is not shuangpin), retrieve two-syllable words for
 *		   the split and characters for its first small syllable.
 *		2. Retrieve the characters of the syllable itself.
 *		3. Remove unreadable characters, then remove duplicates
 *		   (characters only, not the small-syllable words).
 *		4. Sort the words and characters together.
 *	The fuzzy setting and output mode are read from pim_config.
 *
 *	Parameters:
 *		syllable			the syllable
 *		*candidate_array	output candidate array
 *		array_length		capacity of the candidate array
 *		zi_level			character level passed to GetZiCandidates
 *	Returns:
 *		the number of candidates stored in candidate_array; 0 when
 *		candidate_array is NULL or array_length is not positive.
 */
int ProcessZiCandidates(SYLLABLE syllable, CANDIDATE *candidate_array, int array_length, int zi_level)
{
	int zi_count = 0, normal_zi_count = 0, small_count = 0, small_ci_count = 0, small_zi_count = 0;
	SYLLABLE small_syllables[2];
	CANDIDATE small_word_candidates[0x10];

	//Nothing can be stored without a usable output array
	if (!candidate_array || array_length <= 0)
		return 0;

	//Look for candidates such as the word Xi'an typed as "xian"; not needed for shuangpin
	if (/*pim_config->special_parse_pin_yin && */pim_config->pinyin_mode != PINYIN_SHUANGPIN && GetSmallSyllables(syllable, small_syllables))
	{
		int i;

		small_ci_count = ProcessCiCandidate(small_syllables,
										 2,
										 0,
										 small_word_candidates,
										 _SizeOf(small_word_candidates),
										 1);

		//Never account for more words than the output array can hold;
		//otherwise the offsets and remaining lengths computed below would
		//run past the end of candidate_array.
		if (small_ci_count > array_length)
			small_ci_count = array_length;

		//Wrap each small-syllable word as a zi-type candidate
		for (i = 0; i < small_ci_count; i++)
		{
			candidate_array[i].type			= CAND_TYPE_ZI;
			candidate_array[i].hz.is_word	= 1;
			candidate_array[i].hz.word_item = small_word_candidates[i].word.item;
			candidate_array[i].hz.origin_syllable = syllable;
			candidate_array[i].hz.top_pos	= DEFAULT_TOP_POS;
			candidate_array[i].hz.cache_pos =
				GetCiCacheInfo(GetItemHZPtr(candidate_array[i].hz.word_item), candidate_array[i].hz.word_item->ci_length);
		}

		//Retrieve the characters of the first small syllable
		small_zi_count = GetZiCandidatesWithFallback(small_syllables[0],
			candidate_array + small_ci_count,
			array_length - small_ci_count,
			zi_level);

		for (i = small_ci_count; i < small_ci_count + small_zi_count; i++)
		{
			candidate_array[i].hz.hz_type = ZI_TYPE_OTHER;
		}

		small_count = small_ci_count + small_zi_count;
	}

	//Retrieve the ordinary characters of the full syllable
	normal_zi_count = GetZiCandidatesWithFallback(syllable,
							candidate_array + small_count,
							array_length - small_count,
							zi_level);

	zi_count = small_zi_count + normal_zi_count;

	//Drop characters that cannot be displayed
	zi_count = DeleteUnreadableZiCandidates(candidate_array + small_ci_count, zi_count);

	//Remove duplicates (characters only)
	zi_count = UnifyZiCandidates(candidate_array + small_ci_count, zi_count);

	//Sort (both the characters and the small-syllable words)
	SortZiCandidates(candidate_array, small_ci_count + zi_count);

	return small_ci_count + zi_count;
}