Пример #1
0
/*
 * Load the pinyin mapping tables from the file PINYIN_TAB_NAME found in
 * the directory `prefix`.
 *
 * The file is read as: an integer count followed by that many
 * "<pinyin> <zuin>" string pairs (the initials table), then a second count
 * followed by its pairs (the finals table).  Each stored count is
 * incremented by one, so each table is allocated with one entry more than
 * the number of pairs read.
 *
 * Tables already allocated when a later step fails stay attached to
 * `pgdata`; they are not released here.
 *
 * Returns 1 on success and 0 on any failure (path too long, file cannot
 * be opened, malformed content, or allocation failure).
 */
int InitPinyin( ChewingData *pgdata, const char *prefix )
{
	char filename[ PATH_MAX ];
	int i;
	int len;
	FILE *fd;
	int ret;
	int ok = 0;

	/* Bounded formatting: refuse a path that does not fit in filename. */
	len = snprintf( filename, sizeof( filename ),
		"%s" PLAT_SEPARATOR "%s",
		prefix, PINYIN_TAB_NAME );
	if ( len < 0 || (size_t) len >= sizeof( filename ) )
		return 0;

	fd = fopen( filename, "r" );
	if ( ! fd )
		return 0;

	/* From here on every exit goes through `end` so the file is closed. */
	ret = fscanf( fd, "%d", &pgdata->static_data.HANYU_INITIALS );
	if ( ret != 1 || pgdata->static_data.HANYU_INITIALS < 0 )
		goto end;
	++pgdata->static_data.HANYU_INITIALS;
	pgdata->static_data.hanyuInitialsMap = ALC( keymap, pgdata->static_data.HANYU_INITIALS );
	if ( ! pgdata->static_data.hanyuInitialsMap )
		goto end;
	/* NOTE(review): "%s" has no field width; the capacity of the
	 * pinyin/zuin members is not visible here -- confirm that the data
	 * file can never exceed it. */
	for ( i = 0; i < pgdata->static_data.HANYU_INITIALS - 1; i++ ) {
		ret = fscanf( fd, "%s %s",
			pgdata->static_data.hanyuInitialsMap[ i ].pinyin,
			pgdata->static_data.hanyuInitialsMap[ i ].zuin );
		if ( ret != 2 )
			goto end;
	}

	ret = fscanf( fd, "%d", &pgdata->static_data.HANYU_FINALS );
	if ( ret != 1 || pgdata->static_data.HANYU_FINALS < 0 )
		goto end;
	++pgdata->static_data.HANYU_FINALS;
	pgdata->static_data.hanyuFinalsMap = ALC( keymap, pgdata->static_data.HANYU_FINALS );
	if ( ! pgdata->static_data.hanyuFinalsMap )
		goto end;
	for ( i = 0; i < pgdata->static_data.HANYU_FINALS - 1; i++ ) {
		ret = fscanf( fd, "%s %s",
			pgdata->static_data.hanyuFinalsMap[ i ].pinyin,
			pgdata->static_data.hanyuFinalsMap[ i ].zuin );
		if ( ret != 2 )
			goto end;
	}

	ok = 1;

end:
	fclose( fd );
	return ok;
}
Пример #2
0
/*
 * Insert `record` (an array of `nInter` ints) into the list ptd->phList.
 *
 * - If an existing node already contains `record` (as decided by
 *   IsRecContain), nothing more is done and the function returns.
 * - Every existing node that `record` contains is unlinked and freed.
 * - Otherwise a new node holding a copy of `record` is pushed at the
 *   head of the list.
 */
static void SaveRecord( int *record, int nInter, TreeDataType *ptd )
{
	/* `link` always points at the pointer that refers to the node being
	 * examined, so unlinking needs no separate "previous" bookkeeping. */
	RecordNode **link = &ptd->phList;
	RecordNode *fresh;

	while ( *link ) {
		RecordNode *cur = *link;

		/* An existing entry already covers 'record': drop 'record'. */
		if ( IsRecContain( cur->arrIndex, cur->nInter, record, nInter, ptd ) )
			return;

		if ( IsRecContain( record, nInter, cur->arrIndex, cur->nInter, ptd ) ) {
			/* 'record' covers this entry: remove it from the list. */
			*link = cur->next;
			free( cur->arrIndex );
			free( cur );
		}
		else {
			link = &cur->next;
		}
	}

	/* Push a private copy of 'record' at the head of the list. */
	fresh = ALC( RecordNode, 1 );
	assert( fresh );
	fresh->next = ptd->phList;
	fresh->arrIndex = ALC( int, nInter );
	assert( fresh->arrIndex );
	fresh->nInter = nInter;
	memcpy( fresh->arrIndex, record, nInter * sizeof( int ) );
	ptd->phList = fresh;
}
Пример #3
0
/*
 * Find the maximum frequency among the phrases that share the phone
 * sequence `phoneSeq` of length `len`.
 *
 * Both the phrases reached through TreeFindPhrase() and the user phrases
 * returned by UserGetPhraseFirst()/UserGetPhraseNext() are considered.
 * The result is never lower than FREQ_INIT_VALUE.
 *
 * If the temporary Phrase buffer cannot be allocated, the first scan is
 * skipped and only the user phrases contribute.
 */
static int LoadMaxFreq( const uint16 phoneSeq[], int len )
{
	int pho_id;
	Phrase *phrase;
	int maxFreq = FREQ_INIT_VALUE;
	UserPhraseData *uphrase;

	pho_id = TreeFindPhrase( 0, len - 1, phoneSeq );
	if ( pho_id != -1 ) {
		/* Allocate the scratch buffer only when there is something to scan. */
		phrase = ALC( Phrase, 1 );
		if ( phrase ) {
			GetPhraseFirst( phrase, pho_id );
			do {
				if ( phrase->freq > maxFreq )
					maxFreq = phrase->freq;
			} while( GetPhraseNext( phrase ) );
			free( phrase );
		}
	}

	uphrase = UserGetPhraseFirst( phoneSeq );
	while ( uphrase ) {
		if ( uphrase->userfreq > maxFreq )
			maxFreq = uphrase->userfreq;
		uphrase = UserGetPhraseNext( phoneSeq );
	}

	return maxFreq;
}
Пример #4
0
/*
 * Find the maximum frequency among the phrases that share the phone
 * sequence `phoneSeq` of length `len`.
 *
 * Both the phrases reached through TreeFindPhrase() and the user phrases
 * returned by UserGetPhraseFirst()/UserGetPhraseNext() are considered.
 * The result is never lower than FREQ_INIT_VALUE.
 *
 * If the temporary Phrase buffer cannot be allocated, the first scan is
 * skipped and only the user phrases contribute.
 */
static int LoadMaxFreq( ChewingData *pgdata, const uint16_t phoneSeq[], int len )
{
	const TreeType *tree_pos;
	Phrase *phrase;
	int maxFreq = FREQ_INIT_VALUE;
	UserPhraseData *uphrase;

	tree_pos = TreeFindPhrase( pgdata, 0, len - 1, phoneSeq );
	if ( tree_pos ) {
		/* Allocate the scratch buffer only when there is something to scan. */
		phrase = ALC( Phrase, 1 );
		if ( phrase ) {
			GetPhraseFirst( pgdata, phrase, tree_pos );
			do {
				if ( phrase->freq > maxFreq )
					maxFreq = phrase->freq;
			} while( GetVocabNext( pgdata, phrase ) );
			free( phrase );
		}
	}

	uphrase = UserGetPhraseFirst( pgdata, phoneSeq );
	while ( uphrase ) {
		if ( uphrase->userfreq > maxFreq )
			maxFreq = uphrase->userfreq;
		uphrase = UserGetPhraseNext( pgdata, phoneSeq );
	}

	return maxFreq;
}
Пример #5
0
/*
 * Return a newly allocated copy of the current phone sequence
 * (ctx->data->phoneSeq, ctx->data->nPhoneSeq elements).
 *
 * Returns NULL if the allocation fails.  Nothing in this function frees
 * the buffer; presumably the caller is expected to release it -- confirm
 * against the public API documentation.
 */
CHEWING_API uint16 *chewing_get_phoneSeq( ChewingContext *ctx )
{
	uint16 *seq;

	seq = ALC( uint16, ctx->data->nPhoneSeq );
	/* Never copy into a failed allocation. */
	if ( seq )
		memcpy( seq, ctx->data->phoneSeq, sizeof(uint16)*ctx->data->nPhoneSeq );
	return seq;
}
Пример #6
0
/*
 * Load the phone tree from the file PHONE_TREE_FILE found in the
 * directory `prefix` and attach it to pgdata->static_data.tree.
 *
 * With USE_BINARY_DATA the file is mapped read-only through the
 * plat_mmap_* helpers; otherwise it is parsed as text into a freshly
 * allocated array of TREE_SIZE entries.
 *
 * Returns 0 on success and -1 on failure (path too long or formatting
 * error, file cannot be opened/mapped, or allocation failure).
 */
int InitTree( ChewingData *pgdata, const char * prefix )
{
#ifdef USE_BINARY_DATA
	char filename[ PATH_MAX ];
	int len;
	size_t offset;

	/* snprintf returns a negative value on error and the untruncated
	 * length otherwise; both cases must be rejected explicitly. */
	len = snprintf( filename, sizeof( filename ), "%s" PLAT_SEPARATOR "%s", prefix, PHONE_TREE_FILE );
	if ( len < 0 || (size_t) len >= sizeof( filename ) )
		return -1;

	plat_mmap_set_invalid( &pgdata->static_data.tree_mmap );
	pgdata->static_data.tree_size = plat_mmap_create( &pgdata->static_data.tree_mmap, filename, FLAG_ATTRIBUTE_READ );
	if ( pgdata->static_data.tree_size <= 0 )
		return -1;

	offset = 0;
	pgdata->static_data.tree = (TreeType *) plat_mmap_set_view( &pgdata->static_data.tree_mmap, &offset, &pgdata->static_data.tree_size );
	if ( !pgdata->static_data.tree )
		return -1;

	return 0;
#else
	char filename[ PATH_MAX ];
	int len;
	FILE *infile = NULL;
	int i;

	/* snprintf returns a negative value on error and the untruncated
	 * length otherwise; both cases must be rejected explicitly. */
	len = snprintf( filename, sizeof( filename ), "%s" PLAT_SEPARATOR "%s", prefix, PHONE_TREE_FILE );
	if ( len < 0 || (size_t) len >= sizeof( filename ) )
		return -1;

	infile = fopen( filename, "r" );
	if ( !infile )
		return -1;

	pgdata->static_data.tree = ALC( TreeType, TREE_SIZE );
	if ( !pgdata->static_data.tree ) {
		fclose( infile );
		return -1;
	}

	/* XXX: What happen if infile contains more than TREE_SIZE data? */
	/* Reading stops at the first record that does not parse as four
	 * numbers; a short file is still reported as success. */
	for ( i = 0; i < TREE_SIZE; i++ ) {
		if ( fscanf( infile, "%hu%d%d%d",
					&pgdata->static_data.tree[ i ].phone_id,
					&pgdata->static_data.tree[ i ].phrase_id,
					&pgdata->static_data.tree[ i ].child_begin,
					&pgdata->static_data.tree[ i ].child_end ) != 4 )
			break;
	}

	fclose( infile );
	return 0;
#endif
}
Пример #7
0
/*
 * Create a new chewing context together with its data and output
 * structures, then apply chewing_Reset() to it.
 *
 * Returns the new context, or NULL if any of the three allocations
 * fails; in that case whatever was allocated is released first.
 */
CHEWING_API ChewingContext *chewing_new()
{
	ChewingData *internal_data = ALC( ChewingData, 1 );
	ChewingOutput *internal_output = ALC( ChewingOutput, 1 );
	ChewingContext *ctx = ALC( ChewingContext, 1 );

	if ( !ctx || !internal_data || !internal_output ) {
		/* free( NULL ) is a no-op, so release all three unconditionally. */
		free( internal_data );
		free( internal_output );
		free( ctx );
		return NULL;
	}

	ctx->data = internal_data;
	ctx->output = internal_output;
	ctx->cand_no = 0;

	/* handle configuration */
	chewing_Reset( ctx );

	return ctx;
}
Пример #8
0
/*
 * Allocate a ChewingData object and fill in its defaults: the candidate
 * page size, the maximum Chinese symbol length, the null logger and the
 * selection keys '1'..'9','0'.
 *
 * Returns the new object, or NULL if the allocation fails.
 */
static ChewingData * allocate_ChewingData()
{
	static const int default_sel_keys[] = { '1', '2', '3', '4', '5', '6', '7', '8', '9', '0' };
	ChewingData *result = ALC( ChewingData, 1 );

	if ( !result )
		return NULL;

	memcpy( result->config.selKey, default_sel_keys, sizeof( result->config.selKey ) );
	result->logger = NullLogger;
	result->config.maxChiSymbolLen = MAX_CHI_SYMBOL_LEN;
	result->config.candPerPage = MAX_SELKEY;

	return result;
}
Пример #9
0
/*
 * Look for a phrase of `ph_id` covering [from, to] that agrees with every
 * user selection.
 *
 * For each selected interval:
 *   - if it lies inside [from, to], the matching part of the phrase text
 *     must equal the selected string, otherwise the next phrase is tried;
 *   - if it merely intersects [from, to], no phrase can satisfy it and
 *     the search is abandoned.
 *
 * On success returns 1 and stores the matching phrase, allocated here, in
 * *pp_phr.  On failure returns 0 and *pp_phr is NULL.
 */
static int CheckChoose(
		ChewingData *pgdata,
		int ph_id, int from, int to, Phrase **pp_phr, 
		char selectStr[][ MAX_PHONE_SEQ_LEN * MAX_UTF8_SIZE + 1 ], 
		IntervalType selectInterval[], int nSelect )
{
	IntervalType span;
	Phrase *candidate = ALC( Phrase, 1 );
	int idx;

	assert( candidate );
	span.from = from;
	span.to = to;
	*pp_phr = NULL;

	GetPhraseFirst( pgdata, candidate, ph_id );
	do {
		for ( idx = 0; idx < nSelect; idx++ ) {
			IntervalType sel = selectInterval[ idx ];

			if ( IsContain( span, sel ) ) {
				int nChars = sel.to - sel.from;

				/* The selected text must appear at the same
				 * offset inside the candidate; if not, give
				 * up on this candidate. */
				if ( memcmp(
					ueStrSeek( candidate->phrase, sel.from - from ),
					selectStr[ idx ],
					ueStrNBytes( selectStr[ idx ], nChars ) ) != 0 )
					break;
			}
			else if ( IsIntersect( span, sel ) ) {
				/* Partial overlap with a selection: hopeless. */
				free( candidate );
				return 0;
			}
		}

		/* Inner loop ran to completion: every selection is satisfied. */
		if ( idx == nSelect ) {
			*pp_phr = candidate;
			return 1;
		}
	} while ( GetPhraseNext( pgdata, candidate ) );

	free( candidate );
	return 0;
}
Пример #10
0
/*
 * Find the maximum frequency among the phrases that share the phone
 * sequence `phoneSeq` of length `len`.
 *
 * The phrases reached through TreeFindPhrase() are scanned first, then the
 * prepared statement STMT_USERPHRASE_GET_MAX_FREQ is run to obtain the
 * maximum user-phrase frequency.  The result is never lower than
 * FREQ_INIT_VALUE.
 *
 * If the temporary Phrase buffer cannot be allocated, the first scan is
 * skipped.  If binding fails, the value found so far is returned.
 */
static int LoadMaxFreq(ChewingData *pgdata, const uint16_t phoneSeq[], int len)
{
    const TreeType *tree_pos;
    Phrase *phrase;
    int maxFreq = FREQ_INIT_VALUE;
    int max_userphrase_freq;
    int ret;

    tree_pos = TreeFindPhrase(pgdata, 0, len - 1, phoneSeq);
    if (tree_pos) {
        /* Allocate the scratch buffer only when there is something to scan. */
        phrase = ALC(Phrase, 1);
        if (phrase) {
            GetPhraseFirst(pgdata, phrase, tree_pos);
            do {
                if (phrase->freq > maxFreq)
                    maxFreq = phrase->freq;
            } while (GetVocabNext(pgdata, phrase));
            free(phrase);
        }
    }

    assert(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ]);

    ret = UserBindPhone(pgdata, STMT_USERPHRASE_GET_MAX_FREQ, phoneSeq, len);
    if (ret != SQLITE_OK) {
        LOG_ERROR("UserBindPhone returns %d", ret);
        return maxFreq;
    }

    ret = sqlite3_step(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ]);
    if (ret == SQLITE_ROW) {
        /* The column must be read while the statement still points at the
         * row, i.e. before sqlite3_reset(). */
        max_userphrase_freq = sqlite3_column_int(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ],
                                                 SQL_STMT_USERPHRASE[STMT_USERPHRASE_GET_MAX_FREQ].column
                                                 [COLUMN_USERPHRASE_USER_FREQ]);

        if (max_userphrase_freq > maxFreq)
            maxFreq = max_userphrase_freq;
    }

    /* Always reset after stepping, whether or not a row was produced, so
     * the statement can be bound and stepped again by the next call. */
    ret = sqlite3_reset(pgdata->static_data.stmt_userphrase[STMT_USERPHRASE_GET_MAX_FREQ]);
    if (ret != SQLITE_OK) {
        LOG_ERROR("sqlite3_reset returns %d", ret);
    }

    return maxFreq;
}
Пример #11
0
/*
 * Load the original frequency of the phrase `wordSeq` from the static
 * dictionary.
 *
 * The phrases reached through TreeFindPhrase() for `phoneSeq` (length
 * `len`) are compared with `wordSeq`; the frequency of the first exact
 * match is returned.
 *
 * Returns FREQ_INIT_VALUE when the phone sequence is not found, when no
 * phrase matches, or when the temporary Phrase buffer cannot be allocated.
 */
static int LoadOriginalFreq(ChewingData *pgdata, const uint16_t phoneSeq[], const char wordSeq[], int len)
{
    const TreeType *tree_pos;
    int retval = FREQ_INIT_VALUE;
    Phrase *phrase;

    tree_pos = TreeFindPhrase(pgdata, 0, len - 1, phoneSeq);
    if (!tree_pos)
        return retval;

    phrase = ALC(Phrase, 1);
    if (!phrase)
        return retval;

    GetPhraseFirst(pgdata, phrase, tree_pos);
    do {
        /* find the same phrase */
        if (!strcmp(phrase->phrase, wordSeq)) {
            retval = phrase->freq;
            break;
        }
    } while (GetVocabNext(pgdata, phrase));

    free(phrase);
    return retval;
}
Пример #12
0
/*
 * Load the original frequency of the phrase `wordSeq` from the static
 * dictionary.
 *
 * The phrases reached through TreeFindPhrase() for `phoneSeq` (length
 * `len`) are compared with `wordSeq`; the frequency of the first exact
 * match is returned.
 *
 * Returns FREQ_INIT_VALUE when the phone sequence is not found, when no
 * phrase matches, or when the temporary Phrase buffer cannot be allocated.
 */
static int LoadOriginalFreq( const uint16 phoneSeq[], const char wordSeq[], int len )
{
	int pho_id;
	int retval = FREQ_INIT_VALUE;
	Phrase *phrase;

	pho_id = TreeFindPhrase( 0, len - 1, phoneSeq );
	if ( pho_id == -1 )
		return retval;

	phrase = ALC( Phrase, 1 );
	if ( ! phrase )
		return retval;

	GetPhraseFirst( phrase, pho_id );
	do {
		/* find the same phrase */
		if ( ! strcmp( phrase->phrase, wordSeq ) ) {
			retval = phrase->freq;
			break;
		}
	} while ( GetPhraseNext( phrase ) );

	free( phrase );
	return retval;
}
Пример #13
0
/*
 * Create a fully initialised chewing context.
 *
 * Steps, in order: allocate the context, its output and its data; reset
 * the context; resolve the search path; then locate and load the
 * dictionary and tree, the hash, the symbol table, the easy-symbol table
 * and the pinyin table.
 *
 * Note the two return conventions in use: InitHash() and InitPinyin()
 * report failure with 0, every other step reports failure with non-zero.
 *
 * Returns the new context, or NULL after passing the (possibly NULL)
 * context to chewing_delete() if any step fails.
 */
CHEWING_API ChewingContext *chewing_new()
{
	char search_path[PATH_MAX];
	char path[PATH_MAX];
	ChewingContext *ctx = ALC( ChewingContext, 1 );

	if ( !ctx )
		goto error;

	ctx->output = ALC ( ChewingOutput, 1 );
	if ( !ctx->output )
		goto error;

	ctx->data = allocate_ChewingData();
	if ( !ctx->data )
		goto error;

	chewing_Reset( ctx );

	if ( get_search_path( search_path, sizeof( search_path ) ) )
		goto error;

	/* Dictionary and phone tree share one directory. */
	if ( find_path_by_files( search_path, DICT_FILES, path, sizeof( path ) ) )
		goto error;
	if ( InitDict( ctx->data, path ) )
		goto error;
	if ( InitTree( ctx->data, path ) )
		goto error;

	/* InitHash() returns 0 on failure. */
	if ( !InitHash( ctx->data ) )
		goto error;

	ctx->cand_no = 0;

	/* Symbol table. */
	if ( find_path_by_files( search_path, SYMBOL_TABLE_FILES, path, sizeof( path ) ) )
		goto error;
	if ( InitSymbolTable( ctx->data, path ) )
		goto error;

	/* Easy symbol input table. */
	if ( find_path_by_files( search_path, EASY_SYMBOL_FILES, path, sizeof( path ) ) )
		goto error;
	if ( InitEasySymbolInput( ctx->data, path ) )
		goto error;

	/* Pinyin table; InitPinyin() returns 0 on failure. */
	if ( find_path_by_files( search_path, PINYIN_FILES, path, sizeof( path ) ) )
		goto error;
	if ( !InitPinyin( ctx->data, path ) )
		goto error;

	return ctx;

error:
	chewing_delete( ctx );
	return NULL;
}
Пример #14
0
/*
 * Look for the user phrase with the highest frequency that covers
 * [from, to] and agrees with every user selection.
 *
 * Pass 1 rejects the interval outright if any selected interval
 * intersects it without being contained in it.  Pass 2 walks the user
 * phrases for `new_phoneSeq` and keeps the matching one with the highest
 * userfreq.
 *
 * On success returns 1 and stores the chosen phrase, allocated here, in
 * *pp_phr.  Returns 0 with *pp_phr == NULL when the interval is rejected,
 * when there is no user phrase at all, or when none matches.
 */
static int CheckUserChoose( 
		ChewingData *pgdata,
		uint16_t *new_phoneSeq, int from , int to,
		Phrase **pp_phr, 
		char selectStr[][ MAX_PHONE_SEQ_LEN * MAX_UTF8_SIZE + 1 ], 
		IntervalType selectInterval[], int nSelect )
{
	IntervalType inte, c;
	int chno, len;
	int user_alloc;
	UserPhraseData *pUserPhraseData;
	Phrase *p_phr = ALC( Phrase, 1 );

	assert( p_phr );
	inte.from = from;
	inte.to = to;
	*pp_phr = NULL;

	/* pass 1
	 * if these exist one selected interval which is not contained by inte
	 * but has intersection with inte, then inte is an unacceptable interval
	 */
	for ( chno = 0; chno < nSelect; chno++ ) {
		c = selectInterval[ chno ];
		if ( IsIntersect( inte, c ) && ! IsContain( inte, c ) ) {
			free( p_phr );
			return 0;
		}
	}

	/* pass 2
	 * if there exist one phrase satisfied all selectStr then return 1, else return 0.
	 * also store the phrase with highest freq
	 */
	p_phr->freq = -1;	/* sentinel: nothing selected yet */
	/* A plain for-loop so that an empty user phrase list (NULL from
	 * UserGetPhraseFirst) is never dereferenced. */
	for ( pUserPhraseData = UserGetPhraseFirst( pgdata, new_phoneSeq );
			pUserPhraseData != NULL;
			pUserPhraseData = UserGetPhraseNext( pgdata, new_phoneSeq ) ) {
		for ( chno = 0; chno < nSelect; chno++ ) {
			c = selectInterval[ chno ];

			if ( IsContain( inte, c ) ) {
				/* 
				 * The selected text must appear at the same
				 * offset inside this user phrase; if it does
				 * not, stop checking this phrase. */
				len = c.to - c.from;
				if ( memcmp(
					ueStrSeek( pUserPhraseData->wordSeq, c.from - from ),
					selectStr[ chno ],
					ueStrNBytes( selectStr[ chno ], len ) ) )
					break;
			}

		}
		if ( chno == nSelect ) {
			/* save phrase data to "pp_phr" */
			if ( pUserPhraseData->userfreq > p_phr->freq ) {
				if ( ( user_alloc = ( to - from ) ) > 0 ) {
					ueStrNCpy( p_phr->phrase,
							pUserPhraseData->wordSeq,
							user_alloc, 1);
				}
				p_phr->freq = pUserPhraseData->userfreq;
				*pp_phr = p_phr;
			}
		}
	}

	if ( p_phr->freq != -1 ) 
		return 1;
		
	free( p_phr );
	return 0;
}