示例#1
0
/// Apply this affix rule to sWord in place.
///
/// A rule with an empty condition reports success without touching the word.
/// Otherwise the word length is cached in m_iWordLen (the strip/append helpers
/// read it), the condition is verified through CheckSuffix()/CheckPrefix()
/// unless it is the single-dot condition ".", and the affix is then rewritten
/// by StripAppendSuffix()/StripAppendPrefix().
///
/// @param sWord  word to transform; rewritten only when the rule applies
/// @return true if the rule has no condition or was applied; false if the word
///         is empty, fails the condition check, or the rewrite helper fails
bool CISpellAffixRule::Apply ( CSphString & sWord )
{
	// nothing to match against: report success, leave the word alone
	if ( m_sCondition.IsEmpty () )
		return true;

	if ( sWord.IsEmpty () )
		return false;

	// cached for the Strip/Append helpers called below
	m_iWordLen = strlen ( sWord.cstr () );

	// the "." condition skips the explicit condition check
	const bool bMustCheck = !( m_sCondition=="." );

	if ( m_eRule==RULE_SUFFIXES )
		return ( !bMustCheck || CheckSuffix ( sWord ) ) && StripAppendSuffix ( sWord );

	return ( !bMustCheck || CheckPrefix ( sWord ) ) && StripAppendPrefix ( sWord );
}
示例#2
0
/// Replace the leading m_sStrip of sWord with m_sAppend.
///
/// Reads m_iWordLen as the length of sWord (Apply() stores it before calling
/// this method) and uses m_iStripLen / m_iAppendLen as the number of bytes to
/// drop from, and to place in front of, the word.
///
/// @param sWord  word to rewrite in place; not modified when false is returned
/// @return false if the word is shorter than the strip length, does not start
///         with m_sStrip, or the rewritten word would not fit into the
///         MAX_STR_LENGTH scratch buffer; true otherwise
bool CISpellAffixRule::StripAppendPrefix ( CSphString & sWord ) const
{
	// scratch buffer, shared between calls (function-local static)
	static char szTmp [MAX_STR_LENGTH];

	// cannot strip more than the word holds
	if ( m_iWordLen < m_iStripLen )
		return false;

	// anchored comparison at the start of the word; strstr() would needlessly
	// scan the whole word looking for a match elsewhere
	if ( !m_sStrip.IsEmpty () && strncmp ( sWord.cstr (), m_sStrip.cstr (), m_iStripLen )!=0 )
		return false;

	const int iTailLen = (int)( m_iWordLen - m_iStripLen );
	const int iNewLen = iTailLen + (int)m_iAppendLen;

	// result plus its trailing zero must fit the fixed-size buffer
	if ( iNewLen>=MAX_STR_LENGTH )
		return false;

	// bounded copy: never writes past the m_iAppendLen bytes accounted for above
	if ( !m_sAppend.IsEmpty () )
		strncpy ( szTmp, m_sAppend.cstr(), m_iAppendLen );

	strncpy ( szTmp + m_iAppendLen, sWord.cstr () + m_iStripLen, iTailLen );
	szTmp [iNewLen] = '\0';

	sWord = szTmp;

	return true;
}
示例#3
0
/// Replace the trailing m_sStrip of sWord with m_sAppend.
///
/// Reads m_iWordLen as the length of sWord (Apply() stores it before calling
/// this method) and m_iStripLen as the number of trailing bytes to drop.
///
/// @param sWord  word to rewrite in place; not modified when false is returned
/// @return false if the word is shorter than the strip length, does not end
///         with m_sStrip, or the rewritten word would not fit into the
///         MAX_STR_LENGTH scratch buffer; true otherwise
bool CISpellAffixRule::StripAppendSuffix ( CSphString & sWord ) const
{
	// scratch buffer, shared between calls (function-local static)
	static char szTmp [MAX_STR_LENGTH];

	// cannot strip more than the word holds; also keeps the stem length non-negative
	if ( m_iWordLen < m_iStripLen )
		return false;

	const int iStemLen = (int)( m_iWordLen - m_iStripLen );

	// the word must end with the suffix that is being stripped
	if ( !m_sStrip.IsEmpty () && strncmp ( sWord.cstr () + iStemLen, m_sStrip.cstr (), m_iStripLen )!=0 )
		return false;

	// cstr() is only touched for a non-empty append string
	const int iAppendLen = m_sAppend.IsEmpty () ? 0 : (int) strlen ( m_sAppend.cstr () );

	// stem + append + trailing zero must fit the fixed-size buffer
	if ( iStemLen + iAppendLen>=MAX_STR_LENGTH )
		return false;

	strncpy ( szTmp, sWord.cstr (), iStemLen );
	szTmp [iStemLen] = '\0';

	if ( iAppendLen )
		strcat ( szTmp, m_sAppend.cstr () ); // NOLINT

	sWord = szTmp;

	return true;
}
示例#4
0
文件: testrt.cpp 项目: frankee/csft
/// Pump every document from pSrc into pIndex and print timing statistics.
///
/// Documents are added one by one; a commit is issued whenever the source's
/// total document counter is a multiple of COMMIT_STEP, and once more when a
/// zero document id is seen, which also ends the loop. Progress is printed
/// every 100 documents. DoSearch() is run a single time (guarded by a
/// function-local static flag) once iCommits*COMMIT_STEP reaches 5000.
/// On exit the source is disconnected, throughput and commit latencies are
/// printed, and the indexed volume is added to g_fTotalMB.
///
/// @param pSrc    document source to iterate
/// @param pIndex  index receiving the documents and commits
void DoIndexing ( CSphSource * pSrc, ISphRtIndex * pIndex )
{
    CSphString sError;
    CSphVector<DWORD> dMvas;

    const int64_t tmStart = sphMicroTimer ();
    int64_t tmCommitAcc = 0;   // sum of commit times; turned into the average on exit
    int64_t tmCommitWorst = 0; // slowest single commit
    int iCommits = 0;

    for ( ;; )
    {
        if ( !pSrc->IterateDocument ( sError ) )
            sphDie ( "iterate-document failed: %s", sError.cstr() );

        ISphHits * pDocHits = pSrc->IterateHits ( sError );
        if ( !sError.IsEmpty() )
            sphDie ( "iterate-hits failed: %s", sError.cstr() );

        // a zero document id is not indexed; it only triggers the final commit below
        if ( pSrc->m_tDocInfo.m_iDocID )
            pIndex->AddDocument ( pDocHits, pSrc->m_tDocInfo, NULL, dMvas, sError );

        const bool bCommitDue = ( pSrc->GetStats().m_iTotalDocuments % COMMIT_STEP )==0
            || !pSrc->m_tDocInfo.m_iDocID;
        if ( bCommitDue )
        {
            const int64_t tmBefore = sphMicroTimer();
            pIndex->Commit ();
            int64_t tmSpent = sphMicroTimer() - tmBefore;

            iCommits++;
            tmCommitAcc += tmSpent;
            tmCommitWorst = Max ( tmCommitWorst, tmSpent );

            if ( !pSrc->m_tDocInfo.m_iDocID )
            {
                // last commit done: convert the sum into an average and stop
                tmCommitAcc /= iCommits;
                break;
            }
        }

        if ( ( pSrc->GetStats().m_iTotalDocuments % 100 )==0 )
            printf ( "%d docs\r", (int)pSrc->GetStats().m_iTotalDocuments );

        // static: the search probe runs at most once, even across calls
        static bool bSearchPending = true;
        if ( bSearchPending && iCommits*COMMIT_STEP>=5000 )
        {
            printf ( "\n" );
            DoSearch ( pIndex );
            bSearchPending = false;
        }
    }

    pSrc->Disconnect();

    const int64_t tmElapsed = sphMicroTimer () - tmStart;
    const float fTotalMB = (float)pSrc->GetStats().m_iTotalBytes/1000000.0f;

    printf ( "commit-step %d, %d docs, %d bytes, %d.%03d sec, %.2f MB/sec\n",
             COMMIT_STEP,
             (int)pSrc->GetStats().m_iTotalDocuments,
             (int)pSrc->GetStats().m_iTotalBytes,
             (int)( tmElapsed/1000000 ), (int)( ( tmElapsed%1000000 )/1000 ),
             fTotalMB*1000000.0f/tmElapsed );
    printf ( "commit-docs %d, avg %d.%03d msec, max %d.%03d msec\n", COMMIT_STEP,
             (int)( tmCommitAcc/1000 ), (int)( tmCommitAcc%1000 ),
             (int)( tmCommitWorst/1000 ), (int)( tmCommitWorst%1000 ) );

    g_fTotalMB += fTotalMB;
}
示例#5
0
/// Select the case-conversion source and report the choice on stdout.
///
/// Checked in this order:
///  1. m_bUseDictConversion set: only a message is printed;
///  2. m_sCharsetFile set: its first line (up to 4095 chars) is parsed with
///     sphParseCharset() and, on success, loaded into m_LowerCaser with
///     m_bUseLowerCaser switched on;
///  3. m_sLocale set: the process locale is reset to "C", then LC_CTYPE is
///     switched to the requested locale, and a warning is printed when its
///     tolower() table over 0..255 matches the "C" one;
///  4. otherwise a "no character set" warning is printed.
/// All failures are reported with printf() only; nothing is returned.
void CISpellAffix::LoadLocale ()
{
	if ( m_bUseDictConversion )
	{
		printf ( "Using dictionary-defined character set\n" );
		return;
	}

	// explicit charset file takes priority over the locale
	if ( !m_sCharsetFile.IsEmpty () )
	{
		FILE * fp = fopen ( m_sCharsetFile.cstr (), "rt" );
		if ( !fp )
		{
			printf ( "Failed to open '%s'\n", m_sCharsetFile.cstr() );
			return;
		}

		printf ( "Using charater set from '%s'\n", m_sCharsetFile.cstr () );

		const int MAX_CHARSET_LENGTH = 4096;
		char szLine [MAX_CHARSET_LENGTH];

		// only the first line of the file is considered
		if ( !fgets ( szLine, MAX_CHARSET_LENGTH, fp ) )
		{
			printf ( "Failed to read charset from '%s'\n", m_sCharsetFile.cstr() );
		} else
		{
			CSphVector<CSphRemapRange> dRemaps;
			if ( sphParseCharset ( szLine, dRemaps ) )
			{
				m_bUseLowerCaser = true;
				m_LowerCaser.AddRemaps ( dRemaps, 0 );
			} else
			{
				printf ( "Failed to parse charset from '%s'\n", m_sCharsetFile.cstr() );
			}
		}

		fclose ( fp );
		return;
	}

	if ( m_sLocale.IsEmpty () )
	{
		printf ( "WARNING: no character set specified\n" );
		return;
	}

	char dLocaleC[256], dLocaleUser[256];

	// snapshot the lowercase table of the plain "C" locale for comparison
	setlocale ( LC_ALL, "C" );
	for ( int iCh=0; iCh<256; iCh++ )
		dLocaleC[iCh] = (char) tolower(iCh);

	if ( !setlocale ( LC_CTYPE, m_sLocale.cstr() ) )
	{
		printf ( "WARNING: could not set user-defined locale for case conversions (locale=%s)\n", m_sLocale.cstr() );
		return;
	}

	printf ( "Using user-defined locale (locale=%s)\n", m_sLocale.cstr() );

	for ( int iCh=0; iCh<256; iCh++ )
		dLocaleUser[iCh] = (char) tolower(iCh);

	// identical tables mean the requested locale adds nothing over "C"
	if ( memcmp ( dLocaleC, dLocaleUser, 256 )==0 )
		printf ( "WARNING: user-defined locale provides the same case conversion as the default \"C\" locale\n" );
}