C++ (Cpp) sphCreateUTF8Tokenizerの例

プログラミング言語: C++ (Cpp)

メソッド/関数: sphCreateUTF8Tokenizer

hotexamples.comのコード掲載数: 2

C++ (Cpp) sphCreateUTF8Tokenizer - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のsphCreateUTF8Tokenizerの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: testrt.cpp プロジェクト: frankee/csft

/// RT index smoke test / benchmark driver.
///
/// Spawns two SQL sources (even and odd document ids), indexes them
/// concurrently into a single RT index from two threads, runs a search,
/// then destroys the index and reports shutdown and total timings.
int main ()
{
    // threads should be initialized before memory allocations
    char cTopOfMainStack;
    sphThreadInit();
    MemorizeStack ( &cTopOfMainStack );

    CSphString sError;
    CSphDictSettings tDictSettings;

    // first tokenizer/dictionary pair; feeds the even-id source and is
    // later handed over to the index
    ISphTokenizer * pTok = sphCreateUTF8Tokenizer();
    CSphDict * pDict = sphCreateDictionaryCRC ( tDictSettings, pTok, sError, "rt1" );
    CSphSource * pSrc = SpawnSource ( "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM posting WHERE id<=10000 AND id%2=0", pTok, pDict );

    // second, independent pair for the odd-id source; the dictionary must be
    // built against its own tokenizer (pTok2), not the first one
    ISphTokenizer * pTok2 = sphCreateUTF8Tokenizer();
    CSphDict * pDict2 = sphCreateDictionaryCRC ( tDictSettings, pTok2, sError, "rt2" );
    CSphSource * pSrc2 = SpawnSource ( "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM posting WHERE id<=10000 AND id%2=1", pTok2, pDict2 );

    CSphSchema tSrcSchema;
    if ( !pSrc->UpdateSchema ( &tSrcSchema, sError ) )
        sphDie ( "update-schema failed: %s", sError.cstr() );

    CSphSchema tSchema; // source schema must be all dynamic attrs; but index ones must be static
    tSchema.m_dFields = tSrcSchema.m_dFields;
    for ( int i=0; i<tSrcSchema.GetAttrsCount(); i++ )
        tSchema.AddAttr ( tSrcSchema.GetAttr(i), false );

    // bring up the RT subsystem and create the index under test
    CSphConfigSection tRTConfig;
    sphRTInit();
    sphRTConfigure ( tRTConfig, true );
    SmallStringHash_T< CSphIndex * > dTemp;
    sphReplayBinlog ( dTemp, 0 );
    ISphRtIndex * pIndex = sphCreateIndexRT ( tSchema, "testrt", 32*1024*1024, "data/dump", false );
    pIndex->SetTokenizer ( pTok ); // index will own this pair from now on
    pIndex->SetDictionary ( pDict );
    if ( !pIndex->Prealloc ( false, false, sError ) )
        sphDie ( "prealloc failed: %s", pIndex->GetLastError().cstr() );
    g_pIndex = pIndex;

    // initial indexing
    int64_t tmStart = sphMicroTimer();

    // one indexing thread per source; wait for both to finish
    SphThread_t t1, t2;
    sphThreadCreate ( &t1, IndexingThread, pSrc );
    sphThreadCreate ( &t2, IndexingThread, pSrc2 );
    sphThreadJoin ( &t1 );
    sphThreadJoin ( &t2 );

#if 0
    // update
    tParams.m_sQuery = "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM rt2 WHERE id<=10000";
    SetupIndexing ( pSrc, tParams );
    DoIndexing ( pSrc, pIndex );
#endif

    // search
    DoSearch ( pIndex );

    // shutdown index (should cause dump)
    int64_t tmShutdown = sphMicroTimer();

    // NOTE: whitespace around INT64_FMT is required; since C++11 a macro name
    // glued to a string literal is parsed as a user-defined-literal suffix
#if SPH_ALLOCS_PROFILER
    printf ( "pre-shutdown allocs=%d, bytes=" INT64_FMT "\n", sphAllocsCount(), sphAllocBytes() );
#endif
    SafeDelete ( pIndex );
#if SPH_ALLOCS_PROFILER
    printf ( "post-shutdown allocs=%d, bytes=" INT64_FMT "\n", sphAllocsCount(), sphAllocBytes() );
#endif

    int64_t tmEnd = sphMicroTimer();
    printf ( "shutdown done in %d.%03d sec\n", (int)((tmEnd-tmShutdown)/1000000), (int)(((tmEnd-tmShutdown)%1000000)/1000) );
    printf ( "total with shutdown %d.%03d sec, %.2f MB/sec\n",
             (int)((tmEnd-tmStart)/1000000), (int)(((tmEnd-tmStart)%1000000)/1000),
             g_fTotalMB*1000000.0f/(tmEnd-tmStart) );

#if SPH_DEBUG_LEAKS || SPH_ALLOCS_PROFILER
    sphAllocsStats();
#endif
#if USE_WINDOWS
    PROCESS_MEMORY_COUNTERS pmc;
    HANDLE hProcess = OpenProcess ( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, GetCurrentProcessId() );
    if ( hProcess && GetProcessMemoryInfo ( hProcess, &pmc, sizeof(pmc)) )
    {
        printf ( "--- peak-wss=%d, peak-pagefile=%d\n", (int)pmc.PeakWorkingSetSize, (int)pmc.PeakPagefileUsage );
    }
#endif

    // the index was already destroyed above (that is the timed shutdown),
    // so only the RT subsystem is left to tear down
    sphRTDone ();
}

コード例 #2

ファイルを表示

ファイル: sphinxutils.cpp プロジェクト: agibralter/sphinx

/// Create and configure a tokenizer from the options of an index config section.
///
/// Options consulted: charset_type, charset_table, min_word_len, ngram_chars,
/// ngram_len, exceptions (or the deprecated synonyms), phrase_boundary_step,
/// phrase_boundary, ignore_chars.
///
/// @param hIndex  index config section to read the options from
/// @param sError  receives an error message, prefixed with the option name, on failure
/// @return the configured tokenizer released from its scoped holder
///         (presumably owned by the caller from then on), or NULL on failure
ISphTokenizer * sphConfTokenizer ( const CSphConfigSection & hIndex, CSphString & sError )
{
	// scoped holder; early error returns below leave the tokenizer to it
	CSphScopedPtr<ISphTokenizer> pTokenizer ( NULL );

	// charset_type selects the implementation; a missing option means sbcs
	if ( !hIndex("charset_type") || hIndex["charset_type"]=="sbcs" )
	{
		pTokenizer = sphCreateSBCSTokenizer ();
	}
	else if ( hIndex["charset_type"]=="utf-8" )
	{
		// the n-gram flavour is picked whenever ngram_chars is configured
		if ( hIndex("ngram_chars") )
			pTokenizer = sphCreateUTF8NgramTokenizer ();
		else
			pTokenizer = sphCreateUTF8Tokenizer ();
	}
	else
	{
		sError.SetSprintf ( "unknown charset type '%s'", hIndex["charset_type"].cstr() );
		return NULL;
	}

	assert ( pTokenizer.Ptr() );

	// charset_table
	if ( hIndex("charset_table") && !pTokenizer->SetCaseFolding ( hIndex["charset_table"].cstr(), sError ) )
	{
		sError.SetSprintf ( "'charset_table': %s", sError.cstr() );
		return NULL;
	}

	// min_word_len; negative values are clamped to 0, and 0 is not applied
	if ( hIndex("min_word_len") )
	{
		const int iWordLen = Max ( hIndex["min_word_len"].intval(), 0 );
		if ( iWordLen )
			pTokenizer->SetMinWordLen ( iWordLen );
	}

	// ngram_chars
	if ( hIndex("ngram_chars") && !pTokenizer->SetNgramChars ( hIndex["ngram_chars"].cstr(), sError ) )
	{
		sError.SetSprintf ( "'ngram_chars': %s", sError.cstr() );
		return NULL;
	}

	// ngram_len; negative values are clamped to 0, and 0 is not applied
	if ( hIndex("ngram_len") )
	{
		const int iGramLen = Max ( hIndex["ngram_len"].intval(), 0 );
		if ( iGramLen )
			pTokenizer->SetNgramLen ( iGramLen );
	}

	// synonyms; "exceptions" is the new option name, "synonyms" the deprecated one
	CSphVariant * pSynonymsOpt = hIndex("exceptions");
	if ( !pSynonymsOpt )
		pSynonymsOpt = hIndex("synonyms");

	if ( pSynonymsOpt && !pTokenizer->LoadSynonyms ( pSynonymsOpt->cstr(), sError ) )
	{
		sError.SetSprintf ( "'exceptions': %s", sError.cstr() );
		return NULL;
	}

	// phrase boundaries; only applied when the step is positive
	int iStep = 0;
	if ( hIndex("phrase_boundary_step") )
		iStep = Max ( hIndex["phrase_boundary_step"].intval(), 0 );

	if ( iStep>0 && hIndex("phrase_boundary")
		&& !pTokenizer->SetBoundary ( hIndex["phrase_boundary"].cstr(), sError ) )
	{
		sError.SetSprintf ( "'phrase_boundary': %s", sError.cstr() );
		return NULL;
	}

	// ignore_chars
	if ( hIndex("ignore_chars") && !pTokenizer->SetIgnoreChars ( hIndex["ignore_chars"].cstr(), sError ) )
	{
		sError.SetSprintf ( "'ignore_chars': %s", sError.cstr() );
		return NULL;
	}

	// all options applied; release the tokenizer from the scoped holder
	return pTokenizer.LeakPtr();
}