bool sphFixupIndexSettings ( CSphIndex * pIndex, const CSphConfigSection & hIndex, CSphString & sError ) { bool bTokenizerSpawned = false; if ( !pIndex->GetTokenizer () ) { CSphTokenizerSettings tSettings; if ( !sphConfTokenizer ( hIndex, tSettings, sError ) ) return false; ISphTokenizer * pTokenizer = ISphTokenizer::Create ( tSettings, sError ); if ( !pTokenizer ) return false; bTokenizerSpawned = true; pIndex->SetTokenizer ( pTokenizer ); } if ( !pIndex->GetDictionary () ) { CSphDictSettings tSettings; sphConfDictionary ( hIndex, tSettings ); CSphDict * pDict = sphCreateDictionaryCRC ( tSettings, pIndex->GetTokenizer (), sError ); if ( !pDict ) return false; pIndex->SetDictionary ( pDict ); } if ( bTokenizerSpawned ) { ISphTokenizer * pTokenizer = pIndex->LeakTokenizer (); ISphTokenizer * pTokenFilter = ISphTokenizer::CreateTokenFilter ( pTokenizer, pIndex->GetDictionary ()->GetMultiWordforms () ); pIndex->SetTokenizer ( pTokenFilter ? pTokenFilter : pTokenizer ); } if ( !pIndex->IsStripperInited () ) { CSphIndexSettings tSettings = pIndex->GetSettings (); if ( hIndex ( "html_strip" ) ) { tSettings.m_bHtmlStrip = hIndex.GetInt ( "html_strip" )!=0; tSettings.m_sHtmlIndexAttrs = hIndex.GetStr ( "html_index_attrs" ); tSettings.m_sHtmlRemoveElements = hIndex.GetStr ( "html_remove_elements" ); } pIndex->Setup ( tSettings ); } return true; }
int main () { // threads should be initialized before memory allocations char cTopOfMainStack; sphThreadInit(); MemorizeStack ( &cTopOfMainStack ); CSphString sError; CSphDictSettings tDictSettings; ISphTokenizer * pTok = sphCreateUTF8Tokenizer(); CSphDict * pDict = sphCreateDictionaryCRC ( tDictSettings, pTok, sError, "rt1" ); CSphSource * pSrc = SpawnSource ( "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM posting WHERE id<=10000 AND id%2=0", pTok, pDict ); ISphTokenizer * pTok2 = sphCreateUTF8Tokenizer(); CSphDict * pDict2 = sphCreateDictionaryCRC ( tDictSettings, pTok, sError, "rt2" ); CSphSource * pSrc2 = SpawnSource ( "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM posting WHERE id<=10000 AND id%2=1", pTok2, pDict2 ); CSphSchema tSrcSchema; if ( !pSrc->UpdateSchema ( &tSrcSchema, sError ) ) sphDie ( "update-schema failed: %s", sError.cstr() ); CSphSchema tSchema; // source schema must be all dynamic attrs; but index ones must be static tSchema.m_dFields = tSrcSchema.m_dFields; for ( int i=0; i<tSrcSchema.GetAttrsCount(); i++ ) tSchema.AddAttr ( tSrcSchema.GetAttr(i), false ); CSphConfigSection tRTConfig; sphRTInit(); sphRTConfigure ( tRTConfig, true ); SmallStringHash_T< CSphIndex * > dTemp; sphReplayBinlog ( dTemp, 0 ); ISphRtIndex * pIndex = sphCreateIndexRT ( tSchema, "testrt", 32*1024*1024, "data/dump", false ); pIndex->SetTokenizer ( pTok ); // index will own this pair from now on pIndex->SetDictionary ( pDict ); if ( !pIndex->Prealloc ( false, false, sError ) ) sphDie ( "prealloc failed: %s", pIndex->GetLastError().cstr() ); g_pIndex = pIndex; // initial indexing int64_t tmStart = sphMicroTimer(); SphThread_t t1, t2; sphThreadCreate ( &t1, IndexingThread, pSrc ); sphThreadCreate ( &t2, IndexingThread, pSrc2 ); sphThreadJoin ( &t1 ); sphThreadJoin ( &t2 ); #if 0 // update tParams.m_sQuery = "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM rt2 WHERE id<=10000"; SetupIndexing ( pSrc, tParams ); DoIndexing ( pSrc, pIndex ); #endif // search DoSearch ( pIndex ); // shutdown index (should cause dump) int64_t tmShutdown = sphMicroTimer(); #if SPH_ALLOCS_PROFILER printf ( "pre-shutdown allocs=%d, bytes="INT64_FMT"\n", sphAllocsCount(), sphAllocBytes() ); #endif SafeDelete ( pIndex ); #if SPH_ALLOCS_PROFILER printf ( "post-shutdown allocs=%d, bytes="INT64_FMT"\n", sphAllocsCount(), sphAllocBytes() ); #endif int64_t tmEnd = sphMicroTimer(); printf ( "shutdown done in %d.%03d sec\n", (int)((tmEnd-tmShutdown)/1000000), (int)(((tmEnd-tmShutdown)%1000000)/1000) ); printf ( "total with shutdown %d.%03d sec, %.2f MB/sec\n", (int)((tmEnd-tmStart)/1000000), (int)(((tmEnd-tmStart)%1000000)/1000), g_fTotalMB*1000000.0f/(tmEnd-tmStart) ); #if SPH_DEBUG_LEAKS || SPH_ALLOCS_PROFILER sphAllocsStats(); #endif #if USE_WINDOWS PROCESS_MEMORY_COUNTERS pmc; HANDLE hProcess = OpenProcess ( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, GetCurrentProcessId() ); if ( hProcess && GetProcessMemoryInfo ( hProcess, &pmc, sizeof(pmc)) ) { printf ( "--- peak-wss=%d, peak-pagefile=%d\n", (int)pmc.PeakWorkingSetSize, (int)pmc.PeakPagefileUsage ); } #endif SafeDelete ( pIndex ); sphRTDone (); }