Beispiel #1
0
void UrlBreakInit ( const char * sDict, bool bVerbose )
{
	FILE * fp = fopen ( sDict, "rb" );
	if ( !fp )
		sphDie ( "failed to open %s", sDict );

	int iEntries = 0, iSumFreqs = 0;
	char sBuf[512];
	sBuf[0] = 0;
	while ( fgets ( sBuf, sizeof(sBuf), fp ) )
	{
		// extract keyword
		char * p = sBuf;
		while ( *p && *p!=' ' )
			p++;
		if ( *p!=' ' )
			sphDie ( "bad freqdict line: no space" );
		*p++ = 0;

		// extract freq
		int iFreq = atoi(p);
		if ( !iFreq )
			sphDie ( "bad freqdict line: zero freq" );
		iSumFreqs += iFreq;

#if 0
		// only keep frequent-enough words
		if ( iFreq < MIN_FREQ )
			continue;
#endif

		// only keep all-latin words
		bool bLatin = true;
		char * s = sBuf;
		while ( *s )
		{
			if ( !( *s>='a' && *s<='z' ) && !( *s>='0' && *s<='9' ) )
			{
				bLatin = false;
				break;
			}
			s++;
		}
		if ( !bLatin )
			continue;

		// hash it
		g_LM.AddWord ( sBuf, iFreq );
		iEntries++;
	}

	fclose ( fp );
	g_LM.SetTotal ( iSumFreqs );
	if ( bVerbose )
		printf ( "kept %d entries, total %d hits\n", iEntries, iSumFreqs );
}
Beispiel #2
0
void SetupIndexing ( CSphSource_MySQL * pSrc, const CSphSourceParams_MySQL & tParams )
{
    CSphString sError;
    if ( !pSrc->Setup ( tParams ) )
        sphDie ( "setup failed" );
    if ( !pSrc->Connect ( sError ) )
        sphDie ( "connect failed: %s", sError.cstr() );
    if ( !pSrc->IterateStart ( sError ) )
        sphDie ( "iterate-start failed: %s", sError.cstr() );
}
void sphAllocsCheck ()
{
	for ( CSphMemHeader * pHeader=g_pAllocs; pHeader; pHeader=pHeader->m_pNext )
	{
		BYTE * pBlock = (BYTE*) pHeader;

		if (!( pHeader->m_uMagic==MEMORY_MAGIC_ARRAY || pHeader->m_uMagic==MEMORY_MAGIC_PLAIN ))
			sphDie ( "corrupted header in block %d allocated at %s(%d)",
				pHeader->m_iAllocId, pHeader->m_sFile, pHeader->m_iLine );

		if ( *(DWORD*)( pBlock+pHeader->m_iSize+sizeof(CSphMemHeader) ) != MEMORY_MAGIC_END )
			sphDie ( "out-of-bounds write beyond block %d allocated at %s(%d)",
				pHeader->m_iAllocId, pHeader->m_sFile, pHeader->m_iLine );
	}
}
Beispiel #4
0
void StripStdin ( const char * sIndexAttrs, const char * sRemoveElements )
{
	CSphString sError;
	CSphHTMLStripper tStripper ( true );
	if ( !tStripper.SetIndexedAttrs ( sIndexAttrs, sError )
		|| !tStripper.SetRemovedElements ( sRemoveElements, sError ) )
			sphDie ( "failed to configure stripper: %s", sError.cstr() );

	CSphVector<BYTE> dBuffer;
	while ( !feof(stdin) )
	{
		char sBuffer[1024];
		int iLen = fread ( sBuffer, 1, sizeof(sBuffer), stdin );
		if ( !iLen )
			break;

		int iPos = dBuffer.GetLength();
		dBuffer.Resize ( iPos+iLen );
		memcpy ( &dBuffer[iPos], sBuffer, iLen );
	}
	dBuffer.Add ( 0 );

	tStripper.Strip ( &dBuffer[0] );
	fprintf ( stdout, "dumping stripped results...\n%s\n", &dBuffer[0] );
}
Beispiel #5
0
void * sphDebugNew ( size_t iSize, const char * sFile, int iLine, bool bArray )
{
	BYTE * pBlock = (BYTE*) ::malloc ( iSize+sizeof(CSphMemHeader)+sizeof(DWORD) );
	if ( !pBlock )
		sphDie ( "out of memory (unable to allocate "UINT64_FMT" bytes)", (uint64_t)iSize ); // FIXME! this may fail with malloc error too

	*(DWORD*)( pBlock+iSize+sizeof(CSphMemHeader) ) = MEMORY_MAGIC_END;
	g_tAllocsMutex.Lock();

	CSphMemHeader * pHeader = (CSphMemHeader*) pBlock;
	pHeader->m_uMagic = bArray ? MEMORY_MAGIC_ARRAY : MEMORY_MAGIC_PLAIN;
	pHeader->m_sFile = sFile;
	pHeader->m_iLine = iLine;
	pHeader->m_iSize = iSize;
	pHeader->m_iAllocId = ++g_iAllocsId;
	pHeader->m_pNext = g_pAllocs;
	pHeader->m_pPrev = NULL;
	if ( g_pAllocs )
	{
		assert ( !g_pAllocs->m_pPrev );
		g_pAllocs->m_pPrev = pHeader;
	}
	g_pAllocs = pHeader;

	g_iCurAllocs++;
	g_iCurBytes += iSize;

	g_iTotalAllocs++;
	g_iPeakAllocs = Max ( g_iPeakAllocs, g_iCurAllocs );
	g_iPeakBytes = Max ( g_iPeakBytes, g_iCurBytes );

	g_tAllocsMutex.Unlock();
	return pHeader+1;
}
Beispiel #6
0
void * sphDebugNew ( size_t iSize )
{
	BYTE * pBlock = (BYTE*) ::malloc ( iSize+sizeof(size_t)*2 );
	if ( !pBlock )
		sphDie ( "out of memory (unable to allocate %"PRIu64" bytes)", (uint64_t)iSize ); // FIXME! this may fail with malloc error too

	const int iMemType = sphMemStatGet();
	assert ( iMemType>=0 && iMemType<Memory::SPH_MEM_TOTAL );

	g_tAllocsMutex.Lock ();

	g_iAllocsId++;
	g_iCurAllocs++;
	g_iCurBytes += iSize;
	g_iTotalAllocs++;
	g_iPeakAllocs = Max ( g_iCurAllocs, g_iPeakAllocs );
	g_iPeakBytes = Max ( g_iCurBytes, g_iPeakBytes );

	g_dMemCategoryStat[iMemType].m_iSize += iSize;
	g_dMemCategoryStat[iMemType].m_iCount++;

	g_tAllocsMutex.Unlock ();

	size_t * pData = (size_t *)pBlock;
	pData[0] = iSize;
	pData[1] = iMemType;

	return pBlock + sizeof(size_t)*2;
}
void * operator new [] ( size_t iSize )
{
	void * pResult = ::malloc ( iSize );
	if ( !pResult )
		sphDie ( "out of memory (unable to allocate %"PRIu64" bytes)", (uint64_t)iSize ); // FIXME! this may fail with malloc error too
	return pResult;
}
const char * sphLoadConfig ( const char * sOptConfig, bool bQuiet, CSphConfigParser & cp )
{
	// fallback to defaults if there was no explicit config specified
	while ( !sOptConfig )
	{
#ifdef SYSCONFDIR
		sOptConfig = SYSCONFDIR "/sphinx.conf";
		if ( sphIsReadable(sOptConfig) )
			break;
#endif

		sOptConfig = "./sphinx.conf";
		if ( sphIsReadable(sOptConfig) )
			break;

		sOptConfig = NULL;
		break;
	}

	if ( !sOptConfig )
		sphDie ( "no readable config file (looked in "
#ifdef SYSCONFDIR
		SYSCONFDIR "/sphinx.conf, "
#endif
		"./sphinx.conf)" );

	if ( !bQuiet )
		fprintf ( stdout, "using config file '%s'...\n", sOptConfig );

	// load config
	if ( !cp.Parse ( sOptConfig ) )
		sphDie ( "failed to parse config file '%s'", sOptConfig );

	CSphConfig & hConf = cp.m_tConf;
	if ( !hConf ( "index" ) )
		sphDie ( "no indexes found in config file '%s'", sOptConfig );

	return sOptConfig;
}
int checkParameter(Json::Value& value) {
    int ret = 0;
    
    if (value["merge"].asBool() && value["indexes"].size() != 2) {
        printf("merge parame error\n   --merge mainIndex incIndex\n");
        usage();
        exit(-2);
    }
    
    if (value["indexes"].size() == 0) {
        printf("parame error\n    default should give a index\n");
        usage();
        exit(-3);
    }
    
    if (!sphIsReadable(value["config"].asString().c_str())) {
        sphDie("config file '%s' does not exist or is not readable",
                value["config"].asString().c_str());
    }
    
    return ret;
}
Beispiel #10
0
void CharsetFold ( CSphIndex * pIndex, FILE * fp )
{
	CSphVector<BYTE> sBuf1 ( 16384 );
	CSphVector<BYTE> sBuf2 ( 16384 );

	bool bUtf = pIndex->GetTokenizer()->IsUtf8();
	if ( !bUtf )
		sphDie ( "sorry, --fold vs SBCS is not supported just yet" );

	CSphLowercaser tLC = pIndex->GetTokenizer()->GetLowercaser();

#if USE_WINDOWS
	setmode ( fileno(stdout), O_BINARY );
#endif

	int iBuf1 = 0; // how many leftover bytes from previous iteration
	while ( !feof(fp) )
	{
		int iGot = fread ( sBuf1.Begin()+iBuf1, 1, sBuf1.GetLength()-iBuf1, fp );
		if ( iGot<0 )
			sphDie ( "read error: %s", strerror(errno) );

		if ( iGot==0 )
			if ( feof(fp) )
				if ( iBuf1==0 )
					break;


		const BYTE * pIn = sBuf1.Begin();
		const BYTE * pInMax = pIn + iBuf1 + iGot;

		if ( pIn==pInMax && feof(fp) )
			break;

		// tricky bit
		// on full buffer, and not an eof, terminate a bit early
		// to avoid codepoint vs buffer boundary issue
		if ( ( iBuf1+iGot )==sBuf1.GetLength() && iGot!=0 )
			pInMax -= 16;

		// do folding
		BYTE * pOut = sBuf2.Begin();
		BYTE * pOutMax = pOut + sBuf2.GetLength() - 16;
		while ( pIn < pInMax )
		{
			int iCode = sphUTF8Decode ( pIn );
			if ( iCode==0 )
				pIn++; // decoder does not do that!
			assert ( iCode>=0 );

			if ( iCode!=0x09 && iCode!=0x0A && iCode!=0x0D )
			{
				iCode = tLC.ToLower ( iCode ) & 0xffffffUL;
				if ( !iCode )
					iCode = 0x20;
			}

			pOut += sphUTF8Encode ( pOut, iCode );
			if ( pOut>=pOutMax )
			{
				fwrite ( sBuf2.Begin(), 1, pOut-sBuf2.Begin(), stdout );
				pOut = sBuf2.Begin();
			}
		}
		fwrite ( sBuf2.Begin(), 1, pOut-sBuf2.Begin(), stdout );

		// now move around leftovers
		BYTE * pRealEnd = sBuf1.Begin() + iBuf1 + iGot;
		if ( pIn < pRealEnd )
		{
			iBuf1 = pRealEnd - pIn;
			memmove ( sBuf1.Begin(), pIn, iBuf1 );
		}
	}
}
Beispiel #11
0
void DoIndexing ( CSphSource * pSrc, ISphRtIndex * pIndex )
{
    CSphString sError;
    CSphVector<DWORD> dMvas;

    int64_t tmStart = sphMicroTimer ();
    int64_t tmAvgCommit = 0;
    int64_t tmMaxCommit = 0;
    int iCommits = 0;
    for ( ;; )
    {
        if ( !pSrc->IterateDocument ( sError ) )
            sphDie ( "iterate-document failed: %s", sError.cstr() );
        ISphHits * pHitsNext = pSrc->IterateHits ( sError );
        if ( !sError.IsEmpty() )
            sphDie ( "iterate-hits failed: %s", sError.cstr() );

        if ( pSrc->m_tDocInfo.m_iDocID )
            pIndex->AddDocument ( pHitsNext, pSrc->m_tDocInfo, NULL, dMvas, sError );

        if ( ( pSrc->GetStats().m_iTotalDocuments % COMMIT_STEP )==0 || !pSrc->m_tDocInfo.m_iDocID )
        {
            int64_t tmCommit = sphMicroTimer();
            pIndex->Commit ();
            tmCommit = sphMicroTimer()-tmCommit;

            iCommits++;
            tmAvgCommit += tmCommit;
            tmMaxCommit = Max ( tmMaxCommit, tmCommit );

            if ( !pSrc->m_tDocInfo.m_iDocID )
            {
                tmAvgCommit /= iCommits;
                break;
            }
        }

        if (!( pSrc->GetStats().m_iTotalDocuments % 100 ))
            printf ( "%d docs\r", (int)pSrc->GetStats().m_iTotalDocuments );

        static bool bOnce = true;
        if ( iCommits*COMMIT_STEP>=5000 && bOnce )
        {
            printf ( "\n" );
            DoSearch ( pIndex );
            bOnce = false;
        }
    }

    pSrc->Disconnect();

    int64_t tmEnd = sphMicroTimer ();
    float fTotalMB = (float)pSrc->GetStats().m_iTotalBytes/1000000.0f;
    printf ( "commit-step %d, %d docs, %d bytes, %d.%03d sec, %.2f MB/sec\n",
             COMMIT_STEP,
             (int)pSrc->GetStats().m_iTotalDocuments,
             (int)pSrc->GetStats().m_iTotalBytes,
             (int)((tmEnd-tmStart)/1000000), (int)(((tmEnd-tmStart)%1000000)/1000),
             fTotalMB*1000000.0f/(tmEnd-tmStart) );
    printf ( "commit-docs %d, avg %d.%03d msec, max %d.%03d msec\n", COMMIT_STEP,
             (int)(tmAvgCommit/1000), (int)(tmAvgCommit%1000),
             (int)(tmMaxCommit/1000), (int)(tmMaxCommit%1000) );
    g_fTotalMB += fTotalMB;
}
Beispiel #12
0
int main ()
{
    // threads should be initialized before memory allocations
    char cTopOfMainStack;
    sphThreadInit();
    MemorizeStack ( &cTopOfMainStack );

    CSphString sError;
    CSphDictSettings tDictSettings;

    ISphTokenizer * pTok = sphCreateUTF8Tokenizer();
    CSphDict * pDict = sphCreateDictionaryCRC ( tDictSettings, pTok, sError, "rt1" );
    CSphSource * pSrc = SpawnSource ( "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM posting WHERE id<=10000 AND id%2=0", pTok, pDict );

    ISphTokenizer * pTok2 = sphCreateUTF8Tokenizer();
    CSphDict * pDict2 = sphCreateDictionaryCRC ( tDictSettings, pTok, sError, "rt2" );
    CSphSource * pSrc2 = SpawnSource ( "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM posting WHERE id<=10000 AND id%2=1", pTok2, pDict2 );

    CSphSchema tSrcSchema;
    if ( !pSrc->UpdateSchema ( &tSrcSchema, sError ) )
        sphDie ( "update-schema failed: %s", sError.cstr() );

    CSphSchema tSchema; // source schema must be all dynamic attrs; but index ones must be static
    tSchema.m_dFields = tSrcSchema.m_dFields;
    for ( int i=0; i<tSrcSchema.GetAttrsCount(); i++ )
        tSchema.AddAttr ( tSrcSchema.GetAttr(i), false );

    CSphConfigSection tRTConfig;
    sphRTInit();
    sphRTConfigure ( tRTConfig, true );
    SmallStringHash_T< CSphIndex * > dTemp;
    sphReplayBinlog ( dTemp, 0 );
    ISphRtIndex * pIndex = sphCreateIndexRT ( tSchema, "testrt", 32*1024*1024, "data/dump", false );
    pIndex->SetTokenizer ( pTok ); // index will own this pair from now on
    pIndex->SetDictionary ( pDict );
    if ( !pIndex->Prealloc ( false, false, sError ) )
        sphDie ( "prealloc failed: %s", pIndex->GetLastError().cstr() );
    g_pIndex = pIndex;

    // initial indexing
    int64_t tmStart = sphMicroTimer();

    SphThread_t t1, t2;
    sphThreadCreate ( &t1, IndexingThread, pSrc );
    sphThreadCreate ( &t2, IndexingThread, pSrc2 );
    sphThreadJoin ( &t1 );
    sphThreadJoin ( &t2 );

#if 0
    // update
    tParams.m_sQuery = "SELECT id, channel_id, UNIX_TIMESTAMP(published) published, title, UNCOMPRESS(content) content FROM rt2 WHERE id<=10000";
    SetupIndexing ( pSrc, tParams );
    DoIndexing ( pSrc, pIndex );
#endif

    // search
    DoSearch ( pIndex );

    // shutdown index (should cause dump)
    int64_t tmShutdown = sphMicroTimer();

#if SPH_ALLOCS_PROFILER
    printf ( "pre-shutdown allocs=%d, bytes="INT64_FMT"\n", sphAllocsCount(), sphAllocBytes() );
#endif
    SafeDelete ( pIndex );
#if SPH_ALLOCS_PROFILER
    printf ( "post-shutdown allocs=%d, bytes="INT64_FMT"\n", sphAllocsCount(), sphAllocBytes() );
#endif

    int64_t tmEnd = sphMicroTimer();
    printf ( "shutdown done in %d.%03d sec\n", (int)((tmEnd-tmShutdown)/1000000), (int)(((tmEnd-tmShutdown)%1000000)/1000) );
    printf ( "total with shutdown %d.%03d sec, %.2f MB/sec\n",
             (int)((tmEnd-tmStart)/1000000), (int)(((tmEnd-tmStart)%1000000)/1000),
             g_fTotalMB*1000000.0f/(tmEnd-tmStart) );

#if SPH_DEBUG_LEAKS || SPH_ALLOCS_PROFILER
    sphAllocsStats();
#endif
#if USE_WINDOWS
    PROCESS_MEMORY_COUNTERS pmc;
    HANDLE hProcess = OpenProcess ( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, GetCurrentProcessId() );
    if ( hProcess && GetProcessMemoryInfo ( hProcess, &pmc, sizeof(pmc)) )
    {
        printf ( "--- peak-wss=%d, peak-pagefile=%d\n", (int)pmc.PeakWorkingSetSize, (int)pmc.PeakPagefileUsage );
    }
#endif

    SafeDelete ( pIndex );
    sphRTDone ();
}
Beispiel #13
0
void sphDebugDelete ( void * pPtr, bool bArray )
{
	if ( !pPtr )
		return;
	g_tAllocsMutex.Lock();

	CSphMemHeader * pHeader = ((CSphMemHeader*)pPtr)-1;
	switch ( pHeader->m_uMagic )
	{
		case MEMORY_MAGIC_ARRAY:
			if ( !bArray )
				sphDie ( "delete [] on non-array block %d allocated at %s(%d)",
					pHeader->m_iAllocId, pHeader->m_sFile, pHeader->m_iLine );
			break;

		case MEMORY_MAGIC_PLAIN:
			if ( bArray )
				sphDie ( "delete on array block %d allocated at %s(%d)",
					pHeader->m_iAllocId, pHeader->m_sFile, pHeader->m_iLine );
			break;

		case MEMORY_MAGIC_DELETED:
			sphDie ( "double delete on block %d allocated at %s(%d)",
				pHeader->m_iAllocId, pHeader->m_sFile, pHeader->m_iLine );
			break;

		default:
			sphDie ( "delete on unmanaged block at 0x%08x", pPtr );
			return;
	}

	BYTE * pBlock = (BYTE*) pHeader;
	if ( *(DWORD*)( pBlock+pHeader->m_iSize+sizeof(CSphMemHeader) )!=MEMORY_MAGIC_END )
		sphDie ( "out-of-bounds write beyond block %d allocated at %s(%d)",
			pHeader->m_iAllocId, pHeader->m_sFile, pHeader->m_iLine );

	// unchain
	if ( pHeader==g_pAllocs )
		g_pAllocs = g_pAllocs->m_pNext;

	if ( pHeader->m_pPrev )
	{
		assert ( pHeader->m_pPrev->m_uMagic==MEMORY_MAGIC_PLAIN || pHeader->m_pPrev->m_uMagic==MEMORY_MAGIC_ARRAY );
		pHeader->m_pPrev->m_pNext = pHeader->m_pNext;
	}
	if ( pHeader->m_pNext )
	{
		assert ( pHeader->m_pNext->m_uMagic==MEMORY_MAGIC_PLAIN || pHeader->m_pNext->m_uMagic==MEMORY_MAGIC_ARRAY );
		pHeader->m_pNext->m_pPrev = pHeader->m_pPrev;
	}

	pHeader->m_pPrev = NULL;
	pHeader->m_pNext = NULL;

	// mark and delete
	pHeader->m_uMagic = MEMORY_MAGIC_DELETED;

	g_iCurAllocs--;
	g_iCurBytes -= pHeader->m_iSize;

#if SPH_DEBUG_DOFREE
	::free ( pHeader );
#endif

	g_tAllocsMutex.Unlock();
}
Beispiel #14
0
int main ( int iArgs, char ** dArgs )
{
	OutputMode_e eMode = M_DEFAULT;
	bool bUseCustomCharset = false;
	CSphString sDict, sAffix, sLocale, sCharsetFile, sResult = "result.txt";

	printf ( "spelldump, an ispell dictionary dumper\n\n" );

	int i = 1;
	for ( ; i < iArgs; i++ )
	{
		if ( !strcmp ( dArgs[i], "-c" ) )
		{
			if ( ++i==iArgs ) break;
			bUseCustomCharset = true;
			sCharsetFile = dArgs[i];

		} else if ( !strcmp ( dArgs[i], "-m" ) )
		{
			if ( ++i==iArgs ) break;
			char * sMode = dArgs[i];

			if ( !strcmp ( sMode, "debug" ) )		{ eMode = M_DEBUG; continue; }
			if ( !strcmp ( sMode, "duplicates" ) )	{ eMode = M_DUPLICATES; continue; }
			if ( !strcmp ( sMode, "last" ) )		{ eMode = M_LAST; continue; }
			if ( !strcmp ( sMode, "default" ) )		{ eMode = M_DEFAULT; continue; }

			printf ( "Unrecognized mode: %s\n", sMode );
			return 1;

		} else
			break;
	}

	switch ( iArgs - i )
	{
		case 4:
			sLocale = dArgs[i + 3];
		case 3:
			sResult = dArgs[i + 2];
		case 2:
			sAffix = dArgs[i + 1];
			sDict = dArgs[i];
			break;
		default:
			printf ( "Usage: spelldump [options] <dictionary> <affix> [result] [locale-name]\n\n"
				"Options:\n"
				"-c <file>\tuse case convertion defined in <file>\n"
				"-m <mode>\toutput (conflict resolution) mode:\n"
				"\t\tdefault - try to guess the best way to resolve a conflict\n"
				"\t\tlast - choose last entry\n"
				"\t\tdebug - dump all mappings (with rules)\n"
				"\t\tduplicates - dump duplicate mappings only (with rules)\n" );
			if ( iArgs>1 )
			{
				printf ( "\n"
					"Examples:\n"
					"spelldump en.dict en.aff\n"
					"spelldump ru.dict ru.aff ru.txt ru_RU.CP1251\n"
					"spelldump ru.dict ru.aff ru.txt .1251\n" );
			}
			return 1;
	}

	printf ( "Loading dictionary...\n" );
	CISpellDict Dict;
	if ( !Dict.Load ( sDict.cstr () ) )
		sphDie ( "Error loading dictionary file '%s'\n", sDict.IsEmpty () ? "" : sDict.cstr () );

	printf ( "Loading affix file...\n" );
	CISpellAffix Affix ( sLocale.cstr (), bUseCustomCharset ? sCharsetFile.cstr () : NULL );
	if ( !Affix.Load ( sAffix.cstr () ) )
		sphDie ( "Error loading affix file '%s'\n", sAffix.IsEmpty () ? "" : sAffix.cstr () );

	if ( sResult.IsEmpty () )
		sphDie ( "No result file specified\n" );

	FILE * pFile = fopen ( sResult.cstr (), "wt" );
	if ( !pFile )
		sphDie ( "Unable to open '%s' for writing\n", sResult.cstr () );

	if ( eMode!=M_DEFAULT )
		printf ( "Output mode: %s\n", dModeName[eMode] );

	Dict.IterateStart ();
	WordMap_t tWordMap;
	const CISpellDict::CISpellDictWord * pWord = NULL;
	int nDone = 0;
	while ( ( pWord = Dict.IterateNext () )!=NULL )
	{
		EmitResult ( tWordMap, pWord->m_sWord, pWord->m_sWord );

		if ( ( ++nDone % 10 )==0 )
		{
			printf ( "\rDictionary words processed: %d", nDone );
			fflush ( stdout );
		}

		if ( pWord->m_sFlags.IsEmpty() )
			continue;

		CSphString sWord, sWordForCross;
		int iFlagLen = strlen ( pWord->m_sFlags.cstr () );
		for ( int iFlag1 = 0; iFlag1 < iFlagLen; ++iFlag1 )
			for ( int iRule1 = 0; iRule1 < Affix.GetNumRules (); ++iRule1 )
			{
				CISpellAffixRule * pRule1 = Affix.GetRule ( iRule1 );
				if ( pRule1->Flag()!=pWord->m_sFlags.cstr()[iFlag1] )
					continue;

				sWord = pWord->m_sWord;

				if ( !pRule1->Apply ( sWord ) )
					continue;

				EmitResult ( tWordMap, sWord, pWord->m_sWord, pRule1->Flag() );

				// apply other rules
				if ( !Affix.CheckCrosses() )
					continue;

				if ( !pRule1->IsCrossProduct() )
					continue;

				for ( int iFlag2 = iFlag1 + 1; iFlag2 < iFlagLen; ++iFlag2 )
					for ( int iRule2 = 0; iRule2 < Affix.GetNumRules (); ++iRule2 )
					{
						CISpellAffixRule * pRule2 = Affix.GetRule ( iRule2 );
						if ( !pRule2->IsCrossProduct () || pRule2->Flag()!=pWord->m_sFlags.cstr()[iFlag2] ||
							pRule2->IsPrefix()==pRule1->IsPrefix() )
							continue;

						sWordForCross = sWord;
						if ( pRule2->Apply ( sWordForCross ) )
							EmitResult ( tWordMap, sWordForCross, pWord->m_sWord, pRule1->Flag(), pRule2->Flag() );
					}
			}
	}
	printf ( "\rDictionary words processed: %d\n", nDone );

	// output

	CSphVector<const char *> dKeys;
	tWordMap.IterateStart();
	while ( tWordMap.IterateNext() )
		dKeys.Add ( tWordMap.IterateGetKey().cstr() );
	dKeys.Sort ( WordLess() );

	ARRAY_FOREACH ( iKey, dKeys )
	{
		const CSphVector<MapInfo_t> & dWords = tWordMap[dKeys[iKey]];
		const char * sKey = dKeys[iKey];

		switch ( eMode )
		{
			case M_LAST:
				fprintf ( pFile, "%s > %s\n", sKey, dWords.Last().m_sWord.cstr() );
				break;

			case M_EXACT_OR_LONGEST:
			{
				int iMatch = 0;
				int iLength = 0;

				ARRAY_FOREACH ( i, dWords )
				{
					if ( dWords[i].m_sWord==sKey )
					{
						iMatch = i;
						break;
					}

					int iWordLength = strlen ( dWords[i].m_sWord.cstr() );
					if ( iWordLength>iLength )
					{
						iLength = iWordLength;
						iMatch = i;
					}
				}

				fprintf ( pFile, "%s > %s\n", sKey, dWords[iMatch].m_sWord.cstr() );
				break;
			}

			case M_DUPLICATES:
				if ( dWords.GetLength()==1 ) break;
			case M_DEBUG:
				ARRAY_FOREACH ( i, dWords )
					fprintf ( pFile, "%s > %s %s/%d\n", sKey, dWords[i].m_sWord.cstr(),
						dWords[i].m_sRules, dWords.GetLength() );
				break;
		}
	}

	fclose ( pFile );

	return 0;
}
Beispiel #15
0
bool sphPluginReload ( const char * sName, CSphString & sError )
{
#if !HAVE_DLOPEN
	sError = "no dlopen(), no plugins";
	return false;
#else
	// find all plugins from the given library
	CSphScopedLock<CSphMutex> tLock ( g_tPluginMutex );

	CSphVector<PluginKey_t> dKeys;
	CSphVector<PluginDesc_c*> dPlugins;

	g_hPlugins.IterateStart();
	while ( g_hPlugins.IterateNext() )
	{
		PluginDesc_c * v = g_hPlugins.IterateGet();
		if ( v->GetLibName()==sName )
		{
			dKeys.Add ( g_hPlugins.IterateGetKey() );
			dPlugins.Add ( g_hPlugins.IterateGet() );
		}
	}

	// no plugins loaded? oops
	if ( dPlugins.GetLength()==0 )
	{
		sError.SetSprintf ( "no active plugins loaded from %s", sName );
		return false;
	}

	// load new library and check every plugin
#if !USE_WINDOWS
	PluginLib_c * pNewLib = LoadPluginLibrary ( sName, sError, true );
#else
	PluginLib_c * pNewLib = LoadPluginLibrary ( sName, sError );
#endif
	if ( !pNewLib )
		return false;

	// load all plugins
	CSphVector<PluginDesc_c*> dNewPlugins;
	ARRAY_FOREACH ( i, dPlugins )
	{
		PluginDesc_c * pDesc = NULL;
		const SymbolDesc_t * pSym = NULL;
		switch ( dKeys[i].m_eType )
		{
			case PLUGIN_RANKER:					pDesc = new PluginRanker_c ( pNewLib ); pSym = g_dSymbolsRanker; break;
			case PLUGIN_INDEX_TOKEN_FILTER:		pDesc = new PluginTokenFilter_c ( pNewLib ); pSym = g_dSymbolsTokenFilter; break;
			case PLUGIN_QUERY_TOKEN_FILTER:		pDesc = new PluginQueryTokenFilter_c ( pNewLib ); pSym = g_dSymbolsQueryTokenFilter; break;
			case PLUGIN_FUNCTION:				pDesc = new PluginUDF_c ( pNewLib, dPlugins[i]->GetUdfRetType() ); pSym = g_dSymbolsUDF; break;
			default:
				sphDie ( "INTERNAL ERROR: unknown plugin type %d in sphPluginReload()", (int)dKeys[i].m_eType );
				return false;
		}

		if ( !PluginLoadSymbols ( pDesc, pSym, pNewLib->GetHandle(), dKeys[i].m_sName.cstr(), sError ) )
		{
			pDesc->Release();
			break;
		}

		dNewPlugins.Add ( pDesc );
	}