void CCharacterScanJob::start()
{
	// open the output file for the character table
	CSString filename= NLMISC::CPath::getFullPath(STAT_GLOBALS::getOutputFilePath(getOutputPath()))+_OutputName+"tbl.csv";
	nlinfo("Opening output file: %s",filename.c_str());
	_CharTblFile=fopen(filename.c_str(),"wb");
	if (_CharTblFile==NULL)
	{
		nlwarning("Failed to open output file: %s",filename.c_str());
		_State=ERROR;
		return;
	}

	// write a header line to the _CharTblFile file
	CSString rowTxt;
	for (uint32 i=0;i<_TblCols.size();++i)
	{
		rowTxt+=_TblCols[i]+',';
	}
	fprintf(_CharTblFile,"%s\n",rowTxt.rightCrop(1).c_str());
	fflush(_CharTblFile);

	// setup the file list before the job begins
	nlinfo("Scanning for input files...");
	_Files.clear();
	getFileList(_Files);
	nlinfo("Input file scan completed: %d files found ... %s",_Files.size(),_Files.empty()?"Nothing to do!":"starting work");

	// set the state to 'WORK' meaning we've finished init and now it's ok to do some work
	_State=WORK;
}
Beispiel #2
0
/*
 * Set the result vector with strings corresponding to the input string:
 * - If inputStr is partially or completely found in the keys, all the matching <key,words> are returned;
 * - If inputStr is partially or completely in the words, all the matching <key, words> are returned.
 * The following tags can modify the behaviour of the search algorithm:
 * - ^mystring returns mystring only if it is at the beginning of a key or word
 * - mystring$ returns mystring only if it is at the end of a key or word
 * All returned words are in UTF8.
 */
void CWordsDictionary::lookup( const CSString& inputStr, CVectorSString& resultVec ) const
{
	// Prepare search string
	if ( inputStr.empty() )
		return;

	CSString searchStr = inputStr;
	bool findAtBeginning = false, findAtEnd = false;
	if ( searchStr[0] == '^' )
	{
		searchStr = searchStr.substr( 1 );
		findAtBeginning = true;
	}
	if ( searchStr[searchStr.size()-1] == '$' )
	{
		searchStr = searchStr.rightCrop( 1 );
		findAtEnd = true;
	}

	// Search
	const vector<string> &vec = reinterpret_cast<const vector<string>&>(_Keys);
//	for ( CVectorSString::const_iterator ivs=_Keys.begin(); ivs!=_Keys.end(); ++ivs )
	for ( vector<string>::const_iterator ivs=vec.begin(); ivs!=vec.end(); ++ivs )
	{
		const CSString& key = *ivs;
		string::size_type p;
		if ( (p = key.findNS( searchStr.c_str() )) != string::npos )
		{
			if ( ((!findAtBeginning) || (p==0)) && ((!findAtEnd) || (p==key.size()-searchStr.size())) )
				resultVec.push_back( makeResult( key, _Words[ivs-vec.begin()] ) );
		}
	}
	for ( CVectorSString::const_iterator ivs=_Words.begin(); ivs!=_Words.end(); ++ivs )
	{
		const CSString& word = *ivs;
		string::size_type p;
		if ( (p = word.findNS( searchStr.c_str() )) != string::npos )
		{
			if ( ((!findAtBeginning) || (p==0)) && ((!findAtEnd) || (p==word.size()-searchStr.size())) )
				resultVec.push_back( makeResult( _Keys[ivs-_Words.begin()], word ) );
		}
	}
}