Ejemplo n.º 1
0
/**
 * Clears `doc` and refills it with stored, tokenized fields named
 * "field1", "field2", ... up to "field<numFields>".
 *
 * "field1" holds the text "a<n>"; every further field holds "a<n> b<n>".
 * "field1" is added unconditionally, so the document has at least one field
 * even when numFields is smaller than 1.
 *
 * @param doc        document that is cleared and repopulated
 * @param n          number embedded in the field text
 * @param numFields  number of fields wanted
 */
void createDocument(Document& doc, int n, int numFields) {
    doc.clear();
    StringBuffer sb;
    sb.append( _T("a"));
    sb.appendInt(n);
    doc.add(* _CLNEW Field( _T("field1"), sb.getBuffer(), Field::STORE_YES | Field::INDEX_TOKENIZED));
    sb.append(_T(" b"));
    sb.appendInt(n);

    // "field" (5) + the longest decimal int (11 with sign) + terminator = 17,
    // so 32 always fits; a 10-element buffer overflows from field10000 on.
    const int kNameCapacity = 32;
    TCHAR buf[kNameCapacity];
    for (int i = 1; i < numFields; i++) {
        _sntprintf(buf, kNameCapacity, _T("field%d"), i+1);
        // Not every _sntprintf flavour terminates the output when it truncates.
        buf[kNameCapacity - 1] = 0;
        doc.add(* _CLNEW Field(buf, sb.getBuffer(), Field::STORE_YES | Field::INDEX_TOKENIZED));
    }
}
Ejemplo n.º 2
0
/**
 * Indexer thread body.
 *
 * Until ATOMIC_SEARCH_RUN_TIME_SEC seconds have passed, or atomicSearchFailed
 * is set, it repeatedly replaces the documents with ids 0..99 through
 * IndexWriter::updateDocument. Each pass stores new "contents" text produced
 * by English::IntToEnglish(i + 10*count).
 *
 * _writer is cast to the IndexWriter* to work on. A CLuceneError is printed
 * to stderr and sets atomicSearchFailed.
 */
_LUCENE_THREAD_FUNC(atomicIndexTest, _writer){
  IndexWriter* writer= (IndexWriter*)_writer;
  uint64_t stopTime = Misc::currentTimeMillis() + 1000*ATOMIC_SEARCH_RUN_TIME_SEC;
  int count = 0;
  try {
    while(Misc::currentTimeMillis() < stopTime && !atomicSearchFailed) {
      // Update all 100 docs...
      TCHAR buf[30];
      StringBuffer sb;
      for(int i=0; i<100; i++) {
        Document d;

        sb.clear();
        English::IntToEnglish(i+10*count, &sb);
        d.add(*_CLNEW Field(_T("contents"), sb.getBuffer() , Field::STORE_NO | Field::INDEX_TOKENIZED));

        // buf only ever needs the decimal id. No random number is written
        // into it first: the value was never read, and rand() is not
        // guaranteed to be safe to call from several threads at once.
        _i64tot(i,buf,10);
        d.add(*_CLNEW Field(_T("id"), buf, Field::STORE_YES | Field::INDEX_UNTOKENIZED));

        // Replace whichever document currently carries this id.
        Term* t = _CLNEW Term(_T("id"), buf);
        writer->updateDocument(t, &d);
        _CLDECDELETE(t);
      }

      count++;
    }
  } catch (CLuceneError& e) {
    fprintf(stderr, "err 1: #%d: %s\n", e.number(), e.what());
    atomicSearchFailed = true;
  }

  _LUCENE_THREAD_FUNC_RETURN(0);
}
Ejemplo n.º 3
0
	/**
	 * Fills tfExplanation with the term-frequency factor for document _doc.
	 *
	 * The scorer is stepped with next() until it is exhausted or doc() is no
	 * longer below _doc. If it then sits exactly on _doc the member `freq` is
	 * used as the phrase frequency, otherwise 0. The explanation value is
	 * getSimilarity()->tf(phraseFreq) and its description is
	 * "tf(phraseFreq=<phraseFreq>)".
	 */
	void PhraseScorer::explain(int32_t _doc, Explanation* tfExplanation) {
		// Step forward until exhausted or positioned at/after the target.
		for (;;) {
			if (!next())
				break;
			if (doc() >= _doc)
				break;
		}

		float_t phraseFreq = 0.0f;
		if (doc() == _doc)
			phraseFreq = freq;
		tfExplanation->setValue(getSimilarity()->tf(phraseFreq));

		StringBuffer description;
		description.append(_T("tf(phraseFreq="));
		description.appendFloat(phraseFreq,2);
		description.append(_T(")"));
		tfExplanation->setDescription(description.getBuffer());
	}
Ejemplo n.º 4
0
/**
 * Reads the remainder of an inclusive range, up to and including the
 * closing ']'.
 *
 * @param prev   character already consumed by the caller; it becomes the
 *               first character of the token text
 * @param token  receives the collected text with type QueryToken::RANGEIN
 * @return true once ']' has been read. If the input ends first (Eos() or
 *         GetNext() returning -1) the parser exception is raised through
 *         queryparser; false is returned only if that call comes back.
 */
bool Lexer::ReadInclusiveRange(const TCHAR prev, QueryToken* token) {
   int ch = prev;
   StringBuffer range;
   range.appendChar(ch);

   while(!reader->Eos()) {
      ch = reader->GetNext();
      if ( ch == -1 )
         break;
      range.appendChar(ch);

      if(ch == ']'){
         token->set(range.getBuffer(), QueryToken::RANGEIN);
         return true;
      }
   }
   // Report column and line; passing the column twice mislabels the
   // "%d::%d" position in the message.
   queryparser->throwParserException(_T("Unterminated inclusive range! %d %d::%d"),' ',
      reader->Column(),reader->Line());
   return false;
}
Ejemplo n.º 5
0
bool Lexer::ReadQuoted(const UChar inCharPrev, QueryToken* outQueryToken)
{
	int ch = inCharPrev;
	StringBuffer quoted;
	quoted.appendChar(ch);

	while (!reader->Eos()) {
		ch = reader->GetNext();
		if (ch == -1)
			break;
		quoted.appendChar(ch);
		if (ch == '"') {
			outQueryToken->set(quoted.getBuffer(), 0, quoted.length(), QueryToken::QUOTED);
			return true;
		}
	}
	gLog.log(eTypLogError, "Err > Lexer: Unterminated string at %d::%d", reader->Column(), reader->Line());

	return false;
}
Ejemplo n.º 6
0
/**
 * Collects a full-field expression, i.e. everything up to and including the
 * closing '>'.
 *
 * @param inCharPrev     character already consumed by the caller; it starts
 *                       the token text
 * @param outQueryToken  on success receives the collected text with type
 *                       QueryToken::FULLFIELD
 * @return true when '>' was read; false, after logging an error with the
 *         reader's column and line, when the input ended first
 */
bool Lexer::ReadFullField(const UChar inCharPrev, QueryToken* outQueryToken)
{
	int ch = inCharPrev;
	StringBuffer range;
	range.appendChar(ch);

	while (!reader->Eos()) {
		ch = reader->GetNext();
		if (ch == -1)
			break;
		range.appendChar(ch);
		if (ch == '>') {
			outQueryToken->set(range.getBuffer(), 0, range.length(), QueryToken::FULLFIELD);
			return true;
		}
	}
	// Name the construct that is actually unterminated here (a full field,
	// not an exclusive range) so the log points at the right syntax.
	gLog.log(eTypLogError, "Err > Lexer: Unterminated full field at %d::%d", reader->Column(), reader->Line());

	return false;
}
Ejemplo n.º 7
0
/**
 * Reads the remainder of a quoted string, up to and including the
 * closing '"'.
 *
 * @param prev   character already consumed by the caller; it becomes the
 *               first character of the token text
 * @param token  receives the collected text with type QueryToken::QUOTED
 * @return true once '"' has been read. If the input ends first (Eos() or
 *         GetNext() returning -1) the parser exception is raised through
 *         queryparser; false is returned only if that call comes back.
 */
bool Lexer::ReadQuoted(const TCHAR prev, QueryToken* token) {
   int ch = prev;
   StringBuffer quoted;
   quoted.appendChar(ch);

   while(!reader->Eos()) {
      ch = reader->GetNext();

      if (ch==-1)
         break;

      quoted.appendChar(ch);

      if(ch == '"'){
         token->set(quoted.getBuffer(), QueryToken::QUOTED);
         return true;
      }
   }
   // Report column and line; passing the column twice mislabels the
   // "%d::%d" position in the message.
   queryparser->throwParserException(_T("Unterminated string! %d %d::%d"),' ',
      reader->Column(),reader->Line());
   return false;
}
Ejemplo n.º 8
0
/*
  Stress test against a single index: creates a fresh index of 100 documents
  in `directory`, then starts two atomicIndexTest threads (both given the same
  IndexWriter) and two atomicSearchTest threads (both given the directory),
  waits for all four, and asserts that atomicSearchFailed is not set.
 */
void runThreadingTests(CuTest* tc, Directory& directory){

  SimpleAnalyzer ANALYZER;
  IndexWriter writer(&directory, &ANALYZER, true);

  // Base index: ids "0".."99", each with its number as "contents" text
  // produced by English::IntToEnglish.
  TCHAR idText[10];
  StringBuffer words;
  int docNo = 0;
  while (docNo < 100) {
    Document baseDoc;
    _i64tot(docNo,idText,10);
    baseDoc.add(*_CLNEW Field(_T("id"), idText, Field::STORE_YES | Field::INDEX_UNTOKENIZED));

    words.clear();
    English::IntToEnglish(docNo, &words);
    baseDoc.add(*_CLNEW Field(_T("contents"), words.getBuffer(), Field::STORE_NO | Field::INDEX_TOKENIZED));
    writer.addDocument(&baseDoc);
    ++docNo;
  }
  writer.flush();

  // Slots 0-1 are the indexers, slots 2-3 the searchers.
  atomicSearchThreads = _CL_NEWARRAY(_LUCENE_THREADID_TYPE, 4);
  atomicSearchThreads[0] = _LUCENE_THREAD_CREATE(&atomicIndexTest, &writer);
  atomicSearchThreads[1] = _LUCENE_THREAD_CREATE(&atomicIndexTest, &writer);
  atomicSearchThreads[2] = _LUCENE_THREAD_CREATE(&atomicSearchTest, &directory);
  atomicSearchThreads[3] = _LUCENE_THREAD_CREATE(&atomicSearchTest, &directory);

  // Wait for every thread before the writer is closed.
  for ( int slot=0; slot<4; ++slot ){
    _LUCENE_THREAD_JOIN(atomicSearchThreads[slot]);
  }
  _CLDELETE_ARRAY(atomicSearchThreads);

  writer.close();

  CuAssert(tc, _T("hit unexpected exception in one of the threads\n"), !atomicSearchFailed);
}
Ejemplo n.º 9
0
void FileDocument(const char* f, Document* doc){

    TCHAR tf[CL_MAX_DIR];
    STRCPY_AtoT(tf,f,CL_MAX_DIR);
    doc->add( *_CLNEW Field(_T("path"), tf, Field::STORE_YES | Field::INDEX_UNTOKENIZED ) );

    FILE* fh = fopen(f,"r");
	if ( fh != NULL ){
		StringBuffer str;
		char abuf[1024];
		TCHAR tbuf[1024];
		size_t r;
		do{
			r = fread(abuf,1,1023,fh);
			abuf[r]=0;
			STRCPY_AtoT(tbuf,abuf,r);
			tbuf[r]=0;
			str.append(tbuf);
		}while(r>0);
		fclose(fh);

		doc->add( *_CLNEW Field(_T("contents"), str.getBuffer(), Field::STORE_YES | Field::INDEX_TOKENIZED) );
	}
}
Ejemplo n.º 10
0
bool Lexer::ReadTerm(const TCHAR prev, QueryToken* token) {
   int ch = prev;
   bool completed = false;
   int32_t asteriskCount = 0;
   bool hasQuestion = false;

   StringBuffer val;
   TCHAR buf[3]; //used for readescaped

   while(true) {
      switch(ch) {
		  case -1:
			  break;
         case '\\':
         {
            if ( ReadEscape(ch, buf) )
                val.append( buf );
			else
				return false;
         }
         break;

         case LUCENE_WILDCARDTERMENUM_WILDCARD_STRING:
            asteriskCount++;
            val.appendChar(ch);
            break;
         case LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR:
            hasQuestion = true;
            val.appendChar(ch);
            break;
         case '\n':
         case '\t':
         case ' ':
         case '+':
         case '-':
         case '!':
         case '(':
         case ')':
         case ':':
         case '^':
         case '[':
         case ']':
         case '{':
         case '}':
         case '~':
         case '"':
            // create new QueryToken
            reader->UnGet();
            completed = true;
            break;
         default:
            val.appendChar(ch);
            break;
   // end of switch
      }

      if(completed || ch==-1 || reader->Eos() )
         break;
      else
         ch = reader->GetNext();
   }

   // create new QueryToken
   if(hasQuestion)
      token->set(val.getBuffer(), QueryToken::WILDTERM);
   else if(asteriskCount == 1 && val.getBuffer()[val.length() - 1] == '*')
      token->set(val.getBuffer(), QueryToken::PREFIXTERM);
   else if(asteriskCount > 0)
      token->set(val.getBuffer(), QueryToken::WILDTERM);
   else if( _tcsicmp(val.getBuffer(), _T("AND"))==0 || _tcscmp(val.getBuffer(), _T("&&"))==0 )
      token->set(val.getBuffer(), QueryToken::AND_);
   else if( _tcsicmp(val.getBuffer(), _T("OR"))==0 || _tcscmp(val.getBuffer(), _T("||"))==0)
      token->set(val.getBuffer(), QueryToken::OR);
   else if( _tcsicmp(val.getBuffer(), _T("NOT"))==0 )
      token->set(val.getBuffer(), QueryToken::NOT);
   else {
      bool isnum = true;
      int32_t nlen=val.length();
      for (int32_t i=0;i<nlen;++i) {
         TCHAR ch=val.getBuffer()[i];
         if ( _istalpha(ch) ) {
            isnum=false;
            break;
         }
      }

      if ( isnum )
         token->set(val.getBuffer(), QueryToken::NUMBER);
      else
         token->set(val.getBuffer(), QueryToken::TERM);
   }
   return true;
}
Ejemplo n.º 11
0
bool Lexer::ReadTerm(const UChar inCharPrev, QueryToken* outQueryToken)
{
	int ch = inCharPrev;
	bool completed = false;
	int32_t asteriskCount = 0;
	bool hasQuestion = false;

	StringBuffer val;
	UChar buf[3]; //used for readescaped

	while (true) {
		switch (ch) {
		case -1:
			break;
		case '\\': {
			if (ReadEscape(ch, buf))
				val.append(buf);
			else
				return false;
		}
		break;

		case LUCENE_WILDCARDTERMENUM_WILDCARD_STRING:
			asteriskCount++;
			val.appendChar(ch);
			break;
		case LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR:
			hasQuestion = true;
			val.appendChar(ch);
			break;
		case '\n':
		case '\t':
		case ' ':
		case '+':
		case '-':
		case '!':
		case '(':
		case ')':
		case ':':
		case '^':
		case '[':
		case ']':
		case '{':
		case '}':
		case '<':
		case '>':
		case '~':
		case '"':
			// create new QueryToken
			reader->UnGet();
			completed = true;
			break;
		default:
			val.appendChar(ch);
			break;
			// end of switch
		}

		if (completed || ch == -1 || reader->Eos())
			break;
		else
			ch = reader->GetNext();
	}

	// Create new QueryToken
	const UChar *uStr = val.getBuffer();
	if (hasQuestion)
		outQueryToken->set(uStr, QueryToken::WILDTERM);
	else if (asteriskCount == 1 && uStr[val.length() - 1] == '*')
		outQueryToken->set(uStr, QueryToken::PREFIXTERM);
	else if (asteriskCount > 0)
		outQueryToken->set(uStr, QueryToken::WILDTERM);
	else if (_tcsicmp(uStr, cStrQueryTokenAnd1) == 0 || _tcscmp(uStr, cStrQueryTokenAnd2) == 0)
		outQueryToken->set(uStr, QueryToken::AND_);
	else if (_tcsicmp(uStr, cStrQueryTokenOr1) == 0 || _tcscmp(uStr, cStrQueryTokenOr2) == 0)
		outQueryToken->set(uStr, QueryToken::OR);
	else if (_tcsicmp(uStr, cStrQueryTokenNot) == 0)
		outQueryToken->set(uStr, QueryToken::NOT);
	else {
		bool isnum = true;
		int32_t nlen = val.length();
		for (int32_t i = 0;i < nlen; ++i) {
			UChar ch = uStr[i];
			if (_istalpha(ch)) {
				isnum = false;
				break;
			}
		}
		if (isnum)
			outQueryToken->set(val.getBuffer(), 0, val.length(), QueryToken::NUMBER);
		else
			outQueryToken->set(val.getBuffer(), 0, val.length(), QueryToken::TERM);
	}

	return true;
}