void createDocument(Document& doc, int n, int numFields) {
    doc.clear();
    StringBuffer sb;
    sb.append(_T("a"));
    sb.appendInt(n);
    doc.add(*_CLNEW Field(_T("field1"), sb.getBuffer(),
                          Field::STORE_YES | Field::INDEX_TOKENIZED));
    sb.append(_T(" b"));
    sb.appendInt(n);
    for (int i = 1; i < numFields; i++) {
        TCHAR buf[10];
        _sntprintf(buf, 10, _T("field%d"), i + 1);
        doc.add(*_CLNEW Field(buf, sb.getBuffer(),
                              Field::STORE_YES | Field::INDEX_TOKENIZED));
    }
}
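/* A minimal usage sketch (not part of the original listing) showing how
 * createDocument might be driven to populate an index. The helper name and
 * the document/field counts are assumptions; SimpleAnalyzer, IndexWriter,
 * addDocument and close are the CLucene calls already used elsewhere in
 * this listing. */
void buildSampleIndex(Directory* directory) {
    SimpleAnalyzer analyzer;
    IndexWriter writer(directory, &analyzer, true);
    Document doc;
    for (int n = 0; n < 25; n++) {
        createDocument(doc, n, 4);   // fills "field1".."field4"; doc.clear() is called inside
        writer.addDocument(&doc);
    }
    writer.close();
}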
_LUCENE_THREAD_FUNC(atomicIndexTest, _writer) {
    IndexWriter* writer = (IndexWriter*)_writer;
    uint64_t stopTime = Misc::currentTimeMillis() + 1000 * ATOMIC_SEARCH_RUN_TIME_SEC;
    int count = 0;
    try {
        while (Misc::currentTimeMillis() < stopTime && !atomicSearchFailed) {
            // Update all 100 docs...
            TCHAR buf[30];
            StringBuffer sb;
            for (int i = 0; i < 100; i++) {
                Document d;
                _i64tot(rand(), buf, 10);
                sb.clear();
                English::IntToEnglish(i + 10 * count, &sb);
                d.add(*_CLNEW Field(_T("contents"), sb.getBuffer(),
                                    Field::STORE_NO | Field::INDEX_TOKENIZED));
                _i64tot(i, buf, 10);
                d.add(*_CLNEW Field(_T("id"), buf,
                                    Field::STORE_YES | Field::INDEX_UNTOKENIZED));
                Term* t = _CLNEW Term(_T("id"), buf);
                writer->updateDocument(t, &d);
                _CLDECDELETE(t);
            }
            count++;
        }
    } catch (CLuceneError& e) {
        fprintf(stderr, "err 1: #%d: %s\n", e.number(), e.what());
        atomicSearchFailed = true;
    }
    _LUCENE_THREAD_FUNC_RETURN(0);
}
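/* The companion search thread referenced by runThreadingTests below is not
 * part of this listing; this is only a hedged sketch of what such a thread
 * could look like. The function name, the queried term, and the cleanup
 * order are assumptions; IndexReader::open, IndexSearcher, TermQuery and
 * Hits are standard CLucene API. */
_LUCENE_THREAD_FUNC(atomicSearchTestSketch, _directory) {
    Directory* directory = (Directory*)_directory;
    uint64_t stopTime = Misc::currentTimeMillis() + 1000 * ATOMIC_SEARCH_RUN_TIME_SEC;
    try {
        while (Misc::currentTimeMillis() < stopTime && !atomicSearchFailed) {
            // Re-open the index and run a simple term query against it.
            IndexReader* reader = IndexReader::open(directory);
            IndexSearcher searcher(reader);
            Term* t = _CLNEW Term(_T("contents"), _T("ten"));
            Query* q = _CLNEW TermQuery(t);
            Hits* hits = searcher.search(q);
            _CLDELETE(hits);
            _CLDELETE(q);
            _CLDECDELETE(t);
            searcher.close();
            reader->close();
            _CLDELETE(reader);
        }
    } catch (CLuceneError& e) {
        fprintf(stderr, "err 2: #%d: %s\n", e.number(), e.what());
        atomicSearchFailed = true;
    }
    _LUCENE_THREAD_FUNC_RETURN(0);
}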
void PhraseScorer::explain(int32_t _doc, Explanation* tfExplanation) {
    // Advance this scorer to the target document (or past it, if it does not match).
    while (next() && doc() < _doc) {
    }
    float_t phraseFreq = (doc() == _doc) ? freq : 0.0f;
    tfExplanation->setValue(getSimilarity()->tf(phraseFreq));

    // The description ends up looking like "tf(phraseFreq=3.00)".
    StringBuffer buf;
    buf.append(_T("tf(phraseFreq="));
    buf.appendFloat(phraseFreq, 2);
    buf.append(_T(")"));
    tfExplanation->setDescription(buf.getBuffer());
}
bool Lexer::ReadInclusiveRange(const TCHAR prev, QueryToken* token) {
    int ch = prev;
    StringBuffer range;
    range.appendChar(ch);

    while (!reader->Eos()) {
        ch = reader->GetNext();
        if (ch == -1)
            break;
        range.appendChar(ch);
        if (ch == ']') {
            token->set(range.getBuffer(), QueryToken::RANGEIN);
            return true;
        }
    }
    queryparser->throwParserException(_T("Unterminated inclusive range! %d %d::%d"), ' ',
                                      reader->Column(), reader->Line());
    return false;
}
bool Lexer::ReadQuoted(const UChar inCharPrev, QueryToken* outQueryToken) {
    int ch = inCharPrev;
    StringBuffer quoted;
    quoted.appendChar(ch);

    while (!reader->Eos()) {
        ch = reader->GetNext();
        if (ch == -1)
            break;
        quoted.appendChar(ch);
        if (ch == '"') {
            outQueryToken->set(quoted.getBuffer(), 0, quoted.length(), QueryToken::QUOTED);
            return true;
        }
    }
    gLog.log(eTypLogError, "Err > Lexer: Unterminated string at %d::%d",
             reader->Column(), reader->Line());
    return false;
}
bool Lexer::ReadFullField(const UChar inCharPrev, QueryToken* outQueryToken) {
    int ch = inCharPrev;
    StringBuffer range;
    range.appendChar(ch);

    while (!reader->Eos()) {
        ch = reader->GetNext();
        if (ch == -1)
            break;
        range.appendChar(ch);
        if (ch == '>') {
            outQueryToken->set(range.getBuffer(), 0, range.length(), QueryToken::FULLFIELD);
            return true;
        }
    }
    gLog.log(eTypLogError, "Err > Lexer: Unterminated full field at %d::%d",
             reader->Column(), reader->Line());
    return false;
}
bool Lexer::ReadQuoted(const TCHAR prev, QueryToken* token) {
    int ch = prev;
    StringBuffer quoted;
    quoted.appendChar(ch);

    while (!reader->Eos()) {
        ch = reader->GetNext();
        if (ch == -1)
            break;
        quoted.appendChar(ch);
        if (ch == '"') {
            token->set(quoted.getBuffer(), QueryToken::QUOTED);
            return true;
        }
    }
    queryparser->throwParserException(_T("Unterminated string! %d %d::%d"), ' ',
                                      reader->Column(), reader->Line());
    return false;
}
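/* Behaviour of the range/quoted/full-field readers above, derived from the
 * code: each is entered with the opening character already consumed and
 * copies characters verbatim (no escape handling) until its closing
 * delimiter, for example:
 *   "[alpha TO omega]"     -> RANGEIN token
 *   "\"hello world\""      -> QUOTED token
 *   "<entire field text>"  -> FULLFIELD token (UChar lexer only)
 * Reaching end-of-stream first reports an unterminated-token error and the
 * reader returns false. */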
/* Run two indexers and two searchers against a single index as a stress test. */
void runThreadingTests(CuTest* tc, Directory& directory) {
    SimpleAnalyzer ANALYZER;
    IndexWriter writer(&directory, &ANALYZER, true);

    // Establish a base index of 100 docs:
    StringBuffer sb;
    TCHAR buf[10];
    for (int i = 0; i < 100; i++) {
        Document d;
        _i64tot(i, buf, 10);
        d.add(*_CLNEW Field(_T("id"), buf, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
        sb.clear();
        English::IntToEnglish(i, &sb);
        d.add(*_CLNEW Field(_T("contents"), sb.getBuffer(), Field::STORE_NO | Field::INDEX_TOKENIZED));
        writer.addDocument(&d);
    }
    writer.flush();

    // Update and search the index concurrently from multiple threads...
    atomicSearchThreads = _CL_NEWARRAY(_LUCENE_THREADID_TYPE, 4);
    atomicSearchThreads[0] = _LUCENE_THREAD_CREATE(&atomicIndexTest, &writer);
    atomicSearchThreads[1] = _LUCENE_THREAD_CREATE(&atomicIndexTest, &writer);
    atomicSearchThreads[2] = _LUCENE_THREAD_CREATE(&atomicSearchTest, &directory);
    atomicSearchThreads[3] = _LUCENE_THREAD_CREATE(&atomicSearchTest, &directory);

    for (int i = 0; i < 4; i++) {
        _LUCENE_THREAD_JOIN(atomicSearchThreads[i]);
    }
    _CLDELETE_ARRAY(atomicSearchThreads);

    writer.close();
    CuAssert(tc, _T("hit unexpected exception in one of the threads\n"), !atomicSearchFailed);
}
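/* A minimal sketch (not part of the original listing) of how the stress test
 * above might be invoked from a CuTest case. The test name is an assumption;
 * RAMDirectory is CLucene's in-memory Directory, so no filesystem state is
 * touched. */
void testAtomicUpdates(CuTest* tc) {
    atomicSearchFailed = false;   // reset the shared failure flag used by the worker threads
    RAMDirectory directory;       // in-memory index
    runThreadingTests(tc, directory);
    directory.close();
}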
void FileDocument(const char* f, Document* doc) {
    // Store and index the file path untokenized under "path".
    TCHAR tf[CL_MAX_DIR];
    STRCPY_AtoT(tf, f, CL_MAX_DIR);
    doc->add(*_CLNEW Field(_T("path"), tf, Field::STORE_YES | Field::INDEX_UNTOKENIZED));

    // Read the file in 1KB chunks, convert to TCHAR, and add it tokenized under "contents".
    FILE* fh = fopen(f, "r");
    if (fh != NULL) {
        StringBuffer str;
        char abuf[1024];
        TCHAR tbuf[1024];
        size_t r;
        do {
            r = fread(abuf, 1, 1023, fh);
            abuf[r] = 0;
            STRCPY_AtoT(tbuf, abuf, r);
            tbuf[r] = 0;
            str.append(tbuf);
        } while (r > 0);
        fclose(fh);
        doc->add(*_CLNEW Field(_T("contents"), str.getBuffer(), Field::STORE_YES | Field::INDEX_TOKENIZED));
    }
}
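/* A minimal sketch (not part of the original listing) showing FileDocument in
 * use. The file list, index path, and helper name are assumptions; the
 * IndexWriter calls mirror the test code above. */
void indexFiles(const char** files, int fileCount, const char* indexPath) {
    SimpleAnalyzer analyzer;
    IndexWriter writer(indexPath, &analyzer, true);
    for (int i = 0; i < fileCount; i++) {
        Document doc;
        FileDocument(files[i], &doc);   // adds the "path" and "contents" fields
        writer.addDocument(&doc);
    }
    writer.close();
}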
bool Lexer::ReadTerm(const TCHAR prev, QueryToken* token) {
    int ch = prev;
    bool completed = false;
    int32_t asteriskCount = 0;
    bool hasQuestion = false;

    StringBuffer val;
    TCHAR buf[3]; // used for ReadEscape

    while (true) {
        switch (ch) {
        case -1:
            break;
        case '\\':
            if (ReadEscape(ch, buf))
                val.append(buf);
            else
                return false;
            break;
        case LUCENE_WILDCARDTERMENUM_WILDCARD_STRING:
            asteriskCount++;
            val.appendChar(ch);
            break;
        case LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR:
            hasQuestion = true;
            val.appendChar(ch);
            break;
        case '\n':
        case '\t':
        case ' ':
        case '+':
        case '-':
        case '!':
        case '(':
        case ')':
        case ':':
        case '^':
        case '[':
        case ']':
        case '{':
        case '}':
        case '~':
        case '"':
            // create new QueryToken
            reader->UnGet();
            completed = true;
            break;
        default:
            val.appendChar(ch);
            break;
        } // end of switch

        if (completed || ch == -1 || reader->Eos())
            break;
        else
            ch = reader->GetNext();
    }

    // create new QueryToken
    if (hasQuestion)
        token->set(val.getBuffer(), QueryToken::WILDTERM);
    else if (asteriskCount == 1 && val.getBuffer()[val.length() - 1] == '*')
        token->set(val.getBuffer(), QueryToken::PREFIXTERM);
    else if (asteriskCount > 0)
        token->set(val.getBuffer(), QueryToken::WILDTERM);
    else if (_tcsicmp(val.getBuffer(), _T("AND")) == 0 || _tcscmp(val.getBuffer(), _T("&&")) == 0)
        token->set(val.getBuffer(), QueryToken::AND_);
    else if (_tcsicmp(val.getBuffer(), _T("OR")) == 0 || _tcscmp(val.getBuffer(), _T("||")) == 0)
        token->set(val.getBuffer(), QueryToken::OR);
    else if (_tcsicmp(val.getBuffer(), _T("NOT")) == 0)
        token->set(val.getBuffer(), QueryToken::NOT);
    else {
        bool isnum = true;
        int32_t nlen = val.length();
        for (int32_t i = 0; i < nlen; ++i) {
            TCHAR c = val.getBuffer()[i];
            if (_istalpha(c)) {
                isnum = false;
                break;
            }
        }
        if (isnum)
            token->set(val.getBuffer(), QueryToken::NUMBER);
        else
            token->set(val.getBuffer(), QueryToken::TERM);
    }
    return true;
}
bool Lexer::ReadTerm(const UChar inCharPrev, QueryToken* outQueryToken) {
    int ch = inCharPrev;
    bool completed = false;
    int32_t asteriskCount = 0;
    bool hasQuestion = false;

    StringBuffer val;
    UChar buf[3]; // used for ReadEscape

    while (true) {
        switch (ch) {
        case -1:
            break;
        case '\\':
            if (ReadEscape(ch, buf))
                val.append(buf);
            else
                return false;
            break;
        case LUCENE_WILDCARDTERMENUM_WILDCARD_STRING:
            asteriskCount++;
            val.appendChar(ch);
            break;
        case LUCENE_WILDCARDTERMENUM_WILDCARD_CHAR:
            hasQuestion = true;
            val.appendChar(ch);
            break;
        case '\n':
        case '\t':
        case ' ':
        case '+':
        case '-':
        case '!':
        case '(':
        case ')':
        case ':':
        case '^':
        case '[':
        case ']':
        case '{':
        case '}':
        case '<':
        case '>':
        case '~':
        case '"':
            // create new QueryToken
            reader->UnGet();
            completed = true;
            break;
        default:
            val.appendChar(ch);
            break;
        } // end of switch

        if (completed || ch == -1 || reader->Eos())
            break;
        else
            ch = reader->GetNext();
    }

    // Create new QueryToken
    const UChar* uStr = val.getBuffer();
    if (hasQuestion)
        outQueryToken->set(uStr, QueryToken::WILDTERM);
    else if (asteriskCount == 1 && uStr[val.length() - 1] == '*')
        outQueryToken->set(uStr, QueryToken::PREFIXTERM);
    else if (asteriskCount > 0)
        outQueryToken->set(uStr, QueryToken::WILDTERM);
    else if (_tcsicmp(uStr, cStrQueryTokenAnd1) == 0 || _tcscmp(uStr, cStrQueryTokenAnd2) == 0)
        outQueryToken->set(uStr, QueryToken::AND_);
    else if (_tcsicmp(uStr, cStrQueryTokenOr1) == 0 || _tcscmp(uStr, cStrQueryTokenOr2) == 0)
        outQueryToken->set(uStr, QueryToken::OR);
    else if (_tcsicmp(uStr, cStrQueryTokenNot) == 0)
        outQueryToken->set(uStr, QueryToken::NOT);
    else {
        bool isnum = true;
        int32_t nlen = val.length();
        for (int32_t i = 0; i < nlen; ++i) {
            UChar c = uStr[i];
            if (_istalpha(c)) {
                isnum = false;
                break;
            }
        }
        if (isnum)
            outQueryToken->set(val.getBuffer(), 0, val.length(), QueryToken::NUMBER);
        else
            outQueryToken->set(val.getBuffer(), 0, val.length(), QueryToken::TERM);
    }
    return true;
}
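/* Token classification performed by the two ReadTerm variants above, derived
 * from the branches in the code:
 *   "te?t"         -> WILDTERM    (contains '?')
 *   "test*"        -> PREFIXTERM  (exactly one '*', at the end)
 *   "te*st*"       -> WILDTERM    (any other '*' usage)
 *   "AND" / "&&"   -> AND_        (the word form is case-insensitive)
 *   "OR"  / "||"   -> OR
 *   "NOT"          -> NOT
 *   "42"           -> NUMBER      (no alphabetic characters)
 *   anything else  -> TERM */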