void MultiLevelSkipListWriter::init() {
  skipBuffer = _CLNEW CL_NS(util)::ObjectArray<CL_NS(store)::RAMOutputStream>(numberOfSkipLevels);
  for (int32_t i = 0; i < numberOfSkipLevels; i++) {
    skipBuffer->values[i] = _CLNEW RAMOutputStream;
  }
}
size_t Compare::Char::operator()(const char* val1) const {
  return CL_NS(util)::Misc::ahashCode(val1);
}
#endif
#include "CLucene/StdHeader.h"
#include "FSDirectory.h"
#include "CLucene/index/IndexReader.h"
#include "CLucene/util/Misc.h"
#include "CLucene/debug/condition.h"

CL_NS_DEF(store)
CL_NS_USE(util)

/** This cache of directories ensures that there is a unique Directory
 * instance per path, so that synchronization on the Directory can be used to
 * synchronize access between readers and writers.
 */
static CL_NS(util)::CLHashMap<const char*,FSDirectory*,CL_NS(util)::Compare::Char,CL_NS(util)::Equals::Char> DIRECTORIES(false,false);

bool FSDirectory::disableLocks = false;

FSDirectory::FSIndexInput::FSIndexInput(const char* path, int32_t __bufferSize):
    BufferedIndexInput(__bufferSize)
{
  //Func - Constructor.
  //       Opens the file named path
  //Pre  - path != NULL
  //Post - if the file could not be opened an exception is thrown.

  CND_PRECONDITION(path != NULL, "path is NULL");

  handle = _CLNEW SharedHandle();
  strcpy(handle->path, path);
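// Illustrative note (not part of the original file): the DIRECTORIES cache
// declared above is consulted by the static factory that hands out FSDirectory
// instances. A minimal sketch of the lookup-or-create idea, using the
// CLHashMap get()/put() calls; treat it as an assumption about the factory's
// internals, not the exact CLucene code:
//
//   FSDirectory* dir = DIRECTORIES.get(path);
//   if (dir == NULL) {
//     dir = _CLNEW FSDirectory(path, create);   // hypothetical private ctor call
//     DIRECTORIES.put(dir->directory, dir);
//   }
//   return _CL_POINTER(dir);                    // callers share one ref-counted instance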
TCHAR* Term::toString() const {
  return CL_NS(util)::Misc::join(_field, _T(":"), _text);
}
const int32_t DocumentsWriter::POINTER_NUM_BYTE = 4;
const int32_t DocumentsWriter::INT_NUM_BYTE = 4;
const int32_t DocumentsWriter::CHAR_NUM_BYTE = 2; //TODO: adjust for c++...
const int32_t DocumentsWriter::MAX_TERM_LENGTH = DocumentsWriter::CHAR_BLOCK_SIZE - 1;

AbortException::AbortException(CLuceneError& _err, DocumentsWriter* docWriter):
    err(_err)
{
  docWriter->setAborting();
}

DocumentsWriter::DocumentsWriter(CL_NS(store)::Directory* directory, IndexWriter* writer):
    bufferedDeleteTerms(_CLNEW TermNumMapType(true, true)),
    freeCharBlocks(FreeCharBlocksType(true)),
    freeByteBlocks(FreeByteBlocksType(true)),
    waitingThreadStates(CL_NS(util)::ValueArray<ThreadState*>(MAX_THREAD_STATE))
{
  numBytesAlloc = 0;
  numBytesUsed = 0;
  this->directory = directory;
  this->writer = writer;
  this->hasNorms = this->bufferIsFull = false;
  fieldInfos = _CLNEW FieldInfos();
  maxBufferedDeleteTerms = IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS;
  ramBufferSize = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024);
  maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS;
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "TermScorer.h"
#include "CLucene/index/Terms.h"
#include "TermQuery.h"

CL_NS_USE(index)
CL_NS_DEF(search)

//TermScorer takes TermDocs and deletes it when TermScorer is cleaned up
TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td, Similarity* similarity, uint8_t* _norms):
    Scorer(similarity),
    termDocs(td),
    norms(_norms),
    weight(w),
    weightValue(w->getValue()),
    _doc(0),
    pointer(0),
    pointerMax(0)
{
  memset(docs, 0, 32 * sizeof(int32_t));
  memset(freqs, 0, 32 * sizeof(int32_t));

  //precompute tf()*weight for small term frequencies
  for (int32_t i = 0; i < LUCENE_SCORE_CACHE_SIZE; i++)
    scoreCache[i] = getSimilarity()->tf(i) * weightValue;
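// Illustrative note (not part of the original file): the scoreCache filled in
// above is consumed when scoring each hit. A sketch of a score() body that
// uses the cache for small frequencies and falls back to Similarity::tf();
// this is an assumption about the shape of the method, not a verbatim copy:
//
//   float_t TermScorer::score() {
//     int32_t f = freqs[pointer];
//     float_t raw = f < LUCENE_SCORE_CACHE_SIZE
//         ? scoreCache[f]                          // cache hit
//         : getSimilarity()->tf(f) * weightValue;  // cache miss
//     // norms, when present, are stored as one encoded byte per document
//     return norms == NULL ? raw : raw * Similarity::decodeNorm(norms[_doc]);
//   }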
    filter++;
    logic++;
  } else
    bts = _CLNEW BitSet( reader->maxDoc() );

  while( *filter ) {
    doChain( bts, reader, *logic, *filter );
    filter++;
    logic++;
  }
  return bts;
}

void ChainedFilter::doUserChain( CL_NS(util)::BitSet* chain, CL_NS(util)::BitSet* filter, int logic ) {
  _CLTHROWA(CL_ERR_Runtime, "User chain logic not implemented by superclass");
}

BitSet* ChainedFilter::doChain( BitSet* resultset, IndexReader* reader, int logic, Filter* filter ) {
  BitSet* filterbits = filter->bits( reader );
  int32_t maxDoc = reader->maxDoc();
  int32_t i = 0;
  if ( logic >= ChainedFilter::USER ) {
    doUserChain( resultset, filterbits, logic );
  } else {
    switch( logic ) {
      case OR:
        for( i = 0; i < maxDoc; i++ )
  valueType = VALUE_STRING;
}

void Field::setValue(Reader* value) {
  _resetValue();
  fieldsData = value;
  valueType = VALUE_READER;
}

void Field::setValue(ValueArray<uint8_t>* value) {
  _resetValue();
  fieldsData = value;
  valueType = VALUE_BINARY;
}

/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
void Field::setValue(CL_NS(analysis)::TokenStream* value) {
  _resetValue();
  fieldsData = value;
  valueType = VALUE_TOKENSTREAM;
}

void Field::setBoost(const float_t boost) {
  this->boost = boost;
}

float_t Field::getBoost() const {
  return boost;
}

void Field::setConfig(const uint32_t x) {
  uint32_t newConfig = 0;

  //set storage settings
  if ( (x & STORE_YES) || (x & STORE_COMPRESS) ) {
    newConfig |= STORE_YES;
    if ( x & STORE_COMPRESS )
/*------------------------------------------------------------------------------
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/_ApiHeader.h"
#include "_CharStream.h"
#include "_FastCharStream.h"
#include "CLucene/util/CLStreams.h"

CL_NS_DEF(queryParser)

FastCharStream::FastCharStream(CL_NS(util)::Reader* r, bool ownsReader):
    buffer(NULL),
    _bufferSize(0),
    bufferLength(0),
    bufferPosition(0),
    tokenStart(0),
    bufferStart(0),
    input(r),
    _ownsReader(ownsReader)
{
}

FastCharStream::~FastCharStream() {
  if (_ownsReader) {
    _CLLDELETE(input);
  }
  _CLDELETE_LCARRAY(buffer);
StandardAnalyzer::StandardAnalyzer():
    stopSet(false)
{
  StopFilter::fillStopTable(&stopSet, CL_NS(analysis)::StopAnalyzer::ENGLISH_STOP_WORDS);
}
Analyzer::Analyzer() {
  _internal = new Internal;
  _internal->tokenStreams = _CLNEW CL_NS(util)::ThreadLocal<TokenStream*,
      CL_NS(util)::Deletor::Object<TokenStream> >;
}
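// Illustrative note (not part of the original file): the per-thread ThreadLocal
// created above is what allows each thread to reuse its own TokenStream. A
// sketch of the accessors that typically wrap it (the method names are an
// assumption):
//
//   TokenStream* Analyzer::getPreviousTokenStream() {
//     return _internal->tokenStreams->get();   // NULL the first time a thread asks
//   }
//   void Analyzer::setPreviousTokenStream(TokenStream* obj) {
//     _internal->tokenStreams->set(obj);       // stored per-thread; freed by the Deletor
//   }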
#include "Hits.h" #include "_FieldDocSortedHitQueue.h" #include <assert.h> CL_NS_USE(index) CL_NS_DEF(search) CL_NS(document)::Document* Searchable::doc(const int32_t i){ CL_NS(document)::Document* ret = _CLNEW CL_NS(document)::Document; if (!doc(i,ret) ) _CLDELETE(ret); return ret; } //static Query* Query::mergeBooleanQueries(CL_NS(util)::ArrayBase<Query*>* queries) { std::vector<BooleanClause*> allClauses; CL_NS(util)::ValueArray<BooleanClause*> clauses; for (size_t i = 0; i < queries->length; i++) { assert(BooleanQuery::getClassName() == queries->values[i]->getObjectName()); BooleanQuery* booleanQuery = (BooleanQuery*)queries->values[i]; clauses.resize((booleanQuery->getClauseCount())); booleanQuery->getClauses(clauses.values); for (size_t j = 0; j < clauses.length; j++) { allClauses.push_back(clauses.values[j]->clone()); } } bool coordDisabled = ( queries->length == 0 ) ? false : ((BooleanQuery*)queries->values[0])->isCoordDisabled(); BooleanQuery* result = _CLNEW BooleanQuery(coordDisabled);
/*------------------------------------------------------------------------------
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "SloppyPhraseScorer.h"
#include "PhraseScorer.h"
#include "CLucene/index/Terms.h"

CL_NS_USE(index)
CL_NS_DEF(search)

SloppyPhraseScorer::SloppyPhraseScorer(Weight* weight, CL_NS(index)::TermPositions** tps,
    int32_t* positions, Similarity* similarity, int32_t slop, uint8_t* norms):
    PhraseScorer(weight, tps, positions, similarity, norms)
{
  //Func - Constructor
  //Pre  - tps != NULL
  //       tpsLength >= 0
  //       n != NULL
  //Post - Instance has been created

  CND_PRECONDITION(tps != NULL, "tps is NULL");
  //CND_PRECONDITION(n != NULL, _T("n is NULL")) = checked in PhraseScorer;

  this->slop = slop;
}
  ret[0] = 0;
  _sntprintf(ret, len, _T("QueryFilter(%s)"), qt);
  _CLDELETE_CARRAY(qt);
  return ret;
}

/** Returns a BitSet with true for documents which should be permitted in
search results, and false for those that should not. */
BitSet* QueryFilter::bits( IndexReader* reader ) {
  BitSet* bits = _CLNEW BitSet(reader->maxDoc());

  IndexSearcher s(reader);
  QFHitCollector hc(bits);
  s._search(query, NULL, &hc);
  return bits;
}

QueryFilter::QFHitCollector::QFHitCollector(CL_NS(util)::BitSet* bits) {
  this->bits = bits;
}

void QueryFilter::QFHitCollector::collect(const int32_t doc, const qreal score) {
  bits->set(doc);  // set bit for hit
}

CL_NS_END
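// Illustrative usage sketch (not part of the original file): wrap one query as
// a filter and use it to constrain another search. Assumes the usual CLucene
// Searcher::search(Query*, Filter*) overload that returns a caller-owned Hits*;
// the function and variable names below are hypothetical.
static void exampleFilteredSearch(CL_NS(index)::IndexReader* reader,
                                  CL_NS(search)::Query* cachedQuery,
                                  CL_NS(search)::Query* userQuery) {
  CL_NS(search)::QueryFilter filter(cachedQuery);   // documents matching cachedQuery
  CL_NS(search)::IndexSearcher searcher(reader);
  CL_NS(search)::Hits* hits = searcher.search(userQuery, &filter);
  // ... read hits->length() matching documents here ...
  _CLDELETE(hits);
}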
/** See if a custom object is in the cache. */
FieldCacheAuto* FieldCacheImpl::lookup(IndexReader* reader, const TCHAR* field, SortComparatorSource* comparer) {
  FieldCacheAuto* ret = NULL;
  FileEntry* entry = _CLNEW FileEntry(field, comparer);
  {
    // CPIXASYNC SCOPED_LOCK_MUTEX(THIS_LOCK)
    SCOPED_LOCK_CRUCIAL_MUTEX(FieldCacheImpl_THIS_LOCK)
    fieldcacheCacheReaderType* readerCache = cache.get(reader);
    if (readerCache != NULL)
      ret = readerCache->get(entry);
    _CLDELETE(entry);
  }
  return ret;
}

void FieldCacheImpl::closeCallback(CL_NS(index)::IndexReader* reader, void* fieldCacheImpl) {
  FieldCacheImpl* fci = (FieldCacheImpl*)fieldCacheImpl;
  // CPIXASYNC SCOPED_LOCK_MUTEX(fci->THIS_LOCK)
  SCOPED_LOCK_CRUCIAL_MUTEX(fci->FieldCacheImpl_THIS_LOCK)
  fci->cache.remove(reader);
}

/** Put an object into the cache. */
void FieldCacheImpl::store(IndexReader* reader, const TCHAR* field, int32_t type, FieldCacheAuto* value) {
  FileEntry* entry = _CLNEW FileEntry(field, type);
  {
    // CPIX_ASYNC SCOPED_LOCK_MUTEX(THIS_LOCK)
    SCOPED_LOCK_CRUCIAL_MUTEX(FieldCacheImpl_THIS_LOCK)
    fieldcacheCacheReaderType* readerCache = cache.get(reader);
    if (readerCache == NULL) {
      readerCache = _CLNEW fieldcacheCacheReaderType;
size_t Compare::Qstring::operator()(const QString& val1) const {
  return CL_NS(util)::Misc::qhashCode(val1);
}
  {
    SCOPED_LOCK_MUTEX(Comparators.THIS_LOCK)
    hitqueueCacheReaderType* readerCache = Comparators.get(reader);
    if (readerCache == NULL) {
      _CLDELETE(entry);
      return NULL;
    }
    sdc = readerCache->get(entry);
    _CLDELETE(entry);
  }
  return sdc;
}

void FieldSortedHitQueue::closeCallback(CL_NS(index)::IndexReader* reader, void*) {
  SCOPED_LOCK_MUTEX(Comparators.THIS_LOCK)
  Comparators.remove(reader);
}

//static
void FieldSortedHitQueue::store(IndexReader* reader, const TCHAR* field, int32_t type,
    SortComparatorSource* factory, ScoreDocComparator* value) {
  FieldCacheImpl::FileEntry* entry = (factory != NULL)
      ? _CLNEW FieldCacheImpl::FileEntry(field, factory)
      : _CLNEW FieldCacheImpl::FileEntry(field, type);
  {
    SCOPED_LOCK_MUTEX(Comparators.THIS_LOCK)
    hitqueueCacheReaderType* readerCache = Comparators.get(reader);
    if (readerCache == NULL) {
      readerCache = _CLNEW hitqueueCacheReaderType(true);
      if (length == 0)                    // start of token
        start = offset - 1;

      buffer[length++] = normalize(c);    // buffer it, normalized

      if (length == LUCENE_MAX_WORD_LEN)  // buffer overflow!
        break;
    } else if (length > 0)                // at non-Letter w/ chars
      break;                              // return 'em
  }

  buffer[length] = 0;
  token->set(buffer, start, start + length);
  return token;
}

void CharTokenizer::reset(CL_NS(util)::Reader* input) {
  Tokenizer::reset(input);
  bufferIndex = 0;
  offset = 0;
  dataLen = 0;
}

LetterTokenizer::LetterTokenizer(CL_NS(util)::Reader* in):
    CharTokenizer(in)
{
}

LetterTokenizer::~LetterTokenizer() {}

bool LetterTokenizer::isTokenChar(const TCHAR c) const {
void IndexWriter::_IndexWriter(const bool create) {
  //Func - Initialises the instances
  //Pre  - create indicates if the indexWriter must create a new index
  //       located at path or just open it

  similarity = CL_NS(search)::Similarity::getDefault();

  useCompoundFile = true;
  if ( directory->getDirectoryType() == RAMDirectory::DirectoryType() )
    useCompoundFile = false;

  //Create a ramDirectory
  ramDirectory = _CLNEW TransactionalRAMDirectory;
  CND_CONDITION(ramDirectory != NULL, "ramDirectory is NULL");

  //Initialize the writeLock to NULL
  writeLock = NULL;

  //initialise the settings...
  maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
  mergeFactor = DEFAULT_MERGE_FACTOR;
  maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
  writeLockTimeout = WRITE_LOCK_TIMEOUT;
  commitLockTimeout = COMMIT_LOCK_TIMEOUT;
  minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;
  termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;

  //Create a new lock using the name "write.lock"
  LuceneLock* newLock = directory->makeLock(IndexWriter::WRITE_LOCK_NAME);

  //Condition check to see if newLock has been allocated properly
  CND_CONDITION(newLock != NULL, "No memory could be allocated for LuceneLock newLock");

  //Try to obtain a write lock
  if (!newLock->obtain(writeLockTimeout)) {
    //Write lock could not be obtained so delete it
    _CLDELETE(newLock);
    //Reset the instance
    _finalize();
    //throw an exception because no writelock could be created or obtained
    _CLTHROWA(CL_ERR_IO, "Index locked for write or no write access." );
  }

  //The Write Lock has been obtained so save it for later use
  this->writeLock = newLock;

  //Create a new lock using the name "commit.lock"
  LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);

  //Condition check to see if lock has been allocated properly
  CND_CONDITION(lock != NULL, "No memory could be allocated for LuceneLock lock");

  LockWith2 with(lock, commitLockTimeout, this, NULL, create);
  {
    SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
    with.run();
  }

  //Release the commit lock
  _CLDELETE(lock);

  isOpen = true;
}
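// Illustrative usage sketch (not part of the original file): basic client-side
// use of a writer initialised as above, assuming the usual CLucene constructor
// IndexWriter(Directory*, Analyzer*, bool create). The field name and text are
// hypothetical and error handling is omitted.
static void exampleBuildIndex(CL_NS(store)::Directory* dir) {
  CL_NS(analysis)::standard::StandardAnalyzer analyzer;
  IndexWriter writer(dir, &analyzer, true);   // create == true starts a new index
  CL_NS(document)::Document doc;
  doc.add(*_CLNEW CL_NS(document)::Field(_T("contents"), _T("hello clucene"),
      CL_NS(document)::Field::STORE_YES | CL_NS(document)::Field::INDEX_TOKENIZED));
  writer.addDocument(&doc);                   // analyse and buffer the document
  writer.close();                             // flush segments and release the write lock
}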
/** Returns a hash code value for this object.*/
size_t RangeQuery::hashCode() const {
  return Similarity::floatToByte(getBoost()) ^
      (lowerTerm != NULL ? lowerTerm->hashCode() : 0) ^
      (upperTerm != NULL ? upperTerm->hashCode() : 0) ^
      (this->inclusive ? 1 : 0);
}

const char* RangeQuery::getObjectName() const {
  return getClassName();
}

const char* RangeQuery::getClassName() {
  return "RangeQuery";
}

Query* RangeQuery::combine(CL_NS(util)::ArrayBase<Query*>* queries) {
  return Query::mergeBooleanQueries(queries);
}

bool RangeQuery::equals(Query* other) const {
  if (!(other->instanceOf(RangeQuery::getClassName())))
    return false;

  RangeQuery* rq = (RangeQuery*)other;
  bool ret = (this->getBoost() == rq->getBoost()) &&
      (this->isInclusive() == rq->isInclusive()) &&
      (this->getLowerTerm()->equals(rq->getLowerTerm())) &&
      (this->getUpperTerm()->equals(rq->getUpperTerm()));
  return ret;
}
  return buffer.toString();
}

/////////////////////////////////////////////////////////////////////////////
bool NearSpansUnordered::CellQueue::lessThan(SpansCell* spans1, SpansCell* spans2) {
  if (spans1->doc() == spans2->doc())
    return NearSpansOrdered::docSpansOrdered(spans1, spans2);
  else
    return spans1->doc() < spans2->doc();
}

/////////////////////////////////////////////////////////////////////////////
NearSpansUnordered::NearSpansUnordered(SpanNearQuery* query, CL_NS(index)::IndexReader* reader) {
  // this->ordered = new ArrayList();
  this->more = true;
  this->firstTime = true;
  this->max = NULL;        // CLucene specific, SpansCell::adjust tests this member for NULL
  this->first = NULL;      // CLucene specific
  this->last = NULL;       // CLucene specific, addToList tests this member for NULL
  this->totalLength = 0;   // CLucene specific
  this->query = query;
  this->slop = query->getSlop();

  SpanQuery** clauses = query->getClauses();
  this->queue = _CLNEW CellQueue(query->getClausesCount());
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
 *
 * Changes are Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "CompoundFile.h"
#include "CLucene/util/Misc.h"

CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_DEF(index)

CompoundFileReader::CSIndexInput::CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length) {
  this->base = base;
  this->fileOffset = fileOffset;
  this->_length = length;
}

void CompoundFileReader::CSIndexInput::readInternal(uint8_t* b, const int32_t len) {
  SCOPED_LOCK_MUTEX(base->THIS_LOCK)

  int64_t start = getFilePointer();
  if (start + len > _length)
    _CLTHROWA(CL_ERR_IO, "read past EOF");
  base->seek(fileOffset + start);
  hitDocs = _CLNEW CL_NS(util)::CLVector<HitDoc*, CL_NS(util)::Deletor::Object<HitDoc> >;
  nDeletions = countDeletions(s);

  //retrieve 100 initially (getMoreDocs fetches twice the requested minimum)
  getMoreDocs(50);
  _lengthAtStart = _length;
}

Hits::~Hits() {
  _CLLDELETE(hitDocs);
}

// count # deletions, return -1 if unknown.
int32_t Hits::countDeletions(CL_NS(search)::Searcher* s) {
  int32_t cnt = -1;
  if ( s->getObjectName() == IndexSearcher::getClassName() ) {
    cnt = s->maxDoc() - static_cast<IndexSearcher*>(s)->getReader()->numDocs();
  }
  return cnt;
}

size_t Hits::length() const {
  return _length;
}

Document& Hits::doc(const int32_t n) {
  HitDoc* hitDoc = getHitDoc(n);

  // Update LRU cache of documents
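// Illustrative note (not part of the original file): from the caller's side the
// lazy block fetching above is invisible; code simply indexes into Hits. A
// sketch, assuming the usual Searcher::search(Query*) overload that returns a
// caller-owned Hits*:
//
//   Hits* hits = searcher->search(query);
//   for (size_t i = 0; i < hits->length(); ++i) {
//     CL_NS(document)::Document& d = hits->doc(i);   // fetched and cached on demand
//     // ... read stored fields from d ...
//   }
//   _CLDELETE(hits);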
size_t Compare::TChar::operator()(const TCHAR* val1) const {
  return CL_NS(util)::Misc::thashCode(val1);
}
StandardAnalyzer::StandardAnalyzer(const TCHAR** stopWords):
    stopSet(_CLNEW CLTCSetList(true)),
    maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH)
{
  StopFilter::fillStopTable(stopSet, stopWords);
}

StandardAnalyzer::StandardAnalyzer(const char* stopwordsFile, const char* enc):
    stopSet(_CLNEW CLTCSetList(true)),
    maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH)
{
  if (enc == NULL)
    enc = "ASCII";
  WordlistLoader::getWordSet(stopwordsFile, enc, stopSet);
}

StandardAnalyzer::StandardAnalyzer(CL_NS(util)::Reader* stopwordsReader, const bool _bDeleteReader):
    stopSet(_CLNEW CLTCSetList(true)),
    maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH)
{
  WordlistLoader::getWordSet(stopwordsReader, stopSet, _bDeleteReader);
}

class StandardAnalyzer::SavedStreams: public TokenStream {
public:
  StandardTokenizer* tokenStream;
  TokenStream* filteredTokenStream;

  SavedStreams(): tokenStream(NULL), filteredTokenStream(NULL) {}

  void close() {}
size_t Compare::WChar::operator()(const wchar_t* val1) const {
  return CL_NS(util)::Misc::whashCode(val1);
}
StandardAnalyzer::StandardAnalyzer():
    stopSet(_CLNEW CLTCSetList(true)),
    maxTokenLength(DEFAULT_MAX_TOKEN_LENGTH)
{
  StopFilter::fillStopTable(stopSet, CL_NS(analysis)::StopAnalyzer::ENGLISH_STOP_WORDS);
}
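// Illustrative usage sketch (not part of the original file): the default
// constructor above uses the built-in English stop words, while the
// const TCHAR** constructor accepts a caller-supplied, NULL-terminated list.
// The custom word list below is hypothetical.
static void exampleAnalyzers() {
  static const TCHAR* myStopWords[] = { _T("foo"), _T("bar"), NULL };
  StandardAnalyzer defaultAnalyzer;               // English stop words
  StandardAnalyzer customAnalyzer(myStopWords);   // user-supplied stop words
}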
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "TermVector.h"
#include "CLucene/util/Misc.h"

CL_NS_USE(util)
CL_NS_DEF(index)

const char* TermVectorsWriter::LUCENE_TVX_EXTENSION = ".tvx";
const char* TermVectorsWriter::LUCENE_TVD_EXTENSION = ".tvd";
const char* TermVectorsWriter::LUCENE_TVF_EXTENSION = ".tvf";

TermVectorsWriter::TermVectorsWriter(CL_NS(store)::Directory* directory,
    const char* segment, FieldInfos* fieldInfos)
{
  // Open files for TermVector storage
  char fbuf[CL_MAX_NAME];
  strcpy(fbuf, segment);
  char* fpbuf = fbuf + strlen(fbuf);

  strcpy(fpbuf, LUCENE_TVX_EXTENSION);
  tvx = directory->createOutput(fbuf);
  tvx->writeInt(FORMAT_VERSION);

  strcpy(fpbuf, LUCENE_TVD_EXTENSION);
  tvd = directory->createOutput(fbuf);
  tvd->writeInt(FORMAT_VERSION);
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/_ApiHeader.h"
#include "CLucene/index/Terms.h"
#include "CLucene/index/Term.h"
#include "CLucene/util/StringBuffer.h"
#include "_TermSpans.h"
#include <limits.h>

CL_NS_DEF2(search, spans)

TermSpans::TermSpans(CL_NS(index)::TermPositions* positions, CL_NS(index)::Term* term) {
  this->positions = positions;
  this->term = _CL_POINTER(term);
  doc_ = -1;
  freq = 0;
  count = 0;
  position = 0;
}

TermSpans::~TermSpans() {
  _CLLDELETE(positions);
  _CLLDECDELETE(term);
}