Пример #1
0
QString Misc::segmentname(const QString& segment, const QString& ext, int32_t x)
{
	//Func - Builds a file name out of a segment name, an extension and an
	//       optional numeric suffix
	//Pre  - ext is not empty
	//       x holds the numeric suffix, or -1 when no suffix is wanted
	//Post - When x = -1 the concatenation of segment and ext is returned,
	//       otherwise the concatenation of segment, ext and the decimal
	//       representation of x

	CND_PRECONDITION(!ext.isEmpty(), "ext is NULL");

	if (x == -1)
		return QString(segment + ext);

	// Concatenate instead of chaining QString::arg(): with arg(), a "%N"
	// sequence that happens to occur inside segment or ext is itself treated
	// as a place marker by the next arg() call and corrupts the file name.
	return segment + ext + QString::number(x);
}
Пример #2
0
void Misc::segmentname(char* buffer,int32_t bufferLen, const char* Segment, const char* ext, const int32_t x){
//Func - Static Method
//       Creates a filename in buffer by concatenating Segment with ext and x
//Pre  - buffer  != NULL and offers room for bufferLen characters
//       Segment != NULL and holds the name of the segment
//       ext     != NULL and holds the extension
//       x contains a number
//Post - When x = -1 buffer contains the concatenation of Segment and ext otherwise
//       buffer contains the concatenation of Segment, ext and x.
//       A result that does not fit is truncated; buffer is always terminated
//       when bufferLen > 0 and left untouched when bufferLen <= 0

	CND_PRECONDITION(buffer  != NULL, "buffer is NULL");
	CND_PRECONDITION(Segment != NULL, "Segment is NULL");
	CND_PRECONDITION(ext     != NULL, "ext is NULL");

	//Nothing can be stored in an empty buffer, and a negative length must
	//never reach _snprintf where it would be read as a huge unsigned size
	if ( bufferLen <= 0 )
		return;

	if ( x == -1 )
		_snprintf(buffer,bufferLen,"%s%s", Segment,ext );
	else
		_snprintf(buffer,bufferLen,"%s%s%d", Segment,ext,x );

	//The Microsoft _snprintf does not write a terminator when the output is
	//truncated, so terminate explicitly
	buffer[bufferLen-1] = '\0';
}
Пример #3
0
  bool MMapIndexInput::open(const char* path, IndexInput*& ret, CLuceneError& error, int32_t __bufferSize )    {

	//Func - Constructor.
	//       Opens the file named path
	//Pre  - path != NULL
	//Post - if the file could not be opened  an exception is thrown.

	  CND_PRECONDITION(path != NULL, "path is NULL");

    Internal* _internal = _CLNEW Internal;

#if defined(_CL_HAVE_FUNCTION_MAPVIEWOFFILE)
	  _internal->mmaphandle = NULL;
	  _internal->fhandle = CreateFileA(path,GENERIC_READ,FILE_SHARE_READ, 0,OPEN_EXISTING,0,0);
	  
	  //Check if a valid fhandle was retrieved
	  if (_internal->fhandle < 0){
		_cl_dword_t err = GetLastError();
        if ( err == ERROR_FILE_NOT_FOUND )
        error.set(CL_ERR_IO, "File does not exist");
        else if ( err == ERROR_ACCESS_DENIED )
        error.set(CL_ERR_IO, "File Access denied");
        else if ( err == ERROR_TOO_MANY_OPEN_FILES )
        error.set(CL_ERR_IO, "Too many open files");
		else
          error.set(CL_ERR_IO, "Could not open file");
	  }

	  _cl_dword_t dummy=0;
	  _internal->_length = GetFileSize(_internal->fhandle, &dummy);

	  if ( _internal->_length > 0 ){
			_internal->mmaphandle = CreateFileMappingA(_internal->fhandle,NULL,PAGE_READONLY,0,0,NULL);
			if ( _internal->mmaphandle != NULL ){
				void* address = MapViewOfFile(_internal->mmaphandle,FILE_MAP_READ,0,0,0);
				if ( address != NULL ){
					_internal->data = (uint8_t*)address;
          ret = _CLNEW MMapIndexInput(_internal);
          return true;
				}
			}
			
			//failure:
			int errnum = GetLastError(); 
			
			CloseHandle(_internal->mmaphandle);
	
			char* lpMsgBuf=strerror(errnum);
			size_t len = strlen(lpMsgBuf)+80;
			char* errstr = _CL_NEWARRAY(char, len); 
			cl_sprintf(errstr, len, "MMapIndexInput::MMapIndexInput failed with error %d: %s", errnum, lpMsgBuf); 
	
	    error.set(CL_ERR_IO, errstr);
			_CLDELETE_CaARRAY(errstr);
	  }
Пример #4
0
  uint64_t IndexReader::lastModified(const char* directory2) {
  //Func - Static method
  //       Returns the time the index in the named directory was last modified.
  //Pre  - directory != NULL and contains the path name of the directory to check
  //Post - The last modified time of the index has been returned

    CND_PRECONDITION(directory2 != NULL, "directory is NULL");

	  IndexReaderFindSegmentsFile runner(directory2);
	  return (uint64_t)runner.run();
  }
Пример #5
0
	 Field* Document::getField(const TCHAR* name)  const{
	 //Func - Looks up the first field in the field list whose name equals name
	 //Pre  - name != NULL
	 //Post - The matching Field has been returned, or NULL when no field matches

	    CND_PRECONDITION(name != NULL, "name is NULL");

		DocumentFieldEnumeration::DocumentFieldList* node = fieldList;
		while (node != NULL) {
			//names are compared by content; interning cannot be relied upon
			//because name is probably not interned
			if ( _tcscmp(node->field->name(), name) == 0 )
				return node->field;
			node = node->next;
		}

		return NULL;
	}
Пример #6
0
    DocumentFieldList::DocumentFieldList(Field* f, DocumentFieldList* n) {
    //Func - Constructor
    //       Creates a list node that refers to field f and is followed by n
    //Pre  - f != NULL
    //       n may be NULL
    //Post - field refers to f and next refers to n

        CND_PRECONDITION(f != NULL, "f is NULL");

        next  = n;
        field = f;
    }
Пример #7
0
char* Misc::segmentname(const char* segment, const char* ext, int32_t x)
{
	//Func - Creates a filename in a newly allocated buffer by concatenating
	//       segment with ext and x
	//Pre  - ext != NULL and holds the extension
	//       x contains a number
	//Post - A buffer of CL_MAX_PATH characters allocated with _CL_NEWARRAY has
	//       been returned. When x = -1 it contains the concatenation of
	//       segment and ext, otherwise the concatenation of segment, ext and x.
	//       A result that does not fit is truncated but always terminated.

	CND_PRECONDITION(ext != NULL, "ext is NULL");

	char* buf = _CL_NEWARRAY(char, CL_MAX_PATH);
	if (x == -1)
		_snprintf(buf, CL_MAX_PATH, "%s%s", segment, ext);
	else
		_snprintf(buf, CL_MAX_PATH, "%s%s%d", segment, ext, x);

	//The Microsoft _snprintf does not write a terminator when the output is
	//truncated, so terminate explicitly
	buf[CL_MAX_PATH - 1] = '\0';
	return buf;
}
Пример #8
0
	FuzzyTermEnum::FuzzyTermEnum(IndexReader* reader, Term* term, float_t minSimilarity, size_t _prefixLength):
		FilteredTermEnum(),d(NULL),dLen(0),_similarity(0),_endEnum(false),searchTerm(_CL_POINTER(term)),
		text(NULL),textLen(0),prefix(NULL)/* ISH: was STRDUP_TtoT(LUCENE_BLANK_STRING)*/,prefixLength(0),
		minimumSimilarity(minSimilarity)
	{
	//Func - Constructor
	//       Splits the text of term into a prefix of at most _prefixLength
	//       characters and the remaining text, and positions the enumeration
	//       on the terms of reader starting at (field of term, prefix)
	//Pre  - term != NULL
	//       0 <= minSimilarity < 1
	//Post - The instance has been created, or CL_ERR_IllegalArgument has been
	//       thrown when minSimilarity is out of range

		CND_PRECONDITION(term != NULL,"term is NULL");

		if (minSimilarity >= 1.0f || minSimilarity < 0.0f){
			//The destructor does not run for an object whose constructor throws,
			//so hand back the reference taken on searchTerm in the initializer list
			_CLDECDELETE(searchTerm);

			if (minSimilarity >= 1.0f)
				_CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity cannot be greater than or equal to 1");
			_CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity cannot be less than 0");
		}

		scale_factor = 1.0f / (1.0f - minimumSimilarity); // only now we are safe from a division by zero
		//TODO: this.field = searchTerm.field();

		//The prefix could be longer than the word.
		//It's kind of silly though.  It means we must match the entire word.
		const size_t fullSearchTermLength = searchTerm->textLength();
		const size_t realPrefixLength = _prefixLength > fullSearchTermLength ? fullSearchTermLength : _prefixLength;

		//text holds whatever follows the prefix
		text = STRDUP_TtoT(searchTerm->text() + realPrefixLength);
		textLen = fullSearchTermLength - realPrefixLength;

		//prefix holds the first realPrefixLength characters, explicitly terminated
		prefix = _CL_NEWARRAY(TCHAR,realPrefixLength+1);
		_tcsncpy(prefix, searchTerm->text(), realPrefixLength);
		prefix[realPrefixLength]='\0';
		prefixLength = realPrefixLength;

		initializeMaxDistances();

		//Start enumerating at the first term of the field that is >= prefix
		Term* trm = _CLNEW Term(searchTerm->field(), prefix); // _CLNEW Term(term, prefix); -- not intern'd?
		setEnum(reader->terms(trm));
		_CLLDECDELETE(trm);


		/* LEGACY:
		//Initialize e to NULL
		e          = NULL;
		eWidth     = 0;
		eHeight    = 0;

		if(prefixLength > 0 && prefixLength < textLen){
		this->prefixLength = prefixLength;

		prefix = _CL_NEWARRAY(TCHAR,prefixLength+1);
		_tcsncpy(prefix,text,prefixLength);
		prefix[prefixLength]='\0';

		textLen = prefixLength;
		text[textLen]='\0';
		}
		*/
	}
      PhraseScorer(weight,tps,positions,similarity,norms){
  //Func - Constructor
  //Pre  - tps != NULL 
  //       tpsLength >= 0
  //       n != NULL
  //Post - Instance has been created

      CND_PRECONDITION(tps != NULL, "tps is NULL");
      //CND_PRECONDITION(n != NULL, _T("n is NULL")) = checked in PhraseScorer;

      this->slop = slop;
  }
Пример #10
0
Field::Field(const TCHAR* Name, ValueArray<uint8_t>* Value, int config, bool duplicateValue):
	lazy(false)
{
	//Func - Constructor for a field holding a binary value
	//Pre  - Name  != NULL
	//       Value != NULL
	//Post - The name has been interned, the value type is VALUE_BINARY, the
	//       boost is 1.0 and config has been applied through setConfig.
	//       fieldsData refers to Value itself, or to a newly allocated array
	//       holding a copy of its bytes when duplicateValue is true

	CND_PRECONDITION(Name != NULL, "Name cannot be NULL");
	CND_PRECONDITION(Value != NULL, "value cannot be NULL");

	_name     = CLStringIntern::intern( Name );
	boost     = 1.0f;
	valueType = VALUE_BINARY;

	if ( !duplicateValue ){
		//store the caller's array as is
		fieldsData = Value;
	}else{
		//store a private copy of the bytes
		ValueArray<uint8_t>* copy = _CLNEW ValueArray<uint8_t>(Value->length);
		memcpy(copy->values, Value->values, Value->length * sizeof(uint8_t));
		fieldsData = copy;
	}

	setConfig(config);
}
Пример #11
0
  void PhrasePositions::firstPosition(){
  //Func - Loads the frequency reported by tp into count and then advances
  //       to the first position
  //Pre  - tp != NULL
  //Post - count has been set from tp->freq() and nextPosition() has been called once

      CND_PRECONDITION(tp != NULL,"tp is NULL");

      //number of positions that can be read
      count = tp->freq();

      //step onto the first of them
      nextPosition();
  }
Пример #12
0
Field::Field(const TCHAR* Name, int config):
	lazy(false)
{
	//Func - Constructor for a field that does not hold a value yet
	//Pre  - Name != NULL
	//Post - The name has been interned, fieldsData is NULL, the value type is
	//       VALUE_NONE and the boost is 1.0. setConfig is only called for a
	//       non-zero config

	CND_PRECONDITION(Name != NULL, "Name cannot be NULL");

	_name      = CLStringIntern::intern( Name );
	valueType  = VALUE_NONE;
	fieldsData = NULL;
	boost      = 1.0f;

	if (config != 0)
		setConfig(config);
}
Пример #13
0
SegmentMergeInfo::SegmentMergeInfo(const int32_t b, TermEnum* te, IndexReader* r):
    docMap(NULL),termEnum(te),base(b),reader(r) {
//Func - Constructor
//Pre  - b >= 0
//       te refers to a valid term enumeration
//       r refers to a valid reader
//Post - The instance has been created: term holds the current term of te,
//       postings and docMap are NULL

    CND_PRECONDITION(b >= 0, "b is a negative number");

    term     = te->term();
    postings = NULL;
}
Пример #14
0
	bool SegmentMergeQueue::lessThan(SegmentMergeInfo* stiA, SegmentMergeInfo* stiB) {
	//Func - Ordering predicate for the queue: compares the term of stiA with the
	//       term of stiB and falls back on the bases when the terms are equal
	//Pre  - stiA != NULL
	//       stiB != NULL
	//Post - true is returned if stiA orders before stiB otherwise false

		CND_PRECONDITION(stiA != NULL, "stiA is NULL");
		CND_PRECONDITION(stiB != NULL, "stiB is NULL");

		const int32_t order = stiA->term->compareTo(stiB->term);

		//Different terms: the result of the term comparison decides
		if (order != 0)
			return order < 0;

		//Equal terms: the instance with the smaller base comes first
		return stiA->base < stiB->base;
	}
Пример #15
0
	void PhraseScorer::pqToList(){
	//Func - Transfers the PhrasePositions from the PhraseQueue pq to
	//       the PhrasePositions list with first as its first element
	//Pre  - pq != NULL
	//       first = NULL
	//       last = NULL
	//Post - All PhrasePositions have been transfered to the list
	//       of PhrasePositions of which the first element is pointed to by first
	//       and the last element is pointed to by last

		CND_PRECONDITION(pq != NULL,"pq is NULL")
		CND_PRECONDITION(first == NULL,"first must be NULL")
		CND_PRECONDITION(last == NULL,"last must be NULL")

		PhrasePositions* PhrasePos = NULL;

		//As long pq is not empty
		while (pq->top() != NULL){
			//Pop a PhrasePositions instance
			PhrasePos = pq->pop();

			// add next to end of list
			if (last != NULL) {
				last->next = PhrasePos;
			} else {
				first = PhrasePos;
			}

			//Let last point to the new last PhrasePositions instance just added
			last = PhrasePos;
			//Reset the next of last to NULL
			last->next = NULL;
		}

		//Check to see that pq is empty now
		CND_CONDITION(pq->Size()==0,"pq is not empty while it should be")
	}
Пример #16
0
SegmentTermPositions::SegmentTermPositions(const SegmentReader* _parent):
  SegmentTermDocs(_parent){
//Func - Constructor
//Pre  - _parent != NULL
//Post - The instance has been created with its own clone of the proxStream
//       of _parent; position and proxCount are 0

    CND_PRECONDITION(_parent != NULL, "Parent is NULL");

    proxCount = 0;
    position  = 0;

    proxStream = _parent->proxStream->clone();
    CND_CONDITION(proxStream != NULL,"proxStream is NULL");
}
Пример #17
0
  FuzzyQuery::FuzzyQuery(Term* term, float_t _minimumSimilarity, size_t _prefixLength):
    MultiTermQuery(term),
    minimumSimilarity(_minimumSimilarity),
    prefixLength(_prefixLength)
  {
  //Func - Constructor
  //Pre  - term != NULL
  //       _minimumSimilarity < 1; a negative value selects defaultMinSimilarity
  //Post - The instance has been created, or CL_ERR_IllegalArgument has been
  //       thrown when the resulting minimumSimilarity is out of range

	  //A negative similarity stands for "use the default"
	  if ( minimumSimilarity < 0 )
		  minimumSimilarity = defaultMinSimilarity;

	  CND_PRECONDITION(term != NULL,"term is NULL");

	  if (minimumSimilarity >= 1.0f)
		  _CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity >= 1");

	  if (minimumSimilarity < 0.0f)
		  _CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity < 0");
  }
Пример #18
0
SegmentInfo* SegmentInfos::info(int32_t i) const
{
    //Func - Looks up the i-th SegmentInfo of the list.
    //Pre  - i >= 0
    //Post - The i-th SegmentInfo instance has been returned

    CND_PRECONDITION(i >= 0, "i contains negative number");

    //value() is asked to yield 0 when no entry exists at position i
    SegmentInfo* const found = infos.value(i, 0);

    CND_CONDITION(found != NULL, "No SegmentInfo instance found");
    return found;
}
Пример #19
0
SegmentMerger::SegmentMerger(IndexWriter* writer, const char* name, MergePolicy::OneMerge* merge){
//Func - Constructor
//Pre  - writer refers to a valid IndexWriter
//       name != NULL
//       merge may be NULL
//Post - Instance has been created: directory and termIndexInterval have been
//       taken from writer, segment has been set to name and, when merge is
//       given, a CheckAbort for merge and directory has been created

  CND_PRECONDITION(name != NULL, "name is NULL");

  init();

  directory = writer->getDirectory();
  segment   = name;

  //only a merge that was passed in gets an abort checker
  if (merge != NULL) {
    checkAbort = _CLNEW CheckAbort(merge, directory);
  }

  termIndexInterval = writer->getTermIndexInterval();
  mergedDocs        = 0;
  maxSkipLevels     = 0;
}
Пример #20
0
	SegmentMergeQueue::SegmentMergeQueue(const int32_t size) {
	//Func - Constructor
	//       Sets up a queue with room for size elements
	//Pre  - size >= 0
	//       (0 is allowed on purpose: that is the size used when a TermEnum
	//       is retrieved from an empty index)
	//Post - The queue has been initialized with length size

		CND_PRECONDITION(size >= 0, "size is too small");

		//The second argument tells the PriorityQueue superclass to take care of
		//deleting its elements
		const bool deleteElements = true;
		initialize(size,deleteElements);
	}
Пример #21
0
  IndexReader* IndexReader::open(const char* path, bool closeDirectoryOnCleanup, IndexDeletionPolicy* deletionPolicy){
  //Func - Static method.
  //       Opens an IndexReader on the index kept in the FSDirectory at path.
  //Pre  - path != NULL and contains the path of the index to read
  //       closeDirectoryOnCleanup and deletionPolicy are handed on unchanged
  //       to the Directory based open
  //Post - An IndexReader has been returned that reads the index located at path

	  CND_PRECONDITION(path != NULL, "path is NULL");

	  Directory* fsDir = FSDirectory::getDirectory(path);
	  IndexReader* const opened = open(fsDir,closeDirectoryOnCleanup,deletionPolicy);

	  //getDirectory left the directory with one reference more than when
	  //the reader is opened from an existing directory object, so give
	  //that reference back here
	  _CLDECDELETE(fsDir);

	  return opened;
  }
  TermInfosReader::TermInfosReader(Directory* dir, const char* seg, FieldInfos* fis, const int32_t readBufferSize):
      directory (dir),fieldInfos (fis), indexTerms(NULL), indexInfos(NULL), indexPointers(NULL), indexDivisor(1)
  {
  //Func - Constructor.
  //       Reads the TermInfos file (.tis) and eventually the Term Info Index file (.tii)
  //Pre  - dir is a reference to a valid Directory
  //       Fis contains a valid reference to an FieldInfos instance
  //       seg != NULL and contains the name of the segment
  //Post - An instance has been created and the index named seg has been read. (Remember
  //       a segment is nothing more then an independently readable index)

      CND_PRECONDITION(seg != NULL, "seg is NULL");

	  //Initialize the name of the segment
      segment    =  seg;

      //Create a filname fo a Term Info File
	  string tisFile = Misc::segmentname(segment,".tis");
	  string tiiFile = Misc::segmentname(segment,".tii");
	  bool success = false;
    origEnum = indexEnum = NULL;
    _size = indexTermsLength = totalIndexInterval = 0;

	  try {
		  //Create an SegmentTermEnum for storing all the terms read of the segment
		  origEnum = _CLNEW SegmentTermEnum( directory->openInput( tisFile.c_str(), readBufferSize ), fieldInfos, false);
		  _size =  origEnum->size;
		  totalIndexInterval = origEnum->indexInterval;
		  indexEnum = _CLNEW SegmentTermEnum( directory->openInput( tiiFile.c_str(), readBufferSize ), fieldInfos, true);

		  //Check if enumerator points to a valid instance
		  CND_CONDITION(origEnum != NULL, "No memory could be allocated for orig enumerator");
		  CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index enumerator");

		  success = true;
	  } _CLFINALLY({
		  // With lock-less commits, it's entirely possible (and
		  // fine) to hit a FileNotFound exception above. In
		  // this case, we want to explicitly close any subset
		  // of things that were opened so that we don't have to
		  // wait for a GC to do so.
		  if (!success) {
			  close();
		  }
	  });
Пример #23
0
	PhraseScorer::PhraseScorer(Weight* weight, TermPositions** tps, 
		int32_t* positions, Similarity* similarity, uint8_t* norms):
		Scorer(similarity)
	{
	//Func - Constructor
	//Pre  - tps != NULL and is a NULL terminated array of TermPositions
	//       positions holds one entry for every entry of tps
	//       norms may be NULL
	//Post - The instance has been created: one PhrasePositions per entry of
	//       tps has been linked into the list first..last and pq has been
	//       created with room for that many entries

		CND_PRECONDITION(tps != NULL,"tps is NULL");

		//norms is deliberately not checked: it is only used if phraseFreq
		//returns more than 0.0, which should only happen if norms != NULL

		this->weight = weight;
		this->value  = weight->getValue();
		this->norms  = norms;
		firstTime    = true;
		more         = true;

		//start with an empty list
		first = NULL;
		last  = NULL;

		//append one PhrasePositions for each TermPositions up to the NULL terminator
		int32_t count = 0;
		for (; tps[count] != NULL; ++count){
			PhrasePositions* created = _CLNEW PhrasePositions(tps[count], positions[count]);
			CND_CONDITION(created != NULL,"Could not allocate memory for pp");

			if (last == NULL)
				first = created;
			else
				last->_next = created;
			last = created;
		}

		//count now equals the number of entries in tps
		pq = _CLNEW PhraseQueue(count);
		CND_CONDITION(pq != NULL,"Could not allocate memory for pq");
	}
Пример #24
0
	void PhraseScorer::firstToLast(){
	//Func - Moves first to the end of the list
	//Pre  - first is NULL or points to an PhrasePositions Instance
	//       last  is NULL or points to an PhrasePositions Instance
	//       first and last both are NULL or both are not NULL
	//Post - The first element has become the last element in the list

		CND_PRECONDITION(((first==NULL && last==NULL) ||(first !=NULL && last != NULL)),
					   "Either first or last is NULL but not both");

		//Check if first and last are valid pointers
		if(first && last){
			last->_next = first;
			last = first;
			first = first->_next;
			last->_next = NULL;
		}
	}
Пример #25
0
//internal static function shared for clucene
string Misc::segmentname( const char* segment, const char* ext, const int32_t x ){
//Func - Returns a string holding a filename made by concatenating segment
//       with ext and x
//Pre  - ext != NULL and holds the extension
//       x contains a number
//Post - When x = -1 the returned string contains the concatenation of segment
//       and ext, otherwise it contains the concatenation of segment, ext and
//       the decimal representation of x

	CND_PRECONDITION(ext != NULL, "ext is NULL");

	string result(segment);
	result += ext;

	if ( x != -1 ){
		//Format with the full size of the buffer: a limit of 10 cuts off every
		//10 digit or negative number. The Microsoft _snprintf also leaves a
		//truncated result unterminated, hence the explicit terminator
		char buf[30];
		_snprintf(buf,sizeof(buf),"%d",x);
		buf[sizeof(buf)-1] = '\0';
		result += buf;
	}

	return result;
}
FieldsReader::FieldsReader(Directory* d, const QString& segment, FieldInfos* fn)
    : fieldInfos(fn)
{
    //Func - Constructor
    //       Opens the ".fdt" and ".fdx" files of the segment in directory d
    //Pre  - d contains a valid reference to a Directory
    //       segment is not empty
    //       fn contains a valid reference to a FieldInfos
    //Post - The instance has been created: fieldsStream reads the ".fdt" file,
    //       indexStream reads the ".fdx" file and _size is the length of the
    //       index stream divided by 8

    CND_PRECONDITION(!segment.isEmpty(), "segment != NULL");

    QString buf = Misc::segmentname(segment, QLatin1String(".fdt"));
    fieldsStream = d->openInput(buf);

    buf = Misc::segmentname(segment, QLatin1String(".fdx"));
    indexStream = d->openInput(buf);

    //Divide first and narrow afterwards: a cast binds tighter than the
    //division, so "(int32_t)length() / 8" would truncate the stream length
    //itself before dividing it
    _size = (int32_t)(indexStream->length() / 8);
}
Пример #27
0
  IndexWriter::IndexWriter(const char* path, Analyzer* a, const bool create, const bool _closeDir):
		analyzer(a),
		closeDir(_closeDir),
		writeLock( NULL ),
		directory( FSDirectory::getDirectory(path, create) ),
		segmentInfos (_CLNEW SegmentInfos)
  {
  //Func - Constructor
  //       Sets up an IndexWriter for the index stored at path.
  //Pre  - path != NULL and names the directory of the index
  //       a refers to the analyzer used for the text to be indexed
  //       create tells whether a new index must be created at path or an
  //       existing one opened
  //Post - The directory for path has been obtained from FSDirectory and the
  //       remaining initialization has been carried out by _IndexWriter(create).
  //       When create is true a new, empty index replaces any index already
  //       present at path

	  CND_PRECONDITION(path != NULL, "path is NULL");

	  //Everything else is set up by the shared initialization routine
	  _IndexWriter ( create );
  }
Пример #28
0
  bool PhrasePositions::nextPosition(){
  //Func - Advances to the next position, if any is left
  //Pre  - tp != NULL
  //Post - count has been decremented. When positions were left, position holds
  //       the next position of tp minus offset and true has been returned,
  //       otherwise false has been returned

      CND_PRECONDITION(tp != NULL,"tp is NULL");

      //count is decremented on every call, whether or not a position is left
      const bool positionsLeft = count > 0;
      --count;

      if (!positionsLeft)
          return false;

      //read the subsequent position
      //Note: position can become negative here; a check for position >= 0
      //used to exist but made the "SearchTest" test fail and was disabled
      position = tp->nextPosition() - offset;
      return true;
	}
Пример #29
0
	void FilteredTermEnum::setEnum(TermEnum* actualEnum) {
	//Func - Replaces the wrapped enumeration and positions this enumeration on
	//       the first term accepted by termCompare
	//Pre  - actualEnum != NULL
	//Post - The previously wrapped enumeration has been deleted and actualEnum
	//       has taken its place. currentTerm is the term actualEnum points at
	//       when termCompare accepts it, otherwise next() has been called

		CND_PRECONDITION(actualEnum != NULL,"actualEnum is NULL");

		_CLLDELETE(this->actualEnum);
		this->actualEnum = actualEnum;

		//term(false): ask for the term without being handed a reference to it
		Term* candidate = actualEnum->term(false);
		const bool accepted = (candidate != NULL) && termCompare(candidate);

		if (!accepted){
			//the enumeration does not start on a matching term: search on
			next();
			return;
		}

		//swap the reference held in currentTerm for one on the matching term
		_CLDECDELETE(currentTerm);
		currentTerm = _CL_POINTER(candidate);
	}
Пример #30
0
   void Document::removeField(const TCHAR* name) {
	  CND_PRECONDITION(name != NULL, "name is NULL");

      DocumentFieldEnumeration::DocumentFieldList* previous = NULL;
      DocumentFieldEnumeration::DocumentFieldList* current = fieldList;
      while (current != NULL) {
         //cannot use interning here, because name is probably not interned
         if ( _tcscmp(current->field->name(),name) == 0 ){
            if (previous){
               previous->next = current->next;
            }else
               fieldList = current->next;
            current->next=NULL; //ensure fieldlist destructor doesnt delete it
            _CLDELETE(current);
            return;
         }
		 		previous = current;
         current = current->next;
      }
   }