Beispiel #1
0
/**
 * Refills the character buffer from the underlying reader.
 *
 * The characters of the token currently being scanned
 * (buffer[tokenStart .. bufferLength)) are preserved at the front of the
 * buffer; freshly read characters are appended directly after them.
 * The buffer is allocated on first use and doubled when the current token
 * already fills it completely.
 *
 * Throws CL_ERR_IO ("read past eof") when the reader reports -1.
 */
void FastCharStream::refill() {
	// Number of characters of the current token that must survive the refill.
	const int32_t newPosition = bufferLength - tokenStart;

	if (tokenStart == 0) {			  // token won't fit in buffer
		if (buffer == NULL) {		  // first time: alloc buffer
			buffer = _CL_NEWARRAY(TCHAR, 2048);
			_bufferSize = 2048;
		} else if (bufferLength == _bufferSize) { // grow buffer
			_bufferSize *= 2;
			TCHAR* newBuffer = _CL_NEWARRAY(TCHAR, _bufferSize);
			_tcsncpy(newBuffer, buffer, bufferLength);
			_CLDELETE_LCARRAY(buffer);
			buffer = newBuffer;
		}
	} else {					  // shift token to front
		// Source and destination live in the same array and may overlap,
		// so a plain string/memory copy is not safe here; memmove is.
		memmove(buffer, buffer + tokenStart, newPosition * sizeof(TCHAR));
	}

	bufferLength = newPosition;			  // update state
	bufferPosition = newPosition;
	bufferStart += tokenStart;
	tokenStart = 0;

	// NOTE(review): the second argument is passed where the Java original
	// passes a buffer offset; confirm it matches the reader's read() contract.
	const TCHAR* charBuf = NULL;
	const int32_t charsRead =			  // fill space in buffer
		input->read(charBuf, newPosition, _bufferSize - newPosition);
	if (charsRead == -1) {
		_CLTHROWA(CL_ERR_IO, "read past eof");
	}
	else {
		// Append behind the preserved token. Copying to the start of the
		// buffer would overwrite the token while bufferLength still counts it.
		memcpy(buffer + newPosition, charBuf, charsRead * sizeof(TCHAR)); // TODO: Can we use the reader buffer instead of copying to our own?
		bufferLength += charsRead;
	}
}
/**
 * Creates a skip list writer bound to the given frequency and proximity
 * outputs.
 *
 * The skip interval, number of skip levels and document count are forwarded
 * to MultiLevelSkipListWriter. The current-document state is zeroed and one
 * bookkeeping slot per skip level is allocated for each "last skip" array.
 */
DefaultSkipListWriter::DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, IndexOutput* freqOutput, IndexOutput* proxOutput):
  MultiLevelSkipListWriter(skipInterval, numberOfSkipLevels, docCount)
{
  // Remember the streams whose file pointers are tracked by the skip data.
  this->freqOutput = freqOutput;
  this->proxOutput = proxOutput;

  // Nothing buffered yet.
  this->curPayloadLength = 0;
  this->curDoc           = 0;
  this->curProxPointer   = 0;
  this->curFreqPointer   = 0;

  // Per-level "last written" state.
  lastSkipDoc           = _CL_NEWARRAY(int32_t, numberOfSkipLevels);
  lastSkipPayloadLength = _CL_NEWARRAY(int32_t, numberOfSkipLevels);
  lastSkipFreqPointer   = _CL_NEWARRAY(int64_t, numberOfSkipLevels);
  lastSkipProxPointer   = _CL_NEWARRAY(int64_t, numberOfSkipLevels);
}
Beispiel #3
0
/**
 * Returns a newly allocated, NUL-terminated copy of the current token,
 * i.e. the characters between tokenStart and bufferPosition.
 * The caller receives ownership of the returned array.
 */
TCHAR* FastCharStream::GetImage() {
	const size_t imageLen = bufferPosition - tokenStart;
	TCHAR* image = _CL_NEWARRAY(TCHAR, imageLen + 1);
	const TCHAR* tokenBegin = buffer + tokenStart;
	_tcsncpy(image, tokenBegin, imageLen);
	image[imageLen] = 0; // terminate the copy
	return image;
}
/**
 * Binds this reader to a NULL-terminated array of sub readers.
 *
 * Counts the sub readers, resets the cached document statistics and builds
 * the starts array: starts[i] is the sum of maxDoc() of all sub readers
 * before index i, and the final entry holds the overall maxDoc.
 * _hasDeletions is set as soon as any sub reader reports deletions.
 */
void MultiReader::initialize(IndexReader** subReaders){
  this->subReaders = subReaders;
  this->subReadersLength = 0;

  // The array is NULL-terminated: advance until the sentinel is found.
  if ( subReaders != NULL ){
     for ( ; subReaders[subReadersLength] != NULL; ++subReadersLength ){
     }
  }

  ones     = NULL;
  _numDocs = -1;
  _maxDoc  = 0;

  // One extra slot so that starts[subReadersLength] can hold the total.
  starts = _CL_NEWARRAY(int32_t, subReadersLength + 1);
  int32_t idx = 0;
  while (idx < subReadersLength) {
     starts[idx] = _maxDoc;

     // accumulate the document count and note deletions
     _maxDoc += subReaders[idx]->maxDoc();
     if (subReaders[idx]->hasDeletions())
        _hasDeletions = true;

     ++idx;
  }
  starts[subReadersLength] = _maxDoc;
}
  /**
   * Runs the query against every searchable and merges the results.
   *
   * Each sub result's document numbers are shifted by starts[i] before being
   * offered to a HitQueue of capacity nDocs. Merging a sub result stops at
   * the first document the queue rejects. The queue is then drained from the
   * back of the result array to the front.
   *
   * Returns a new TopDocs holding the summed total hit count and the merged
   * score docs.
   */
  TopDocs* MultiSearcher::_search(Query* query, Filter* filter, const int32_t nDocs) {
    HitQueue* queue = _CLNEW HitQueue(nDocs);
    int32_t hitCount = 0;

    for (int32_t s = 0; s < searchablesLen; s++) {
      // query this searcher and fold its totals into ours
      TopDocs* partial = searchables[s]->_search(query, filter, nDocs);
      hitCount += partial->totalHits;

      ScoreDoc* hits = partial->scoreDocs;
      int32_t k = 0;
      while (k < partial->scoreDocsLength) {
        hits[k].doc += starts[s];       // rebase the doc number
        if ( !queue->insert(hits[k]) )
          break;                        // queue rejected it: stop merging this result
        ++k;
      }

      _CLDELETE(partial);
    }

    // Drain the queue; pop() fills the array from the last slot to the first.
    int32_t mergedLen = queue->size();
    ScoreDoc* merged = _CL_NEWARRAY(ScoreDoc, mergedLen);
    int32_t slot = mergedLen;
    while (slot-- > 0) {
      merged[slot] = queue->pop();
    }

    _CLDELETE(queue);

    return _CLNEW TopDocs(hitCount, merged, mergedLen);
  }
	/* Returns a newly allocated, NUL-terminated copy of the part of the given
	 * buffer that lies between _textStartPos and _textEndPos.
	 */
	TCHAR* TextFragment::toString(StringBuffer* buffer) {
		TCHAR* fragment = _CL_NEWARRAY(TCHAR, _textEndPos - _textStartPos + 1);
		const TCHAR* from = buffer->getBuffer() + _textStartPos;

		_tcsncpy(fragment, from, _textEndPos - _textStartPos);
		fragment[_textEndPos - _textStartPos] = _T('\0');

		return fragment;
	}
  /**
   * Copies len bytes from src into the file's chain of fixed-size buffers,
   * starting at the current pointer.
   *
   * The data is split at BUFFER_SIZE boundaries. A new buffer is allocated
   * and appended when the write reaches the position right after the last
   * existing buffer. Afterwards the file length is extended if the pointer
   * moved past it and the modification time is refreshed.
   */
  void RAMIndexOutput::flushBuffer(const uint8_t* src, const int32_t len) {
    int32_t copied = 0;
    while (copied != len) {
      // Locate the destination buffer and the offset inside it.
      uint32_t chunkIndex = pointer/CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE;
      int32_t chunkOffset = pointer%CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE;

      // Copy whichever is smaller: the room left in the chunk or the rest of src.
      int32_t roomInChunk = CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE - chunkOffset;
      int32_t pending = len - copied;
      int32_t count = roomInChunk;
      if (roomInChunk >= pending)
        count = pending;

      uint8_t* chunk = NULL;
      if (chunkIndex != file->buffers.size()){
        chunk = file->buffers[chunkIndex];
      }else{
        chunk = _CL_NEWARRAY(uint8_t, CL_NS(store)::BufferedIndexOutput::BUFFER_SIZE);
        file->buffers.push_back( chunk );
      }

      memcpy(chunk+chunkOffset, src+copied, count * sizeof(uint8_t));
      copied += count;
      pointer += count;
    }

    if (file->length < pointer)
      file->length = pointer;

    file->lastModified = Misc::currentTimeMillis();
  }
Beispiel #8
0
/* DSR:CL_BUG: (See comment for join method in Misc.h): */
/**
 * Concatenates up to six strings into one newly allocated, NUL-terminated
 * string. NULL arguments are skipped. The caller receives ownership of the
 * returned array.
 *
 * The lengths are measured once and each part is written at a running
 * offset, so the output is never rescanned. Exactly one extra element is
 * reserved for the terminator, and no helper macro is left defined for the
 * rest of the translation unit.
 */
TCHAR* Misc::join (const TCHAR* a, const TCHAR* b, const TCHAR* c,
                   const TCHAR* d, const TCHAR* e, const TCHAR* f)
{
    const TCHAR* parts[6] = { a, b, c, d, e, f };
    size_t lengths[6];
    size_t i;

    // One element (not sizeof(TCHAR) elements) for the terminator.
    size_t totalLen = 1;
    for (i = 0; i < 6; ++i) {
        lengths[i] = (parts[i] == NULL) ? 0 : _tcslen(parts[i]);
        totalLen += lengths[i];
    }

    TCHAR* buf = _CL_NEWARRAY(TCHAR, totalLen);
    TCHAR* out = buf;
    for (i = 0; i < 6; ++i) {
        if (lengths[i] > 0) {
            _tcsncpy(out, parts[i], lengths[i]);
            out += lengths[i];
        }
    }
    *out = 0;

    return buf;
}
Beispiel #9
0
  // Narrow-character variant of readString: reads into a temporary TCHAR
  // buffer of maxLength elements via the TCHAR overload, then converts
  // ret+1 characters into the caller's buffer with STRCPY_TtoA.
  // The temporary buffer is released in the _CLFINALLY clause.
  // Returns the value reported by the TCHAR overload (ret starts at -1).
  // NOTE(review): assumes `buffer` has room for ret+1 chars - confirm with callers.
  int32_t IndexInput::readString(char* buffer, const int32_t maxLength){
  	TCHAR* buf = _CL_NEWARRAY(TCHAR,maxLength);
    int32_t ret = -1;
  	try{
	  	ret = readString(buf,maxLength);
	  	// ret+1: presumably includes the terminator written by the TCHAR overload - TODO confirm
	  	STRCPY_TtoA(buffer,buf,ret+1);
  	}_CLFINALLY ( _CLDELETE_CARRAY(buf); )
  	return ret;
Beispiel #10
0
/**
 * Returns a newly allocated string of the form "QueryFilter(<query>)".
 *
 * The 14 extra elements cover the 13 characters of the surrounding
 * "QueryFilter(" and ")" plus the terminator. The caller receives ownership
 * of the returned array.
 */
TCHAR* QueryFilter::toString()
{
	TCHAR* queryText = query->toString();
	const size_t capacity = 14 + _tcslen(queryText);

	TCHAR* result = _CL_NEWARRAY( TCHAR, capacity );
	result[0] = 0;
	_sntprintf( result, capacity, _T("QueryFilter(%s)"), queryText );

	// the query's own string is no longer needed
	_CLDELETE_CARRAY(queryText);
	return result;
}
	/**
	 * Creates a fuzzy term enumeration for the given search term.
	 *
	 * minSimilarity must lie in [0, 1); otherwise CL_ERR_IllegalArgument is
	 * thrown. The search term text is split into a prefix of at most
	 * _prefixLength characters (clamped to the term length) and the remaining
	 * text. The enumeration is positioned on a term made of the search term's
	 * field and that prefix.
	 */
	FuzzyTermEnum::FuzzyTermEnum(IndexReader* reader, Term* term, float_t minSimilarity, size_t _prefixLength):
		FilteredTermEnum(),d(NULL),dLen(0),_similarity(0),_endEnum(false),searchTerm(_CL_POINTER(term)),
		text(NULL),textLen(0),prefix(NULL)/* ISH: was STRDUP_TtoT(LUCENE_BLANK_STRING)*/,prefixLength(0),
		minimumSimilarity(minSimilarity)
	{
		CND_PRECONDITION(term != NULL,"term is NULL");

		// Reject similarities outside [0, 1).
		if (minSimilarity < 0.0f) {
			_CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity cannot be less than 0");
		}
		if (minSimilarity >= 1.0f) {
			_CLTHROWA(CL_ERR_IllegalArgument,"minimumSimilarity cannot be greater than or equal to 1");
		}

		// minimumSimilarity < 1 is guaranteed here, so the divisor is non-zero.
		scale_factor = 1.0f / (1.0f - minimumSimilarity);
		//TODO: this.field = searchTerm.field();

		// A requested prefix longer than the term is clamped to the whole term,
		// which means the entire word has to match.
		const size_t fullSearchTermLength = searchTerm->textLength();
		size_t realPrefixLength = _prefixLength;
		if (realPrefixLength > fullSearchTermLength) {
			realPrefixLength = fullSearchTermLength;
		}

		// Everything after the prefix is the part compared fuzzily.
		text = STRDUP_TtoT(searchTerm->text() + realPrefixLength);
		textLen = fullSearchTermLength - realPrefixLength;

		// Own copy of the prefix, NUL-terminated.
		prefix = _CL_NEWARRAY(TCHAR,realPrefixLength+1);
		_tcsncpy(prefix, searchTerm->text(), realPrefixLength);
		prefix[realPrefixLength]='\0';
		prefixLength = realPrefixLength;

		initializeMaxDistances();

		// Start enumerating at <field, prefix>.
		Term* startTerm = _CLNEW Term(searchTerm->field(), prefix); // _CLNEW Term(term, prefix); -- not intern'd?
		setEnum(reader->terms(startTerm));
		_CLLDECDELETE(startTerm);


		/* LEGACY:
		//Initialize e to NULL
		e          = NULL;
		eWidth     = 0;
		eHeight    = 0;

		if(prefixLength > 0 && prefixLength < textLen){
		this->prefixLength = prefixLength;

		prefix = _CL_NEWARRAY(TCHAR,prefixLength+1);
		_tcsncpy(prefix,text,prefixLength);
		prefix[prefixLength]='\0';

		textLen = prefixLength;
		text[textLen]='\0';
		}
		*/
	}
  StringBuffer::StringBuffer(){
  //Func - Default constructor. Creates an empty StringBuffer that owns a
  //       buffer of LUCENE_DEFAULT_TOKEN_BUFFER_SIZE elements.
  //Pre  - true
  //Post - len is 0 and an owned buffer of length bufferLength has been allocated

      this->len          = 0;
      this->bufferOwner  = true;
      this->bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE;
      this->buffer       = _CL_NEWARRAY(TCHAR, this->bufferLength);
  }
  StringBuffer::StringBuffer(const int32_t initSize){
  //Func - Constructor. Creates an empty StringBuffer that owns a buffer with
  //       room for initSize characters plus the terminator '\0'.
  //Pre  - initSize > 0
  //Post - len is 0 and an owned buffer of length initSize + 1 has been allocated

      this->len          = 0;
      this->bufferOwner  = true;
      //one extra element for the terminator
      this->bufferLength = initSize + 1;
      this->buffer       = _CL_NEWARRAY(TCHAR, this->bufferLength);
  }
/*
  Checks that the size tracked by a MockRAMDirectory (sizeInBytes) matches its
  recomputed size, both before and after numThreads threads have run indexDocs
  against a shared IndexWriter, and that the final document count is as
  expected.
 */
void testRAMDirectorySize(CuTest * tc)  {

    MockRAMDirectory * dir = _CLNEW MockRAMDirectory(indexDir);
    WhitespaceAnalyzer analyzer;
    IndexWriter * indexWriter = _CLNEW IndexWriter(dir, &analyzer, false);
    indexWriter->optimize();

    CuAssertTrue(tc, dir->sizeInBytes == dir->getRecomputedSizeInBytes(), _T("RAMDir size"));

    // One thread handle and one argument record per worker.
    _LUCENE_THREADID_TYPE* workers = _CL_NEWARRAY(_LUCENE_THREADID_TYPE, numThreads);
    ThreadData * args = _CL_NEWARRAY(ThreadData, numThreads);

    int started = 0;
    while (started < numThreads) {
        ThreadData & arg = args[started];
        arg.num = started;
        arg.dir = dir;
        arg.tc = tc;
        arg.writer = indexWriter;
        workers[started] = _LUCENE_THREAD_CREATE(&indexDocs, &args[started]);
        ++started;
    }

    // Wait for every worker before inspecting the directory again.
    int joined = 0;
    while (joined < numThreads) {
        _LUCENE_THREAD_JOIN(workers[joined]);
        ++joined;
    }

    _CLDELETE_ARRAY(workers);
    _CLDELETE_ARRAY(args);

    indexWriter->optimize();
    CuAssertTrue(tc, dir->sizeInBytes == dir->getRecomputedSizeInBytes(), _T("RAMDir size"));

    CuAssertEquals(tc, docsToAdd + (numThreads * (docsPerThread-1)), indexWriter->docCount(), _T("document count"));

    indexWriter->close();
    _CLLDELETE(indexWriter);

    dir->close();
    _CLLDELETE(dir);
}
  void StringBuffer::clear(){
  //Func - Resets the StringBuffer to its default empty state
  //Pre  - true
  //Post - The previous buffer has been released and a fresh buffer of
  //       LUCENE_DEFAULT_TOKEN_BUFFER_SIZE elements has been allocated; len is 0

      //Release the current buffer
      _CLDELETE_CARRAY(buffer);

      //Back to the default capacity with no content
      bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE;
      len = 0;
      buffer = _CL_NEWARRAY(TCHAR, bufferLength);
  }
Beispiel #16
0
  /**
   * Copy constructor. Duplicates the buffering state of `other` and, when
   * `other` currently holds buffered data, a private copy of that data.
   *
   * The private buffer is allocated with the full buffer capacity rather than
   * only the number of bytes currently buffered: refill() reuses a non-NULL
   * buffer and may read up to bufferSize bytes into it, so a smaller
   * allocation could be overrun once the copy refills.
   */
  BufferedIndexInput::BufferedIndexInput(const BufferedIndexInput& other):
  	IndexInput(other),
    buffer(NULL),
	bufferSize(other.bufferSize),
    bufferStart(other.bufferStart),
    bufferLength(other.bufferLength),
    bufferPosition(other.bufferPosition)
  {
    /* DSR: Does the fact that sometime clone.buffer is not NULL even when
    ** clone.bufferLength is zero indicate memory corruption/leakage?
    **   if ( clone.buffer != NULL) { */
    if (other.bufferLength != 0 && other.buffer != NULL) {
      // Never allocate less than what is about to be copied, and never less
      // than what a later refill() may write.
      const int32_t capacity = bufferSize > bufferLength ? bufferSize : bufferLength;
      buffer = _CL_NEWARRAY(uint8_t,capacity);
	  memcpy(buffer,other.buffer,bufferLength * sizeof(uint8_t));
    }
  }
Beispiel #17
0
   /**
    * Reads a string: a VInt length followed by that many characters.
    *
    * For a zero length the blank string is returned, duplicated first when
    * _unique is set. Otherwise a newly allocated, NUL-terminated array with
    * the characters read is returned.
    */
   TCHAR* IndexInput::readString(const bool _unique){
    const int32_t len = readVInt();

    if ( len != 0 ){
      // one extra element for the terminator
      TCHAR* ret = _CL_NEWARRAY(TCHAR,len+1);
      readChars(ret, 0, len);
      ret[len] = 0;
      return ret;
    }

    //todo: does non unique ever occur?
    if ( _unique )
      return stringDuplicate(LUCENE_BLANK_STRING);

    return LUCENE_BLANK_STRING;
  }
  void StringBuffer::reserve(const int32_t size){
  	if ( bufferLength >= size )
  		return;
	bufferLength = size;

	//Allocate a new buffer of length bufferLength
    TCHAR* tmp = _CL_NEWARRAY(TCHAR,bufferLength);
    _tcsncpy(tmp, buffer, len);
    tmp[len] = '\0';
	
	//destroy the old buffer
	if (buffer){
		_CLDELETE_CARRAY(buffer);
	}
	//Assign the new buffer tmp to buffer
    buffer = tmp;
  }
  TCHAR* StringBuffer::toString(){
  //Func - Returns a newly allocated copy of the string held by the
  //       StringBuffer, sized to the string length plus the terminator
  //Pre  - true
  //Post - A terminated copy of the first len characters has been returned

      TCHAR* copy = _CL_NEWARRAY(TCHAR,len + 1);
      if (!copy){
          //nothing was allocated, so there is nothing to fill in
          return copy;
      }

      _tcsncpy(copy, buffer, len);
      copy[len] = '\0';
      return copy;
  }
Beispiel #20
0
  /**
   * Loads the next chunk of the file into the buffer.
   *
   * The chunk starts at the current absolute position
   * (bufferStart + bufferPosition) and spans at most bufferSize bytes,
   * clipped to length(). The buffer is allocated lazily on first use.
   *
   * Throws CL_ERR_IO when there is nothing left to read. This also covers a
   * position that lies beyond the end of the file, where the computed chunk
   * length is negative and must never be handed to readInternal().
   */
  void BufferedIndexInput::refill() {
    const int64_t start = bufferStart + bufferPosition;
    int64_t end = start + bufferSize;
    if (end > length())				  // don't read past EOF
      end = length();
    bufferLength = (int32_t)(end - start);
    if (bufferLength <= 0)			  // at or beyond EOF
      _CLTHROWA(CL_ERR_IO, "IndexInput read past EOF");

    if (buffer == NULL){
      buffer = _CL_NEWARRAY(uint8_t,bufferSize);		  // allocate buffer lazily
    }
    readInternal(buffer, bufferLength);


    bufferStart = start;
    bufferPosition = 0;
  }
  void StringBuffer::growBuffer(const int32_t minLength, const int32_t skippingNInitialChars) {
  //Func - Has the buffer grown to a minimum length of minLength or bigger and shifts the
  //       current string in buffer by skippingNInitialChars forward
  //Pre  - After growth, must have at least enough room for contents + terminator so
  //       minLength >= skippingNInitialChars + len + 1
  //       skippingNInitialChars >= 0
  //Post - The buffer has been grown to a minimum length of minLength or bigger and
  //       if skippingNInitialChars > 0, the contents of the buffer has beeen shifted
  //       forward by skippingNInitialChars positions as the buffer is reallocated,
  //       leaving the first skippingNInitialChars uninitialized (presumably to be
  //       filled immediately thereafter by the caller).

    CND_PRECONDITION (skippingNInitialChars >= 0, "skippingNInitialChars is less than zero");
    CND_PRECONDITION (minLength >= skippingNInitialChars + len + 1,"skippingNInitialChars is not large enough");

    //More aggressive growth strategy to offset smaller default buffer size:
	if ( !bufferOwner ){
		if ( bufferLength<minLength )
			_CLTHROWA(CL_ERR_IllegalArgument,"[StringBuffer::grow] un-owned buffer could not be grown");
		return;
	}
	
	bufferLength *= 2;
	//Check that bufferLength is bigger than minLength
	if (bufferLength < minLength){
	    //Have bufferLength become minLength because it still was too small
        bufferLength = minLength;
	}

	//Allocate a new buffer of length bufferLength
    TCHAR* tmp = _CL_NEWARRAY(TCHAR,bufferLength);
    //The old buffer might not have been null-terminated, so we _tcsncpy
    //only len bytes, not len+1 bytes (the latter might read one char off the
    //end of the old buffer), then apply the terminator to the new buffer.
    _tcsncpy(tmp + skippingNInitialChars, buffer, len);
    tmp[skippingNInitialChars + len] = '\0';
	
	//destroy the old buffer
	if (buffer){
		_CLDELETE_CARRAY(buffer);
	}
	//Assign the new buffer tmp to buffer
    buffer = tmp;
  }
/**
 * Returns the array that maps document numbers around deletions, building it
 * on first use.
 *
 * The map is only built when the reader has deletions; otherwise docMap is
 * returned as it is. Deleted documents map to -1, all other documents map to
 * consecutive numbers starting at 0.
 */
int32_t* SegmentMergeInfo::getDocMap(){
	// Already built, or nothing to map around.
	if ( docMap != NULL || !reader->hasDeletions() )
		return docMap;

	// One entry per document the reader manages, deleted ones included.
	const int32_t maxDoc = reader->maxDoc();
	docMap = _CL_NEWARRAY(int32_t,maxDoc);

	int32_t nextLive = 0;
	for (int32_t doc = 0; doc < maxDoc; ++doc) {
		// deleted documents get -1, the others the next free number
		docMap[doc] = reader->isDeleted(doc) ? -1 : nextLive++;
	}
	return docMap;
}
Beispiel #23
0
//static
/**
 * Returns a newly allocated copy of val in which every non-overlapping
 * occurrence of srch, scanning left to right, is replaced by repl.
 * The caller receives ownership of the returned array.
 *
 * - The scan starts at the first character, so a match at index 0 is
 *   replaced and an empty val is never read past its terminator.
 * - After a match the scan resumes behind it, so overlapping occurrences are
 *   not counted and the size computation always agrees with the copy loop.
 * - An empty srch matches nothing; val is returned as a plain copy.
 */
const TCHAR* Misc::replace_all( const TCHAR* val, const TCHAR* srch, const TCHAR* repl )
{
const size_t repLen = _tcslen(repl);
const size_t srchLen = _tcslen(srch);
const size_t srcLen = _tcslen(val);

// First pass: count the occurrences to size the result exactly.
size_t cnt = 0;
const TCHAR* pos = val;
if ( srchLen > 0 ){
    while( (pos = _tcsstr(pos, srch)) != NULL ) {
        ++cnt;
        pos += srchLen; //resume behind the match
    }
}

const size_t lenNew = (srcLen - (srchLen * cnt)) + (repLen * cnt);
TCHAR* ret = _CL_NEWARRAY(TCHAR,lenNew+1);

// Second pass: copy the text between matches and insert the replacement.
TCHAR* cur = ret; //write position in the result
const TCHAR* lst = val; //start of the text not yet copied
if ( cnt > 0 ){
    pos = val;
    while( (pos = _tcsstr(pos, srch)) != NULL ) {
        const size_t gap = pos - lst;
        _tcsncpy(cur, lst, gap); //text in front of the match
        cur += gap;

        _tcsncpy(cur, repl, repLen); //the replacement itself
        cur += repLen;

        pos += srchLen; //skip the match
        lst = pos;
    }
}
_tcscpy(cur, lst); //remaining tail, including the terminator

return ret;
}
  StringBuffer::StringBuffer(const TCHAR* value){
  //Func - Constructor. Creates a StringBuffer that owns a copy of the string value
  //Pre  - value != NULL
  //Post - The buffer holds a terminated copy of value; its length is the
  //       larger of the space the copy needs and LUCENE_DEFAULT_TOKEN_BUFFER_SIZE

      len = (int32_t) _tcslen(value);

      //Elements needed for the characters plus the terminator
      const int32_t needed = len + 1;

      //Never allocate less than the default buffer size
      bufferLength = LUCENE_DEFAULT_TOKEN_BUFFER_SIZE;
      if (needed >= LUCENE_DEFAULT_TOKEN_BUFFER_SIZE){
          bufferLength = needed;
      }

      buffer = _CL_NEWARRAY(TCHAR,bufferLength);
      bufferOwner  = true;

      //Copying needed elements brings the terminator of value along
      _tcsncpy(buffer, value, needed);
      CND_PRECONDITION (buffer[len] == '\0', "Buffer was not correctly terminated");
  }
/*
  Stress test against a single index: builds a base index of 100 documents,
  then runs two atomicIndexTest threads on the writer and two atomicSearchTest
  threads on the directory, and finally asserts that no thread flagged a
  failure.
 */
void runThreadingTests(CuTest* tc, Directory& directory){

  SimpleAnalyzer ANALYZER;
  IndexWriter writer(&directory, &ANALYZER, true);

  // Base index: 100 docs with a numeric "id" and the number spelled out in
  // English as "contents".
  StringBuffer words;
  TCHAR idText[10];
  int docNo = 0;
  while (docNo < 100) {
    Document doc;
    _i64tot(docNo,idText,10);
    doc.add(*_CLNEW Field(_T("id"), idText, Field::STORE_YES | Field::INDEX_UNTOKENIZED));

    words.clear();
    English::IntToEnglish(docNo, &words);
    doc.add(*_CLNEW Field(_T("contents"), words.getBuffer(), Field::STORE_NO | Field::INDEX_TOKENIZED));
    writer.addDocument(&doc);
    ++docNo;
  }
  writer.flush();

  // Two threads working on the writer, two on the directory.
  atomicSearchThreads = _CL_NEWARRAY(_LUCENE_THREADID_TYPE, 4);
  atomicSearchThreads[0] = _LUCENE_THREAD_CREATE(&atomicIndexTest, &writer);
  atomicSearchThreads[1] = _LUCENE_THREAD_CREATE(&atomicIndexTest, &writer);
  atomicSearchThreads[2] = _LUCENE_THREAD_CREATE(&atomicSearchTest, &directory);
  atomicSearchThreads[3] = _LUCENE_THREAD_CREATE(&atomicSearchTest, &directory);

  // Wait for all of them before tearing anything down.
  int t = 0;
  while ( t < 4 ){
    _LUCENE_THREAD_JOIN(atomicSearchThreads[t]);
    ++t;
  }
  _CLDELETE_ARRAY(atomicSearchThreads);

  writer.close();

  CuAssert(tc, _T("hit unexpected exception in one of the threads\n"), !atomicSearchFailed);
}
Beispiel #26
0
/**
 * Returns a newly allocated, NUL-terminated copy of the len characters that
 * end at bufferPosition. The caller receives ownership of the returned array.
 */
TCHAR* FastCharStream::GetSuffix(const int32_t len) {
	TCHAR* suffix = _CL_NEWARRAY(TCHAR, len + 1);
	const TCHAR* suffixBegin = buffer + (bufferPosition - len);
	_tcsncpy(suffix, suffixBegin, len);
	suffix[len] = 0; // terminate the copy
	return suffix;
}
/**
 * Convenience overload: allocates an array of DATEFIELD_DATE_LEN + 1
 * elements, lets the two-argument timeToString fill it for the given time,
 * and returns it. The caller receives ownership of the returned array.
 */
TCHAR* DateField::timeToString(const int64_t time) {
    TCHAR* formatted = _CL_NEWARRAY(TCHAR, DATEFIELD_DATE_LEN + 1);
    timeToString(time, formatted);
    return formatted;
}