C++ (Cpp) RdbBase Examples

Programming Language: C++ (Cpp)

Class/Type: RdbBase

Examples at hotexamples.com: 14

C++ (Cpp) RdbBase - 14 examples found. These are the top rated real world C++ (Cpp) examples of RdbBase extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getNumFiles(6)

getFile(3)

addNewFile(2)

generateGlobalIndex(2)

getGlobalIndex(2)

getIndex(2)

incorporateMerge(2)

attemptMerge(1)

getFileId2(1)

getMaps(1)

getNumGlobalRecs(1)

hasMergeFile(1)

Example #1

Show file

File: RdbMerge.cpp Project: BKJackson/open-source-search-engine

void RdbMerge::doneMerging ( ) {
	// let RdbDump free its m_verifyBuf buffer if it existed
	m_dump.reset();
	// debug msg
	//fprintf(stderr,"exiting, g_errno=%s!\n",mstrerror(g_errno));
	//exit(-1);
	// . free the list's memory, reset() doesn't do it
	// . when merging titledb i'm still seeing 200MB allocs to read from
	//   tfndb.
	m_list.freeList();
	// nuke our msg3
	//delete (m_msg3);
	// log a msg
	log(LOG_INFO,"db: Merge status: %s.",mstrerror(g_errno));
	// . reset our class
	// . this will free it's cutoff keys buffer, trash buffer, treelist
	// . TODO: should we not reset to keep the mem handy for next time
	//   to help avoid out of mem errors?
	m_msg5.reset();
	// . do we really need these anymore?
	// . turn these off before calling incorporateMerge() since it
	//   will call attemptMerge() on all the other dbs
	m_isMerging     = false;
	m_isSuspended   = false;
	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return;
	// pass g_errno on to incorporate merge so merged file can be unlinked
	base->incorporateMerge ( );
	// nuke the lock so others can merge
	//s_isMergeLocked = false;
}

Example #2

Show file

File: RdbBaseTest.cpp Project: privacore/open-source-search-engine

TEST_P(RdbBasePosdbIndexSingleDocTest, PosdbGenerateIndexSingleDocId) {
	const collnum_t collNum = 0;
	const int64_t docId = 1;

	RdbBase *base = g_posdb.getRdb()->getBase(collNum);

	ASSERT_EQ(0, base->addNewFile());
	RdbIndex *index0 = base->getIndex(0);
	int64_t termId0 = ::testing::get<0>(GetParam());
	GbTest::addPosdbKey(index0, termId0, docId, 0, ::testing::get<0>(GetParam()) == POSDB_DELETEDOC_TERMID);
	index0->writeIndex();

	ASSERT_EQ(1, base->addNewFile());
	RdbIndex *index1 = base->getIndex(1);
	int64_t termId1 = ::testing::get<1>(GetParam());
	GbTest::addPosdbKey(index1, termId1, docId, 0, ::testing::get<1>(GetParam()) == POSDB_DELETEDOC_TERMID);
	index1->writeIndex();

	ASSERT_EQ(2, base->addNewFile());
	RdbIndex *index2 = base->getIndex(2);
	int64_t termId2 = ::testing::get<2>(GetParam());
	GbTest::addPosdbKey(index2, termId2, docId, 0, ::testing::get<2>(GetParam()) == POSDB_DELETEDOC_TERMID);
	index2->writeIndex();

	base->generateGlobalIndex();
	auto globalIndex = base->getGlobalIndex();
	ASSERT_EQ(1, globalIndex->size());
	int64_t result = (((docId << RdbIndex::s_docIdOffset) | (!termId2 == POSDB_DELETEDOC_TERMID)) << RdbBase::s_docIdFileIndex_docIdOffset | 2);
	EXPECT_EQ(result, *globalIndex->begin());
}

Example #3

Show file

File: RdbMerge.cpp Project: lemire/open-source-search-engine

void RdbMerge::doneMerging ( ) {
	// save this
	int32_t saved = g_errno;
	// let RdbDump free its m_verifyBuf buffer if it existed
	m_dump.reset();
	// debug msg
	//fprintf(stderr,"exiting, g_errno=%s!\n",mstrerror(g_errno));
	//exit(-1);
	// . free the list's memory, reset() doesn't do it
	// . when merging titledb i'm still seeing 200MB allocs to read from
	//   tfndb.
	m_list.freeList();
	// nuke our msg3
	//delete (m_msg3);
	// log a msg
	log(LOG_INFO,"db: Merge status: %s.",mstrerror(g_errno));
	// . reset our class
	// . this will free it's cutoff keys buffer, trash buffer, treelist
	// . TODO: should we not reset to keep the mem handy for next time
	//   to help avoid out of mem errors?
	m_msg5.reset();
	// . do we really need these anymore?
	// . turn these off before calling incorporateMerge() since it
	//   will call attemptMerge() on all the other dbs
	m_isMerging     = false;
	m_isSuspended   = false;

	// if collection rec was deleted while merging files for it
	// then the rdbbase should be NULL i guess.
	if ( saved == ENOCOLLREC ) return;

	// if we are exiting then dont bother renaming the files around now.
	// this prevents a core in RdbBase::incorporateMerge()
	if ( g_process.m_mode == EXIT_MODE ) {
		log("merge: exiting. not ending merge.");
		return;
	}

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base = getRdbBase( m_rdbId, m_collnum );
	if ( ! base ) {
		return;
	}
	// pass g_errno on to incorporate merge so merged file can be unlinked
	base->incorporateMerge ( );
	// nuke the lock so others can merge
	//s_isMergeLocked = false;
}

Example #4

Show file

File: Msg3a.cpp Project: rdhananjaya/open-source-search-engine

void setTermFreqWeights ( char *coll,
                          Query *q ,
                          long long *termFreqs,
                          float *termFreqWeights ) {

    long long numDocsInColl = 0;
    RdbBase *base = getRdbBase ( RDB_CLUSTERDB  , coll );
    if ( base ) numDocsInColl = base->getNumGlobalRecs();
    // issue? set it to 1000 if so
    if ( numDocsInColl < 0 ) {
        log("query: Got num docs in coll of %lli < 0",numDocsInColl);
        // avoid divide by zero below
        numDocsInColl = 1;
    }
    // now get term freqs again, like the good old days
    long long *termIds = q->getTermIds();
    // just use rdbmap to estimate!
    for ( long i = 0 ; i < q->getNumTerms(); i++ ) {
        long long tf = g_posdb.getTermFreq ( coll ,termIds[i]);
        if ( termFreqs ) termFreqs[i] = tf;
        float tfw = getTermFreqWeight(tf,numDocsInColl);
        termFreqWeights[i] = tfw;
    }
}

Example #5

Show file

File: Indexdb.cpp Project: BKJackson/open-source-search-engine

void Indexdb::deepVerify ( char *coll ) {
	log ( LOG_INFO, "db: Deep Verifying Indexdb for coll %s...", coll );
	g_threads.disableThreads();

	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	key_t startKey;
	key_t endKey;
	startKey.setMin();
	endKey.setMax();
	//long minRecSizes = 64000;
	
	collnum_t collnum = g_collectiondb.getCollnum(coll);
	RdbBase *rdbBase = g_indexdb.m_rdb.getBase(collnum);
	long numFiles = rdbBase->getNumFiles();
	long currentFile = 0;
	
deepLoop:
	// done after scanning all files
	if ( currentFile >= numFiles ) {
		g_threads.enableThreads();
		log ( LOG_INFO, "db: Finished deep verify for %li files.",
				numFiles );
		return;
	}
	// scan this file
	if ( ! msg5.getList ( RDB_INDEXDB   ,
			      coll          ,
			      &list         ,
			      startKey      ,
			      endKey        ,
			      64000         , // minRecSizes   ,
			      true          , // includeTree   ,
			      false         , // add to cache?
			      0             , // max cache age
			      currentFile   , // startFileNum  ,
			      1             , // numFiles      ,
			      NULL          , // state
			      NULL          , // callback
			      0             , // niceness
			      false         , // err correction?
			      NULL          ,
			      0             ,
			      -1            ,
			      true          ,
			      -1LL          ,
			      &msg5b        ,
			      false         )) {
		g_threads.enableThreads();
		log("db: HEY! it did not block");
		return;
	}

	long count = 0;
	long got   = 0;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
		unsigned long groupId = getGroupId ( RDB_INDEXDB , &k );
		if ( groupId == g_hostdb.m_groupId ) got++;
	}
	if ( got != count ) {
		BigFile *f = rdbBase->getFile(currentFile);
		log ("db: File %s: Out of first %li records in indexdb, "
		     "only %li belong to our group.",
		     f->getFilename(),count,got );
	}
	//else
	//	log ( LOG_INFO, "db: File %li: Indexdb passed verification "
	//	      "successfully for %li recs.",currentFile,count );
	// next file
	currentFile++;
	goto deepLoop;
}

Example #6

Show file

File: Msg3.cpp Project: lemire/open-source-search-engine

// . but now that we may get a list remotely to fix data corruption,
//   this may indeed block
bool Msg3::doneScanning ( ) {
	QUICKPOLL(m_niceness);
	// . did we have any error on any scan?
	// . if so, repeat ALL of the scans
	g_errno = m_errno;
	// 2 retry is the default
	int32_t max = 2;
	// see if explicitly provided by the caller
	if ( m_maxRetries >= 0 ) max = m_maxRetries;
	// now use -1 (no max) as the default no matter what
	max = -1;
	// ENOMEM is particulary contagious, so watch out with it...
	if ( g_errno == ENOMEM && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == ENOMEM && m_maxRetries != -1 ) max = m_maxRetries;
	// when thread cannot alloc enough read buf it keeps the read buf
	// set to NULL and BigFile.cpp sets g_errno to EBUFTOOSMALL
	if ( g_errno == EBUFTOOSMALL && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == EBUFTOOSMALL && m_maxRetries != -1 ) max = m_maxRetries;
	// . if no thread slots available, that hogs up serious memory.
	//   the size of Msg3 is 82k, so having just 5000 of them is 430MB.
	// . i just made Msg3 alloc mem when it needs more than about 2k
	//   so this problem is greatly reduced, therefore let's keep 
	//   retrying... forever if no thread slots in thread queue since
	//   we become the thread queue in a way.
	if ( g_errno == ENOTHREADSLOTS ) max = -1;
	// this is set above if the map has the same consecutive key repeated
	// and the read is enormous
	if ( g_errno == ECORRUPTDATA ) max = 0;
	// usually bad disk failures, don't retry those forever
	//if ( g_errno == EIO ) max = 3;
        // no, now our hitachis return these even when they're good so
	// we have to keep retrying forever
	if ( g_errno == EIO ) max = -1;
	// count these so we do not take drives offline just because
	// kernel ring buffer complains...
	if ( g_errno == EIO ) g_numIOErrors++;
	// bail early on high priority reads for these errors
	if ( g_errno == EDISKSTUCK && m_niceness == 0 ) max = 0;
	if ( g_errno == EIO        && m_niceness == 0 ) max = 0;

	// how does this happen? we should never bail out on a low priority
	// disk read... we just wait for it to complete...
	if ( g_errno == EDISKSTUCK && m_niceness != 0 ) { char *xx=NULL;*xx=0;}

	// on I/O, give up at call it corrupt after a while. some hitachis
	// have I/O errros on little spots, like gk88, maybe we can fix him
	if ( g_errno == EIO && m_retryNum >= 5 ) {
		m_errno = ECORRUPTDATA;
		m_hadCorruption = true;
		// do not do any retries any more
		max = 0;
	}

	// convert m_errno to ECORRUPTDATA if it is EBUFTOOSMALL and the
	// max of the bytesToRead are over 500MB.
	// if bytesToRead was ludicrous, then assume that the data file
	// was corrupted, the map was regenerated and it patched
	// over the corrupted bits which were 500MB or more in size.
	// we cannot practically allocate that much, so let's just
	// give back an empty buffer. treat it like corruption...
	// the way it patches is to store the same key over all the corrupted
	// pages, which can get pretty big. so if you read a range with that
	// key you will be hurting!!
	// this may be the same scenario as when the rdbmap has consecutive
	// same keys. see above where we set m_errno to ECORRUPTDATA...
	if ( g_errno == EBUFTOOSMALL ) { 
		int32_t biggest = 0;
		for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) {
			if ( m_scans[i].m_bytesToRead < biggest ) continue;
			biggest = m_scans[i].m_bytesToRead;
		}
		if ( biggest > 500000000 ) {
			log("db: Max read size was %" PRId32" > 500000000. Assuming "
			    "corrupt data in data file.",biggest);
			m_errno = ECORRUPTDATA;
			m_hadCorruption = true;
			// do not do any retries on this, the read was > 500MB
			max = 0;
		}
	}

	// if shutting down gb then limit to 20 so we can shutdown because
	// it can't shutdown until all threads are out of the queue i think
	if ( g_process.m_mode == EXIT_MODE && max < 0 ) {
		//log("msg3: forcing retries to 0 because shutting down");
		max = 0;
	}

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base = getRdbBase( m_rdbId, m_collnum );
	if ( ! base ) {
		return true;
	}

	// this really slows things down because it blocks the cpu so
	// leave it out for now
#ifdef GBSANITYCHECK
	// check for corruption here, do not do it again in Msg5 if we pass
	if ( ! g_errno ) { // && g_conf.m_doErrorCorrection ) {
		int32_t i;
		for ( i = 0 ; i < m_numFileNums ; i++ )
			if ( ! m_lists[i].checkList_r ( false, false ) ) break;
		if ( i < m_numFileNums ) {
			g_errno = ECORRUPTDATA;
			m_errno = ECORRUPTDATA;
			max     = g_conf.m_corruptRetries; // try 100 times
			log("db: Encountered corrupt list in file %s.",
			    base->getFile(m_fileNums[i])->getFilename());
		}
		else
			m_listsChecked = true;
	}
#endif

	// try to fix this error i've seen
	if ( g_errno == EBADENGINEER && max == -1 )
		max = 100;

	// . if we had a ETRYAGAIN error, then try again now
	// . it usually means the whole file or a part of it was deleted 
	//   before we could finish reading it, so we should re-read all now
	// . RdbMerge deletes BigFiles after it merges them and also chops
	//   off file heads
	// . now that we have threads i'd imagine we'd get EBADFD or something
	// . i've also seen "illegal seek" as well
	if ( m_errno && (m_retryNum < max || max < 0) &&
	     // this will complete in due time, we can't call a sleep wrapper
	     // on it because the read is really still pending...
	     m_errno != EDISKSTUCK ) {
		// print the error
		static time_t s_time  = 0;
		time_t now = getTime();
		if ( now - s_time > 5 || g_errno != ENOTHREADSLOTS ) {
			log("net: Had error reading %s: %s. Retrying. "
			    "(retry #%" PRId32")", 
			    base->m_dbname,mstrerror(m_errno) , m_retryNum );
			s_time = now;
		}
		// send email alert if in an infinite loop, but don't send
		// more than once every 2 hours
		static int32_t s_lastSendTime = 0;
		if ( m_retryNum == 100 && getTime() - s_lastSendTime > 3600*2){
			// remove this for now it is going off all the time
			//g_pingServer.sendEmail(NULL,//g_hostdb.getMyHost(),
			//		       "100 read retries",true);
			s_lastSendTime = getTime();
		}
		// clear g_errno cuz we should for call to readList()
		g_errno = 0;
		// free the list buffer since if we have 1000 Msg3s retrying
		// it will totally use all of our memory
		for ( int32_t i = 0 ; i < m_numChunks ; i++ ) 
			m_lists[i].destructor();
		// count retries
		m_retryNum++;
		// backoff scheme, wait 100ms more each time
		int32_t wait ;
		if ( m_retryNum == 1 ) wait = 10;
		else                   wait = 200 * m_retryNum;
		// . don't wait more than 10 secs between tries
		// . i've seen gf0 and gf16 get mega saturated
		if ( wait > 10000 ) wait = 10000;
		// wait 500 ms
		if ( g_loop.registerSleepCallback ( wait  , // ms
						    this  ,
						    doneSleepingWrapper3,
						    m_niceness))
			return false;
		// otherwise, registration failed
		log(
		    "net: Failed to register sleep callback for retry. "
		    "Abandoning read. This is bad.");
		// return, g_errno should be set
		g_errno = EBUFTOOSMALL;
		m_errno = EBUFTOOSMALL;
		return true;
	}

	// if we got an error and should not retry any more then give up
	if ( g_errno ) {
		log(
		    "net: Had error reading %s: %s. Giving up after %" PRId32" "
		    "retries.",
		    base->m_dbname,mstrerror(g_errno) , m_retryNum );
		return true;
	}

	// note it if the retry finally worked
	if ( m_retryNum > 0 ) 
		log(LOG_INFO,"disk: Read succeeded after retrying %" PRId32" times.",
		    (int32_t)m_retryNum);

	// count total bytes for logging
	int32_t count = 0;
	// . constrain all lists to make merging easier
	// . if we have only one list, then that's nice cuz the constrain
	//   will allow us to send it right away w/ zero copying
	// . if we have only 1 list, it won't be merged into a final list,
	//   that is, we'll just set m_list = &m_lists[i]
	for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) {
		QUICKPOLL(m_niceness);
		// count total bytes for logging
		count += m_lists[i].getListSize();
		// . hint offset is relative to the offset of first key we read
		// . if that key was only 6 bytes RdbScan shift the list buf
		//   down 6 bytes to make the first key 12 bytes... a 
		//   requirement for all RdbLists
		// . don't inc it, though, if it was 0, pointing to the start
		//   of the list because our shift won't affect that
		if ( m_scans[i].m_shifted == 6 && m_hintOffsets[i] > 0 ) 
			m_hintOffsets[i] += 6;
		// posdb double compression
		if ( m_scans[i].m_shifted == 12 && m_hintOffsets[i] > 0 ) 
			m_hintOffsets[i] += 12;
		// . don't constrain on minRecSizes here because it may
		//   make our endKey smaller, which will cause problems
		//   when Msg5 merges these lists.
		// . If all lists have different endKeys RdbList's merge
		//   chooses the min and will merge in recs beyond that
		//   causing a bad list BECAUSE we don't check to make
		//   sure that recs we are adding are below the endKey
		// . if we only read from one file then constrain based 
		//   on minRecSizes so we can send the list back w/o merging
		//   OR if just merging with RdbTree's list
		int32_t mrs ;
		// . constrain to m_minRecSizesOrig, not m_minRecSizes cuz 
		//   that  could be adjusted by compensateForNegativeRecs()
		// . but, really, they should be the same if we only read from
		//   the root file
		if ( m_numFileNums == 1 ) mrs = m_minRecSizesOrig;
		else                      mrs = -1;
		// . this returns false and sets g_errno on error
		// . like if data is corrupt
		BigFile *ff = base->getFile(m_fileNums[i]);
		// if we did a merge really quick and delete one of the 
		// files we were reading, i've seen 'ff' be NULL
		char *filename = "lostfilename";
		if ( ff ) filename = ff->getFilename();

		// compute cache info
		RdbCache *rpc = getDiskPageCache ( m_rdbId );
		if ( ! m_allowPageCache ) rpc = NULL;
		int64_t vfd ;
		if ( ff ) vfd = ff->getVfd();
		key192_t ck ;
		if ( ff )
			ck = makeCacheKey ( vfd ,
					    m_scans[i].m_offset ,
					    m_scans[i].m_bytesToRead );
		if ( m_validateCache && ff && rpc && vfd != -1 ) {
			bool inCache;
			char *rec; int32_t recSize;
			inCache = rpc->getRecord ( (collnum_t)0 , // collnum
						   (char *)&ck , 
						   &rec , 
						   &recSize ,
						   true , // copy?
						   -1 , // maxAge, none 
						   true ); // inccounts?
			if ( inCache && 
			     // 1st byte is RdbScan::m_shifted
			     ( m_lists[i].m_listSize != recSize-1 ||
			       memcmp ( m_lists[i].m_list , rec+1,recSize-1) ||
			       *rec != m_scans[i].m_shifted ) ) {
				log("msg3: cache did not validate");
				char *xx=NULL;*xx=0;
			}
			mfree ( rec , recSize , "vca" );
		}


		///////
		//
		// STORE IN PAGE CACHE
		//
		///////
		// store what we read in the cache. don't bother storing
		// if it was a retry, just in case something strange happened.
		// store pre-constrain call is more efficient.
		if ( m_retryNum<=0 && ff && rpc && vfd != -1 &&
		     ! m_scans[i].m_inPageCache )
			rpc->addRecord ( (collnum_t)0 , // collnum
					 (char *)&ck , 
					 // rec1 is this little thingy
					 &m_scans[i].m_shifted,
					 1,
					 // rec2
					 m_lists[i].getList() ,
					 m_lists[i].getListSize() ,
					 0 ); // timestamp. 0 = now

		QUICKPOLL(m_niceness);

		// if from our 'page' cache, no need to constrain
		if ( ! m_lists[i].constrain ( m_startKey       ,
					      m_constrainKey   , // m_endKey
					      mrs           , // m_minRecSizes
					      m_hintOffsets[i] ,
					      //m_hintKeys   [i] ,
					      &m_hintKeys   [i*m_ks] ,
					      filename,//ff->getFilename() ,
					      m_niceness ) ) {
			log("net: Had error while constraining list read from "
			    "%s: %s/%s. vfd=%" PRId32" parts=%" PRId32". "
			    "This is likely caused by corrupted "
			    "data on disk.", 
			    mstrerror(g_errno), ff->getDir(),
			    ff->getFilename(), ff->m_vfd , 
			    (int32_t)ff->m_numParts );
			continue;
		}
	}

	// print the time
	if ( g_conf.m_logTimingDb ) {
		int64_t now = gettimeofdayInMilliseconds();
		int64_t took = now - m_startTime;
		log(LOG_TIMING,
		    "net: Took %" PRId64" ms to read %" PRId32" lists of %" PRId32" bytes total"
		     " from %s (niceness=%" PRId32").",
		     took,m_numFileNums,count,base->m_dbname,m_niceness);
	}
	return true;
}

Example #7

Show file

File: Msg3.cpp Project: lemire/open-source-search-engine

// . return false if blocked, true otherwise
// . set g_errno on error
// . read list of keys in [startKey,endKey] range
// . read at least "minRecSizes" bytes of keys in that range
// . the "m_endKey" of resulting, merged list may have a smaller endKey
//   than the argument, "endKey" due to limitation by "minRecSizes"
// . resulting list will contain ALL keys between ITS [m_startKey,m_endKey]
// . final merged list "should" try to have a size of at least "minRecSizes"
//   but due to negative/postive rec elimination may be less
// . the endKey of the lists we read may be <= "endKey" provided
// . we try to shrink the endKey if minRecSizes is >= 0 in order to
//   avoid excessive reading
// . by shrinking the endKey we cannot take into account the size of deleted
//   records, so therefore we may fall short of "minRecSizes" in actuality,
//   in fact, the returned list may even be empty with a shrunken endKey
// . we merge all lists read from disk into the provided "list"
// . caller should call Msg3.getList(int32_t i) and Msg3:getNumLists() to retrieve
// . this makes the query engine faster since we don't need to merge the docIds
//   and can just send them across the network separately and they will be
//   hashed into IndexTable's table w/o having to do time-wasting merging.
// . caller can specify array of filenums to read from so incremental syncing
//   in Sync class can just read from titledb*.dat files that were formed
//   since the last sync point.
bool Msg3::readList  ( char           rdbId         ,
		       collnum_t collnum ,
		       const char       *startKeyArg   ,
		       const char       *endKeyArg     ,
		       int32_t           minRecSizes   , // max size of scan
		       int32_t           startFileNum  , // first file to scan
		       int32_t           numFiles      , // rel. to startFileNum
		       void          *state         , // for callback
		       void        (* callback ) ( void *state ) ,
		       int32_t           niceness      ,
		       int32_t           retryNum      ,
		       int32_t           maxRetries    ,
		       bool           compensateForMerge ,
		       bool           justGetEndKey ,
		       bool           allowPageCache ,
		       bool           hitDisk        ) {

	// set this to true to validate
	m_validateCache = false;//true;

	// clear, this MUST be done so if we return true g_errno is correct
	g_errno = 0;
	// assume lists are not checked for corruption
	m_listsChecked = false;
	// warn
	if ( minRecSizes < -1 ) {
		log(LOG_LOGIC,"db: Msg3 got minRecSizes of %" PRId32", changing "
		    "to -1.",minRecSizes);
		minRecSizes = -1;
	}
	// reset m_alloc and data in all lists in case we are a re-call
	reset();
	// warning
	if ( collnum < 0 ) log(LOG_LOGIC,"net: NULL collection. msg3.");
	// remember the callback
	m_rdbId              = rdbId;
	m_collnum = collnum;
	m_callback           = callback;
	m_state              = state;
	m_niceness           = niceness;
	m_numScansCompleted  = 0;
	m_retryNum           = retryNum;
	m_maxRetries         = maxRetries;
	m_compensateForMerge = compensateForMerge;
	m_allowPageCache     = allowPageCache;
	m_hitDisk            = hitDisk;
	m_hadCorruption      = false;
	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( m_rdbId );
	// reset the group error
	m_errno    = 0;
	// . reset all our lists 
	// . these are reset in call the RdbScan::setRead() below
	//for ( int32_t i = 0 ; i < MAX_RDB_FILES ; i++ ) m_lists[i].reset();
	// . ensure startKey last bit clear, endKey last bit set
	// . no! this warning is now only in Msg5
	// . if RdbMerge is merging some files, not involving the root 
	//   file, then we can expect to get a lot of unmatched negative recs.
	// . as a consequence, our endKeys may often be negative. This means
	//   it may not annihilate with the positive key, but we should only
	//   miss like this at the boundaries of the lists we fetch.
	// . so in that case RdbList::merge will stop merging once the
	//   minRecSizes limit is reached even if it means ending on a negative
	//   rec key
	//if ( (startKey.n0 & 0x01) == 0x01 ) 
	if ( !KEYNEG(startKeyArg) )
		log(LOG_REMIND,"net: msg3: StartKey lastbit set."); 
	if (  KEYNEG(endKeyArg) )
		log(LOG_REMIND,"net: msg3: EndKey lastbit clear."); 

	// declare vars here becaues of 'goto skip' below
	int32_t mergeFileNum = -1 ;
	int32_t max ;

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base = getRdbBase( m_rdbId, m_collnum );
	if ( ! base ) {
		return true;
	}

	// store the file numbers in the array, these are the files we read
	m_numFileNums = 0;

	// save startFileNum here, just for recall
	m_startFileNum = startFileNum;
	m_numFiles     = numFiles;

	// . if we have a merge going on, we may have to change startFileNum
	// . if some files get unlinked because merge completes then our 
	//   reads will detect the error and loop back here
	// . we launch are reads right after this without giving up the cpu
	//   and we use file descriptors, so any changes to Rdb::m_files[]
	//   should not hurt us
	// . WARNING: just make sure you don't lose control of cpu until after
	//   you call RdbScan::set()
	// . we use hasMergeFile() instead of isMerging() because he may not 
	//   be merging cuz he got suspended or he restarted and
	//   hasn't called attemptMerge() yet, but he may still contain it
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,
		    "net: msg3: "
		    "c=%" PRId32" hmf=%" PRId32" sfn=%" PRId32" msfn=%" PRId32" nf=%" PRId32" db=%s.",
		     (int32_t)compensateForMerge,(int32_t)base->hasMergeFile(),
		     (int32_t)startFileNum,(int32_t)base->m_mergeStartFileNum-1,
		     (int32_t)numFiles,base->m_dbname);
	int32_t pre = -10;
	if ( compensateForMerge && base->hasMergeFile() && 
	     startFileNum >= base->m_mergeStartFileNum - 1 &&
	     (startFileNum > 0 || numFiles != -1) ) {
		// now also include the file being merged into, but only
		// if we are reading from a file being merged...
		if ( startFileNum < base->m_mergeStartFileNum +
		     base->m_numFilesToMerge - 1 )
			//m_fileNums [ m_numFileNums++ ] =
			//	base->m_mergeStartFileNum - 1;
			pre = base->m_mergeStartFileNum - 1;
		// debug msg
		if ( g_conf.m_logDebugQuery )
			log(LOG_DEBUG,
			   "net: msg3: startFileNum from %" PRId32" to %" PRId32" (mfn=%" PRId32")",
			    startFileNum,startFileNum+1,mergeFileNum);
		// if merge file was inserted before us, inc our file number
		startFileNum++;
	}
	// adjust num files if we need to, as well
	if ( compensateForMerge && base->hasMergeFile() && 
	     startFileNum < base->m_mergeStartFileNum - 1 &&
	     numFiles != -1 &&
	     startFileNum + numFiles - 1 >= base->m_mergeStartFileNum - 1 ) {
		// debug msg
		if ( g_conf.m_logDebugQuery )
			log(LOG_DEBUG,"net: msg3: numFiles up one.");
		// if merge file was inserted before us, inc our file number
		numFiles++;
	}

	// . how many rdb files does this base have?
	// . IMPORTANT: this can change since files are unstable because they
	//   might have all got merged into one!
	// . so do this check to make sure we're safe... especially if
	//   there was an error before and we called readList() on ourselves
	max = base->getNumFiles();
	// -1 means we should scan ALL the files in the base
	if ( numFiles == -1 ) numFiles = max;
	// limit it by startFileNum, however
	if ( numFiles > max - startFileNum ) numFiles = max - startFileNum;
	// set g_errno and return true if it is < 0
	if ( numFiles < 0 ) { 
		log(LOG_LOGIC,
		   "net: msg3: readList: numFiles = %" PRId32" < 0 (max=%" PRId32")(sf=%" PRId32")",
		    numFiles , max , startFileNum );
		g_errno = EBADENGINEER; 
		// force core dump
		char *xx=NULL;*xx=0;
		return true; 
	}

	// . allocate buffer space
	// . m_scans, m_startpg, m_endpg, m_hintKeys, m_hintOffsets,
	//   m_fileNums, m_lists
	int32_t chunk = sizeof(RdbScan) + // m_scans
		4 +                    // m_startpg
		4 +                    // m_endpg
		//sizeof(key_t) +        // m_hintKeys
		m_ks +                 // m_hintKeys
		4 +                    // m_hintOffsets
		4 +                    // m_fileNums
		sizeof(RdbList) ;      // m_lists
	int32_t nn   = numFiles;
	if ( pre != -10 ) nn++;
	m_numChunks = nn;
	int32_t need = nn * (chunk);
	m_alloc = m_buf;
	if ( need > (int32_t)MSG3_BUF_SIZE ) {
		m_allocSize = need;
		m_alloc = (char *)mcalloc ( need , "Msg3" );
		if ( ! m_alloc ) {
			log("disk: Could not allocate %" PRId32" bytes read "
			    "structures to read %s.",need,base->m_dbname);
			return true;
		}
	}
	char *p = m_alloc;
	m_scans       = (RdbScan *)p; p += nn * sizeof(RdbScan);
	m_startpg     = (int32_t    *)p; p += nn * 4;
	m_endpg       = (int32_t    *)p; p += nn * 4;
	//m_hintKeys    = (key_t   *)p; p += nn * sizeof(key_t);
	m_hintKeys    = (char    *)p; p += nn * m_ks;
	m_hintOffsets = (int32_t    *)p; p += nn * 4;
	m_fileNums    = (int32_t    *)p; p += nn * 4;
	m_lists       = (RdbList *)p; p += nn * sizeof(RdbList);
	// sanity check
	if ( p - m_alloc != need ) {
		log(LOG_LOGIC,"disk: Bad malloc in Msg3.cpp.");
		char *xx = NULL; *xx = 0;
	}
	// call constructors
	for ( int32_t i = 0 ; i < nn ; i++ ) m_lists[i].constructor();
	// make fix from up top
	if ( pre != -10 ) m_fileNums [ m_numFileNums++ ] = pre;

	// store them all
	for ( int32_t i = startFileNum ; i < startFileNum + numFiles ; i++ )
		m_fileNums [ m_numFileNums++ ] = i;

	// . remove file nums that are being unlinked after a merge now
	// . keep it here (below skip: label) so sync point reads can use it
	int32_t n = 0;
	for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) {
		// skip those that are being unlinked after the merge
		if ( base->m_isUnlinking && 
		     m_fileNums[i] >= base->m_mergeStartFileNum &&
		     m_fileNums[i] <  base->m_mergeStartFileNum + 
		                      base->m_numFilesToMerge      )
			continue;
		// otherwise, keep it
		m_fileNums[n++] = m_fileNums[i];
	}
	m_numFileNums = n;

	// . if root file is being merged, he's file #0, & root file is file #1
	// . this is a hack so caller gets what he wants
	//if ( startFileNum == 0 && base->getFileId(0) == 0 && numFiles == 1 )
	//	numFiles = 2;

	// remember the file range we should scan
	m_numScansStarted    = 0;
	m_numScansCompleted  = 0;
	//m_startKey           = startKey;
	//m_endKey             = endKey;
	//m_constrainKey       = endKey; // set in case justGetEndKey is true
	KEYSET(m_startKey,startKeyArg,m_ks);
	KEYSET(m_endKey,endKeyArg,m_ks);
	KEYSET(m_constrainKey,endKeyArg,m_ks);//set incase justGetEndKey istrue
	m_minRecSizes        = minRecSizes;
	m_compensateForMerge = compensateForMerge;
	// bail if 0 files to scan -- no! need to set startKey/endKey
	if ( numFiles == 0 ) return true;
	// don't read anything if endKey < startKey
	//if ( m_startKey > m_endKey ) return true;
	if ( KEYCMP(m_startKey,m_endKey,m_ks)>0 ) return true;
	// keep the original in tact in case g_errno == ETRYAGAIN
	//m_endKeyOrig        = endKey;
	KEYSET(m_endKeyOrig,endKeyArg,m_ks);
	m_minRecSizesOrig   = minRecSizes;
	// start reading at this key
	m_fileStartKey = startKeyArg;
	// start the timer, keep it fast for clusterdb though
	if ( g_conf.m_logTimingDb ) m_startTime = gettimeofdayInMilliseconds();
	// translate base to an id, for the sake of m_msg0
	//char baseId = m_msg0->getRdbId ( base );
	// map ptrs
	RdbMap **maps = base->getMaps();
	// . we now boost m_minRecSizes to account for negative recs 
	// . but not if only reading one list, cuz it won't get merged and
	//   it will be too big to send back
	if ( m_numFileNums > 1 ) compensateForNegativeRecs ( base );
	// . often endKey is too big for an efficient read of minRecSizes bytes
	//   because we end up reading too much from all the files
	// . this will set m_startpg[i], m_endpg[i] for each RdbScan/RdbFile
	//   to ensure we read "minRecSizes" worth of records, not much more
	// . returns the new endKey for all ranges
	// . now this just overwrites m_endKey
	//m_endKey = setPageRanges ( base           ,
	setPageRanges ( base           ,
			m_fileNums     ,
			m_numFileNums  ,
			m_fileStartKey , // start reading @ key
			m_endKey       , // stop reading @ key
			m_minRecSizes  );

	// . NEVER let m_endKey be a negative key, because it will 
	//   always be unmatched, since delbit is cleared
	// . adjusting it here ensures our generated hints are valid
	// . we will use this key to call constrain() with
	//m_constrainKey = m_endKey;
	//if ( ( m_constrainKey.n0 & 0x01) == 0x00 ) 
	//	m_constrainKey -= (uint32_t)1;
	KEYSET(m_constrainKey,m_endKey,m_ks);
	if ( KEYNEG(m_constrainKey) )
		KEYSUB(m_constrainKey,m_ks);

	// Msg5 likes to get the endkey for getting the list from the tree
	if ( justGetEndKey ) return true;

	// sanity check
	if ( m_numFileNums > nn ) {
		log(LOG_LOGIC,"disk: Failed sanity check in Msg3.");
		char *xx = NULL; *xx = 0;
	}

	// debug msg
	//log("msg3 getting list (msg5=%" PRIu32")",m_state);
	// . MDW removed this -- go ahead an end on a delete key
	// . RdbMerge might not pick it up this round, but oh well
	// . so we can have both positive and negative co-existing in same file
	// make sure the last bit is set so we don't end on a delete key
	//m_endKey.n0 |= 0x01LL;
	// . now start reading/scanning the files
	// . our m_scans array starts at 0
	for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) {
		// get the page range
		//int32_t p1 = m_startpg [ i ];
		//int32_t p2 = m_endpg   [ i ];
		//#ifdef GBSANITYCHECK
		int32_t fn = m_fileNums[i];
		// this can happen somehow!
		if ( fn < 0 ) {
			log(LOG_LOGIC,"net: msg3: fn=%" PRId32". Bad engineer.",fn);
			continue;
		}
		// sanity check
		if ( i > 0 && m_fileNums[i-1] >= fn ) {
			log(LOG_LOGIC,
			    "net: msg3: files must be read in order "
			    "from oldest to newest so RdbList::indexMerge_r "
			    "works properly. Otherwise, corruption will "
			    "result. ");
			char *xx = NULL; *xx = 0;
			return true;
		}
		// . sanity check?
		// . no, we must get again since we turn on endKey's last bit
		int32_t p1 , p2;
		maps[fn]->getPageRange ( m_fileStartKey , 
					m_endKey       , 
					&p1            , 
					&p2            ,
					NULL           );
		//if ( p1 != p1c || p2 != p2c ) {
		//	fprintf(stderr,"Msg3::bad page range\n");
		//	sleep(50000);
		//}
		// sanity check, each endpg's key should be > endKey
		//if ( p2 < maps[fn]->getNumPages() && 
		//     maps[fn]->getKey ( p2 ) <= m_endKey ) {
		//	fprintf(stderr,"Msg3::bad page range 2\n");
		//	sleep(50000);
		//}
		//#endif
		//int32_t p1 , p2; 
		//maps[fn]->getPageRange (startKey,endKey,minRecSizes,&p1,&p2);
		// now get some read info
		int64_t offset      = maps[fn]->getAbsoluteOffset ( p1 );
		int32_t      bytesToRead = maps[fn]->getRecSizes ( p1, p2, false);
		// max out the endkey for this list
		// debug msg
		//#ifdef _DEBUG_		
		//if ( minRecSizes == 2000000 ) 
		//log("Msg3:: reading %" PRId32" bytes from file #%" PRId32,bytesToRead,i);
		//#endif
		// inc our m_numScans
		m_numScansStarted++;
		// . keep stats on our disk accesses
		// . count disk seeks (assuming no fragmentation)
		// . count disk bytes read
		if ( bytesToRead > 0 ) {
			base->m_rdb->didSeek (             );
			base->m_rdb->didRead ( bytesToRead );
		}
		// . the startKey may be different for each RdbScan class
		// . RdbLists must have all keys within their [startKey,endKey]
		// . therefore set startKey individually from first page in map
		// . this endKey must be >= m_endKey 
		// . this startKey must be < m_startKey
		//key_t startKey = maps[fn]->getKey ( p1 );
		//key_t endKey   = maps[fn]->getKey ( p2 );
		char startKey2 [ MAX_KEY_BYTES ];
		char endKey2   [ MAX_KEY_BYTES ];
		maps[fn]->getKey ( p1 , startKey2 );
		maps[fn]->getKey ( p2 , endKey2 );
		//char *startKey = maps[fn]->getKeyPtr ( p1 );
		//char *endKey   = maps[fn]->getKeyPtr ( p2 );
		// store in here
		m_startpg [ i ] = p1;
		m_endpg   [ i ] = p2;

		// . we read UP TO that endKey, so reduce by 1
		// . but iff p2 is NOT the last page in the map/file
		// . maps[fn]->getKey(lastPage) will return the LAST KEY
		//   and maps[fn]->getOffset(lastPage) the length of the file
		//if ( maps[fn]->getNumPages()!=p2) endKey -=(uint32_t)1;
		if ( maps[fn]->getNumPages() != p2 ) KEYSUB(endKey2,m_ks);
		// otherwise, if we're reading all pages, then force the
		// endKey to virtual inifinite
		//else endKey.setMax();
		else KEYMAX(endKey2,m_ks);

		// . set up the hints
		// . these are only used if we are only reading from 1 file
		// . these are used to call constrain() so we can constrain
		//   the end of the list w/o looping through all the recs
		//   in the list
		int32_t h2 = p2 ;
		// decrease by one page if we're on the last page
		if ( h2 > p1 && maps[fn]->getNumPages() == h2 ) h2--;
		// . decrease hint page until key is <= endKey on that page
		//   AND offset is NOT -1 because the old way would give
		//   us hints passed the endkey
		// . also decrease so we can constrain on minRecSizes in
		//   case we're the only list being read
		// . use >= m_minRecSizes instead of >, otherwise we may
		//   never be able to set "size" in RdbList::constrain()
		//   because "p" could equal "maxPtr" right away
		while ( h2 > p1 && 
			//( maps[fn]->getKey   (h2) > m_constrainKey ||
		      (KEYCMP(maps[fn]->getKeyPtr(h2),m_constrainKey,m_ks)>0||
			  maps[fn]->getOffset(h2) == -1            ||
			  maps[fn]->getAbsoluteOffset(h2) - offset >=
			  m_minRecSizes ) )
			h2--;
		// now set the hint
		m_hintOffsets [ i ] = maps[fn]->getAbsoluteOffset ( h2 ) -
			              maps[fn]->getAbsoluteOffset ( p1 ) ;
		//m_hintKeys    [ i ] = maps[fn]->getKey            ( h2 );
		KEYSET(&m_hintKeys[i*m_ks],maps[fn]->getKeyPtr(h2),m_ks);

		// reset g_errno before calling setRead()
		g_errno = 0;
		// . this fix is now in RdbList::checklist_r()
		// . we can now have dup keys, so, we may read in
		//   a rec with key "lastMinKey" even though we don't read
		//   in the first key on the end page, so don't subtract 1...
		//if ( endKey != m_endKeyOrig ) 
		//	endKey += (uint32_t) 1;

		// timing debug
		if ( g_conf.m_logTimingDb )
			log(LOG_TIMING,
			    "net: msg: reading %" PRId32" bytes from %s file #%" PRId32" "
			     "(niceness=%" PRId32")",
			     bytesToRead,base->m_dbname,i,m_niceness);

		// log huge reads, those hurt us
		if ( bytesToRead > 150000000 ) {
			logf(LOG_INFO,"disk: Reading %" PRId32" bytes at offset %" PRId64" "
			    "from %s.",
			    bytesToRead,offset,base->m_dbname);
		}

		// if any keys in the map are the same report corruption
		char tmpKey    [16];
		char lastTmpKey[16];
		int32_t ccount = 0;
		if ( bytesToRead     > 10000000      && 
		     bytesToRead / 2 > m_minRecSizes &&
		     base->m_fixedDataSize >= 0        ) {
			for ( int32_t pn = p1 ; pn <= p2 ; pn++ ) {
				maps[fn]->getKey ( pn , tmpKey );
				if ( KEYCMP(tmpKey,lastTmpKey,m_ks) == 0 ) 
					ccount++;
				gbmemcpy(lastTmpKey,tmpKey,m_ks);
			}
		}
		if ( ccount > 10 ) {
			logf(LOG_INFO,"disk: Reading %" PRId32" bytes from %s file #"
			     "%" PRId32" when min "
			     "required is %" PRId32". Map is corrupt and has %" PRId32" "
			     "identical consecutive page keys because the "
			     "map was \"repaired\" because out of order keys "
			     "in the index.",
			     (int32_t)bytesToRead,
			     base->m_dbname,fn,
			     (int32_t)m_minRecSizes,
			     (int32_t)ccount);
			m_numScansCompleted++;
			m_errno = ECORRUPTDATA;
			m_hadCorruption = true;
			//m_maxRetries = 0;
			break;
		}

		////////
		//
		// try to get from PAGE CACHE
		//
		////////
		BigFile *ff = base->getFile(m_fileNums[i]);
		RdbCache *rpc = getDiskPageCache ( m_rdbId );
		if ( ! m_allowPageCache ) rpc = NULL;
		// . vfd is unique 64 bit file id
		// . if file is opened vfd is -1, only set in call to open()
		int64_t vfd = ff->getVfd();
		key192_t ck = makeCacheKey ( vfd , offset, bytesToRead);
		char *rec; int32_t recSize;
		bool inCache = false;
		if ( rpc && vfd != -1 && ! m_validateCache ) 
			inCache = rpc->getRecord ( (collnum_t)0 , // collnum
						   (char *)&ck , 
						   &rec , 
						   &recSize ,
						   true , // copy?
						   -1 , // maxAge, none 
						   true ); // inccounts?
		m_scans[i].m_inPageCache = false;
		if ( inCache ) {
			m_scans[i].m_inPageCache = true;
			m_numScansCompleted++;
			// now we have to store this value, 6 or 12 so
			// we can modify the hint appropriately
			m_scans[i].m_shifted = *rec;
			m_lists[i].set ( rec +1,
					 recSize-1 ,
					 rec , // alloc
					 recSize , // allocSize
					 startKey2 ,
					 endKey2 ,
					 base->m_fixedDataSize ,
					 true , // owndata
					 base->useHalfKeys() ,
					 getKeySizeFromRdbId ( m_rdbId ) );
			continue;
		}

		// . do the scan/read of file #i
		// . this returns false if blocked, true otherwise
		// . this will set g_errno on error
		bool done = m_scans[i].setRead (base->getFile(m_fileNums[i]),
						base->m_fixedDataSize ,
						 offset                 ,
						 bytesToRead            ,
						 startKey2              ,
						 endKey2                ,
						m_ks                    ,
						 &m_lists[i]            ,
						 this                   ,
						 doneScanningWrapper    ,
						 base->useHalfKeys()    ,
						m_rdbId,
						 m_niceness             ,
						 m_allowPageCache       ,
						 m_hitDisk              ) ;
		// . damn, usually the above will indirectly launch a thread
		//   to do the reading, but it sets g_errno to EINTR,
		//   "interrupted system call"!
		// . i guess the thread does the read w/o blocking and then
		//   queues the signal on g_loop's queue before it exits
		// . try ignoring, and keep going
		if ( g_errno == EINTR ) {
			log("net: Interrupted system call while reading file. "
			    "Ignoring.");
			g_errno = 0;
		}
		// debug msg
		//fprintf(stderr,"Msg3:: reading %" PRId32" bytes from file #%" PRId32","
		//	"done=%" PRId32",offset=%" PRId64",g_errno=%s,"
		//	"startKey=n1=%" PRIu32",n0=%" PRIu64",  "
		//	"endKey=n1=%" PRIu32",n0=%" PRIu64"\n",
		//	bytesToRead,i,(int32_t)done,offset,mstrerror(g_errno),
		//	m_startKey,m_endKey);
		//if ( bytesToRead == 0 )
		//	fprintf(stderr,"shit\n");
		// if it did not block then it completed, so count it
		if ( done ) m_numScansCompleted++;
		// break on an error, and remember g_errno in case we block
		if ( g_errno && g_errno != ENOTHREADSLOTS ) { 
			int32_t tt = LOG_WARN;
			if ( g_errno == EFILECLOSED ) tt = LOG_INFO;
			log(tt,"disk: Reading %s had error: %s.",
			    base->m_dbname, mstrerror(g_errno));
			m_errno = g_errno; 
			break; 
		}
	}
	// debug test
	//if ( rand() % 100 <= 10 ) m_errno = EIO;

	// if we blocked, return false
	if ( m_numScansCompleted < m_numScansStarted ) return false;
	// . if all scans completed without blocking then wrap it up & ret true
	// . doneScanning may now block if it finds data corruption and must
	//   get the list remotely
	return doneScanning();
}

Example #8

Show file

File: RdbMerge.cpp Project: FlavioFalcao/open-source-search-engine

bool RdbMerge::getAnotherList ( ) {
	log(LOG_DEBUG,"db: Getting another list for merge.");
	// clear it up in case it was already set
	g_errno = 0;
	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;
	// if merging titledb files, we must adjust m_endKey so we do
	// not have to read a huge 200MB+ tfndb list
	//key_t newEndKey = m_endKey;
	char newEndKey[MAX_KEY_BYTES];
	KEYSET(newEndKey,m_endKey,m_ks);

	//CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
	//char *coll = cr->m_coll;

	/*
	if ( m_rdbId == RDB_TITLEDB ) { // && m_rdbId == RDB_TFNDB ) {
		//long long docId1 = g_titledb.getDocIdFromKey ( m_startKey );
	       long long docId1=g_titledb.getDocIdFromKey((key_t *)m_startKey);
		//long long docId2 = g_titledb.getDocIdFromKey ( m_endKey );
		// tfndb is pretty much uniformly distributed
		RdbBase *ubase = getRdbBase(RDB_TFNDB,m_coll);
		if ( ! ubase ) return true;
		long long space    = ubase->getDiskSpaceUsed();
		//long long readSize = (space * (docId2-docId1)) / DOCID_MASK;
		long long bufSize  = g_conf.m_mergeBufSize;
		// for now force to 100k
		bufSize = 100000;
		if ( bufSize > space ) bufSize = space;
		long long docId3   = (long long) (((double)bufSize /
						  (double)space) *
			(double)DOCID_MASK  + docId1);
		// constrain newEndKey based on docId3
		if ( docId3 < 0 ) docId3 = DOCID_MASK;
		//if ( docId3 >= DOCID_MASK ) newEndKey.setMax();
		if ( docId3 >= DOCID_MASK ) KEYMAX(newEndKey,m_ks);
		//else newEndKey = g_titledb.makeLastKey ( docId3 );
		else {
			key_t nk = g_titledb.makeLastKey(docId3);
			KEYSET(newEndKey,(char *)&nk,m_ks);
		}
		//log(LOG_DEBUG,"build: remapping endkey from %lx.%llx to "
		//    "%lx.%llx to avoid big tfndb read.",
		//    m_endKey.n1,m_endKey.n0, newEndKey.n1,newEndKey.n0);
		log(LOG_DEBUG,"build: remapping endkey from %llx.%llx to "
		    "%llx.%llx to avoid big tfndb read.",
		    KEY1(m_endKey,m_ks),KEY0(m_endKey),
		    KEY1(newEndKey,m_ks),KEY0(newEndKey));
	}
	*/
	// . this returns false if blocked, true otherwise
	// . sets g_errno on error
	// . we return false if it blocked
	// . m_maxBufSize may be exceeded by a rec, it's just a target size
	// . niceness is usually MAX_NICENESS, but reindex.cpp sets to 0
	// . this was a call to Msg3, but i made it call Msg5 since
	//   we now do the merging in Msg5, not in msg3 anymore
	// . this will now handle truncation, dup and neg rec removal
	// . it remembers last termId and count so it can truncate even when
	//   IndexList is split between successive reads
	// . IMPORTANT: when merging titledb we could be merging about 255
	//   files, so if we are limited to only X fds it can have a cascade
	//   affect where reading from one file closes the fd of another file
	//   in the read (since we call open before spawning the read thread)
	//   and can therefore take 255 retries for the Msg3 to complete 
	//   because each read gives a EFILCLOSED error.
	//   so to fix it we allow one retry for each file in the read plus
	//   the original retry of 25
	long nn = base->getNumFiles();
	if ( m_numFiles > 0 && m_numFiles < nn ) nn = m_numFiles;
	// don't access any biased page caches
	bool usePageCache = true;
	if ( m_rdbId == RDB_CLUSTERDB )
		usePageCache = false;
	// . i don't trust page cache too much (mdw)... well, give it a shot
	// . see if ths helps fix WD corruption... i doubt it
	usePageCache = false;
	// for now force to 100k
	long bufSize = 100000; // g_conf.m_mergeBufSize , // minRecSizes
	// get it
	return m_msg5.getList ( m_rdbId        ,
				m_collnum           ,
				&m_list        ,
				m_startKey     ,
				newEndKey      , // usually is maxed!
				bufSize        ,
				false          , // includeTree?
				false          , // add to cache?
				0              , // max cache age for lookup
				m_startFileNum , // startFileNum
				m_numFiles     ,
				this           , // state 
				gotListWrapper , // callback
				m_niceness     , // niceness
				true           , // do error correction?
				NULL           , // cache key ptr
				0              , // retry #
				nn + 75        , // max retries (mk it high)
				false          , // compensate for merge?
				-1LL           , // sync point
				&m_msg5b       ,
				true           , // isRealMerge? absolutely!
				usePageCache   );
}

Example #9

Show file

File: RdbMerge.cpp Project: FlavioFalcao/open-source-search-engine

// . returns false if blocked, true otherwise
// . sets g_errno on error
bool RdbMerge::gotLock ( ) {
	// get total recSizes of files we're merging
	//long totalSize = 0;
	//for ( long i=m_startFileNum ; i < m_startFileNum + m_numFiles ; i++ )
	//totalSize += m_base->m_files[i]->getSize();
	// . grow the map now so it doesn't have to keep growing dynamically
	//   which wastes memory
	// . setMapSize() returns false and sets g_errno on error
	// . we return true if it had an error
	//if ( ! m_targetMap->setMapSizeFromFileSize ( totalSize ) ) {
	//log("RdbMerge::getLockFile: targetMap setMapSize failed");
	//return true;
	//}

	// . get last mapped offset
	// . this may actually be smaller than the file's actual size
	//   but the excess is not in the map, so we need to do it again
	long long startOffset = m_targetMap->getFileSize();

	// if startOffset is > 0 use the last key as RdbDump:m_prevLastKey
	// so it can compress the next key it dumps providee m_useHalfKeys
	// is true (key compression) and the next key has the same top 6 bytes
	// as m_prevLastKey
	//key_t prevLastKey;
	//if ( startOffset > 0 ) prevLastKey = m_targetMap->getLastKey();
	//else                   prevLastKey.setMin();
	char prevLastKey[MAX_KEY_BYTES];
	if ( startOffset > 0 ) m_targetMap->getLastKey(prevLastKey);
	else                   KEYMIN(prevLastKey,m_ks);

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;

	// . set up a a file to dump the records into
	// . returns false and sets g_errno on error
	// . this will open m_target as O_RDWR | O_NONBLOCK | O_ASYNC ...
	m_dump.set ( m_collnum          ,
		     m_target           ,
		     m_id2              ,
		     //m_startFileNum - 1 , // merge fileNum in Rdb::m_files[]
		     (m_rdbId == RDB_TITLEDB||m_rdbId== RDB2_TITLEDB2) ,
		     NULL         , // buckets to dump is NULL, we call dumpList
		     NULL         , // tree to dump is NULL, we call dumpList
		     m_targetMap  ,
		     NULL         , // for caching dumped tree
		     0            , // m_maxBufSize. not needed if no tree! 
		     true         , // orderedDump?
		     m_dedup      ,
		     m_niceness   , // niceness of dump
		     this         , // state
		     dumpListWrapper ,
		     base->useHalfKeys() ,
		     startOffset  ,
		     prevLastKey  ,
		     m_ks         ,
		     m_pc         ,
		     m_maxTargetFileSize ,
		     NULL                ); // set m_base::m_needsToSave? no.
	// what kind of error?
	if ( g_errno ) {
		log("db: gotLock: %s.", mstrerror(g_errno) );
		return true;
	}
	// . create a new msg3
	// . don't keep static because it contains a msg3, treeList & diskList
	// . these can take up  many megs of mem
	// . yes, but we need to avoid fragmentation, so hold on to our mem!
	//m_msg3 = new (Msg3);
	//if ( ! m_msg3 ) return false;
	// we're now merging since the dump was set up successfully
	m_isMerging     = true;
	// make it suspended for now
	m_isSuspended   = true;
	// grab the lock
	//s_isMergeLocked = true;
	// . this unsuspends it
	// . this returns false on error and sets g_errno
	// . it returns true if blocked or merge completed successfully
	return resumeMerge ( );
}

Example #10

Show file

File: PageIndexdb.cpp Project: BKJackson/open-source-search-engine

// . make a web page from results stored in msg40
// . send it on TcpSocket "s" when done
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool gotIndexList ( void *state ) {
	// the state
	State10 *st = (State10 *) state;
	// launch more
	if ( ! launchRequests ( st ) ) return false;
	/*
	// get the date list
	//fprintf(stderr,"termId now=%lli\n",st->m_termId);
	//fprintf(stderr,"should be=%lli\n",(st->m_termId & TERMID_MASK));
	// . now get the indexList for this termId
	// . date is complemented, so start with bigger one first
	key128_t startKey = g_datedb.makeStartKey ( st->m_termId ,0xffffffff);
	key128_t endKey   = g_datedb.makeEndKey   ( st->m_termId ,0x0);
	// get the rdb ptr to titledb's rdb
	//Rdb *rdb = g_indexdb.getRdb();
	// -1 means read from all files in Indexdb
	long numFiles = -1;
	// make it zero if caller doesn't want to hit the disk
	if ( ! st->m_useDisk ) numFiles = 0;
	// get the title rec at or after this docId
	if ( ! st->m_msg0.getList ( -1 ,
				    0  ,
				    0  ,
				    0  ,    // max cache age
				    false , // add to cache?
				    RDB_DATEDB  , // rdbId of 2 = indexdb
				    st->m_coll ,
				    &st->m_list2  ,
				    (char *)&startKey  ,
				    (char *)&endKey    ,
				    st->m_numRecs * sizeof(key128_t),//recSizes
				    //st->m_useTree   , // include tree?
				    //st->m_useCache  , // include cache?
				    //false     , // add to cache?
				    //0         , // startFileNum
				    //numFiles  , // numFiles
				    st        , // state
				    gotIndexListWrapper2 ,
				    0  ) )  // niceness
		return false;
	// otherwise call gotResults which returns false if blocked, true else
	// and sets g_errno on error
	return gotIndexList2 ( (void *) st , NULL );
}


void gotIndexListWrapper2 ( void *state , RdbList *list ) {
	gotIndexList2 ( state , list );
}

void addedKeyWrapper ( void *state ) {
	gotIndexList2 ( state, NULL );
}

// . make a web page from results stored in msg40
// . send it on TcpSocket "s" when done
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool gotIndexList2 ( void *state , RdbList *list ) {
	// the state
	State10 *st = (State10 *) state;
	*/
	// get the socket
	TcpSocket *s = st->m_socket;
	// don't allow pages bigger than 128k in cache
	//char  buf [ 64*1024 ];
	// a ptr into "buf"
	//char *p    = buf;
	//char *pend = buf + 64*1024;
	/*
	// get termId
	key_t k = *(key_t *)st->m_list.getStartKey();
	long long termId = g_indexdb.getTermId ( k );
	// get groupId from termId
	//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
	unsigned long groupId = g_indexdb.getGroupIdFromKey ( &k );
	long hostnum = g_hostdb.makeHostId ( groupId );
	*/
	// check box " checked" strings
	char *ubs = "";
	char *uts = "";
	char *uds = "";
	char *ucs = "";
	char *add = "";
	char *del = "";
	if ( st->m_useDatedb) ubs = " checked";
	if ( st->m_useTree  ) uts = " checked";
	if ( st->m_useDisk  ) uds = " checked";
	if ( st->m_useCache ) ucs = " checked";
	if ( st->m_add      ) add = " checked";
	if ( st->m_del      ) del = " checked";

	SafeBuf *pbuf = &st->m_pbuf;

	g_pages.printAdminTop ( pbuf , st->m_socket , &st->m_r );

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; 
	if (!(base=getRdbBase((uint8_t)RDB_INDEXDB,st->m_coll))) return true;

	// print the standard header for admin pages
	pbuf->safePrintf ( 
		  "<center>\n"
		  "<table cellpadding=2><tr><td colspan=4>"
		  "useDatedb:<input type=checkbox value=1 name=ub%s> "
		  "useTree:<input type=checkbox value=1 name=ut%s> "
		  "useDisk:<input type=checkbox value=1 name=ud%s> "
		  "useCache:<input type=checkbox value=1 name=uc%s> "
		  "ADD:<input type=checkbox value=1 name=add%s> "
		  "DELETE:<input type=checkbox value=1 name=del%s>"
		  "</td></tr><tr><td>"
		  "query:"
		  "</td><td>"
		  "<input type=text name=q value=\"%s\" size=20>"
		  "</td><td>"
		  "collection:"
		  "</td><td>"
		  "<input type=text name=c value=\"%s\" size=10>"
		  "</td></tr><tr><td>"
		  "termId:"
		  "</td><td>"
		  "<input type=text name=t value=%lli size=20>"
		  "</td><td>"
		  "numRecs:"
		  "</td><td>"
		  "<input type=text name=numRecs value=%li size=10> "
		  "</td></tr><tr><td>"
		  "docId:"
		  "</td><td>"
		  "<input type=text name=d value=%lli size=20> "
		  "</td><td>"
		  "score:"
		  "</td><td>"
		  "<input type=text name=score value=%li size=10> "
		  "</td><td>"
		  "<input type=submit value=ok border=0>"
		  "</td></tr>"
		  "<tr><td colspan=2>"
		  "term appears in about %lli docs +/- %li"
		  "</td></tr>"
		  //"<tr><td colspan=2>"
		  //"this indexlist held by host #%li and twins"
		  //"</td></tr>"
		  "</table>"
		  "</form><br><br>" ,
		  ubs, uts, uds, ucs, add, del,
		  st->m_query , st->m_coll , st->m_termId  , 
		  st->m_numRecs  ,
		  st->m_docId , (long)st->m_score ,
		  st->m_termFreq ,
		  2 * (long)GB_INDEXDB_PAGE_SIZE / 6 * 
		  base->getNumFiles() );
		  //hostnum );

	if ( g_errno || (st->m_list.isEmpty() ) ) {//&&st->m_list2.isEmpty())){
		if (g_errno)pbuf->safePrintf("Error = %s",mstrerror(g_errno));
		else        pbuf->safePrintf("List is empty");
		pbuf->safePrintf("</center>");
		// erase g_errno for sending
		g_errno = 0;
		// now encapsulate it in html head/tail and send it off
		bool status = g_httpServer.sendDynamicPage(s , 
							   pbuf->getBufStart(),
							   pbuf->length() );
		// delete it
		mdelete ( st , sizeof(State10) , "PageIndexdb" );
		delete (st);
		return status;
	}

	pbuf->safePrintf ( 
		  "<table cellpadding=1 border=1>" 
		  "<tr><td>#</td><td>score</td>"
		  "<td>docId</td><td>domHash</td></tr>");

	//if ( searchingEvents

	// now print the score/docId of indexlist
	long i = 0;
	for (   st->m_list.resetListPtr () ;
	      ! st->m_list.isExhausted  () ;
		st->m_list.skipCurrentRecord () ) {
		// break if buf is low
		//if ( p + 1024 >= pend ) break;
		// but set the ip/port to a host that has this titleRec
		// stored locally!
		long long     docId   = st->m_list.getCurrentDocId () ;
		unsigned long groupId = getGroupIdFromDocId ( docId );
		// get the first host's hostId in this groupId
		Host *h = g_hostdb.getFastestHostInGroup ( groupId );
		// . pick the first host to handle the cached titleRec request
		// . we assume it has the best time and is up!! TODO: fix!
		// . use local ip though if it was an internal request
		// . otherwise, use the external ip
		//unsigned long  ip   = h->m_externalIp;
		unsigned long  ip   = h->m_ip;
		// use the NAT mapped port
		unsigned short port = h->m_externalHttpPort;
		// log the first docid so we can blaster url: queries
		// to PageIndexdb and see if they are in indexdb
		if ( i == 0 ) 
			logf(LOG_INFO,"indexdb: %llu %s",docId,st->m_query);
		// adjust ip/port if local
		if ( st->m_isLocal ) {
			ip   = h->m_ip;
			port = h->m_httpPort;
		}
		unsigned long date = 0;
		if ( st->m_useDatedb )
			date = (unsigned long)st->m_list.getCurrentDate();
		uint8_t dh = g_titledb.getDomHash8FromDocId ( docId );
		char ds[32];
		ds[0]=0;
		if ( st->m_useDatedb ) sprintf (ds,"%lu/",date);
		pbuf->safePrintf ( 
			  "<tr><td>%li.</td>"
			  "<td>%s%i</td>"
			  "<td>"
			  //"<a href=http://%s:%hu/master/titledb?d=%llu>"
			  "<a href=/master/titledb?c=%s&d=%llu>"
			  "%llu"
			  //"<td><a href=/cgi/4.cgi?d=%llu>%llu"
			  "</td>"
			  "<td>"
			  "0x%02lx"
			  "</td>"
			  "</tr>\n" ,
			  i++,
			  ds, (int)st->m_list.getCurrentScore() ,
			  //iptoa(ip) , port ,
			  st->m_coll,
			  docId , 
			  docId ,
			  (long)dh );
	}	
	pbuf->safePrintf ( "</table>" );

	/*
	if ( ! st->m_list2.isEmpty() ) 
		p += sprintf ( p ,
			       "<br>"
			       "<br>"
			       "<table cellpadding=1 border=1>" 
			       "<tr><td>#</td><td>termId</td>"
			       "<td>date</td><td>score</td>"
			       "<td>docId</td></tr>");

	// now print the score/docId of datedb list
	i = 0;
	for (   st->m_list2.resetListPtr () ;
	      ! st->m_list2.isExhausted  () ;
		st->m_list2.skipCurrentRecord () ) {
		// break if buf is low
		if ( p + 1024 >= pend ) break;
		// but set the ip/port to a host that has this titleRec
		// stored locally!
		long long     docId   = st->m_list2.getCurrentDocId () ;
		unsigned long groupId = g_titledb.getGroupId ( docId );
		// get the first host's hostId in this groupId
		Host *h = g_hostdb.getFastestHostInGroup ( groupId );
		// . pick the first host to handle the cached titleRec request
		// . we assume it has the best time and is up!! TODO: fix!
		// . use local ip though if it was an internal request
		// . otherwise, use the external ip
		//unsigned long  ip   = h->m_externalIp;
		unsigned long  ip   = h->m_ip;
		// use the NAT mapped port
		unsigned short port = h->m_externalHttpPort;
		// adjust ip/port if local
		if ( st->m_isLocal ) {
			ip   = h->m_ip;
			port = h->m_httpPort;
		}
		// debug
		char kb[16];
		st->m_list2.getCurrentKey(kb);
		//log(LOG_INFO,"debug: n1=%016llx n0=%016llx",
		//    *(long long *)(kb+8),*(long long *)(kb+0));
		//if ( (unsigned long)st->m_list2.getCurrentDate() == 0 )
		//	log("STOP");
		sprintf ( p , 
			  "<tr><td>%li.</td>"
			  "<td>%llu</td>"
			  "<td>%lu</td><td>%i</td>"
			  "<td>"
			  //"<a href=http://%s:%hu/master/titledb?d=%llu>"
			  "<a href=/master/titledb?c=%s&d=%llu>"
			  "%llu"
			  //"<td><a href=/cgi/4.cgi?d=%llu>%llu"
			  "</td></tr>\n" ,
			  i++,
			  st->m_list2.getTermId16(kb) ,
			  (unsigned long)st->m_list2.getCurrentDate() ,
			  (int)st->m_list2.getCurrentScore() ,
			  //iptoa(ip) , port ,
			  st->m_coll,
			  docId , 
			  docId );
		p += gbstrlen ( p );
	}	
	*/
	if ( ! st->m_list.isEmpty() ) 
		pbuf->safePrintf ( "</table>" );


	// print msg if we could fit all into buf
	//if ( p + 1024 >= pend ) {
	//	sprintf ( p ,"... truncated ... no mem" );
	//	p += gbstrlen ( p );		
	//}
	// print the final tail
	//p += g_httpServer.printTail ( p , pend - p );
	pbuf->safePrintf ( "</center>\n");
	// now encapsulate it in html head/tail and send it off
	bool status = g_httpServer.sendDynamicPage ( s , 
						     pbuf->getBufStart() ,
						     pbuf->length() );
	// delete the state
	mdelete ( st , sizeof(State10) , "PageIndexdb" );
	delete (st) ;
	return status;
}

Example #11

Show file

File: Syncdb.cpp Project: FlavioFalcao/open-source-search-engine

void Syncdb::syncStart_r ( bool amThread ) {

	// turn this off
	g_process.m_suspendAutoSave = true;

	char cmd[1024];
	// get synchost best ip
	char *ips = iptoa ( g_hostdb.getAliveIp ( g_hostdb.m_syncHost ) );
	// his dir
	char *dir = g_hostdb.m_syncHost->m_dir;
	// use
	Host *me = g_hostdb.m_myHost;
	// ours
	char *mydir = me->m_dir;
	// generic
	long err;

	// loop over every rdb and every data and map file in each rdb
	for ( long i = 0 ; i < RDB_END ; i++ ) {

	// skip SYNCDB
	if  ( i == RDB_SYNCDB ) continue;
	// get that rdb
	Rdb *rdb = getRdbFromId ( i );
	// skip if none
	if ( ! rdb ) continue;

	// get coll
	for ( long j = 0 ; j < rdb->getNumBases() ; j++ ) {

		// get that base
		RdbBase *base = rdb->getBase(j);//m_bases[j];
		if ( ! base ) continue;

	// get coll
	char *coll = base->m_coll;
	// and num
	long collnum = base->m_collnum;
	// make the dir
	sprintf ( cmd , "ssh %s 'mkdir %scoll.%s.%li'",
		  ips,dir,coll,collnum);
	// excecute
	log ( LOG_INFO, "sync: %s", cmd );
	//int err = my_system_r ( cmd, 3600*24 );
	//if ( err != 0 ) goto hadError;

	// copy the files
	for ( long k = 0 ; k < base->m_numFiles ; k++ ) {

	// sleep while dumping. we are in a thread.
	if ( base->isDumping() ) sleep ( 1 );


	// get map
	RdbMap *map = base->m_maps[k];
	// copy the map file
	sprintf ( cmd , "rcp %s %s:%scoll.%s.%li/'",
		  map->getFilename(),ips,dir,coll,collnum);
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;
	
	// get the file
	BigFile *f = base->m_files[k];

	// loop over each little part file
	for ( long m = 0 ; m < f->m_numParts ; m++ ) {

	// get part file
	File *p = f->m_files[m];
	// copy that
	sprintf ( cmd , "rcp %s %s:%scoll.%s.%li/'",
		  p->m_filename,ips,dir,coll,collnum);
	// excecute
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;

	}
	}
	}
	}

	// make the dirs
	sprintf ( cmd , "ssh %s '"
		  "mkdir %s/dict/ ;"
		  "mkdir %s/dict/en/ ;"
		  "mkdir %s/ucdata/ ;"
		  "mkdir %s/.antiword/ ;"
		  "'" ,
		  ips,
		  dir,
		  dir,
		  dir,
		  dir
		  );
	// excecute
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;


	// loop over the files in Process.cpp
	for ( long i = 0 ; i < 99999 ; i++ ) {
		// null means end
		if ( ! g_files[i] ) break;
		sprintf ( cmd , "rcp %s%s %s:%s",
			  mydir,g_files[i],ips,dir);
		// excecute
		log ( LOG_INFO, "sync: %s", cmd );
		if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;
	}

	// new guy is NOT in sync
	sprintf ( cmd , "ssh %s 'echo 0 > %sinsync.dat", ips,dir);
	// excecute
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;

	// saved files
	sprintf ( cmd , "rcp %s*-saved.dat %s:%sinsync.dat", 
		  mydir,ips,dir);
	// excecute
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;
	
	// completed!
	return;

 hadError:
	log ( "sync: Call to system(\"%s\") had error %s.",cmd,strerror(err));
	g_hostdb.m_syncHost->m_doingSync = 0;
	g_hostdb.m_syncHost              = NULL;
	return;
}

Example #12

Show file

File: DailyMerge.cpp Project: BlaBlaNet/open-source-search-engine

void DailyMerge::dailyMergeLoop ( ) {
	// disable for now!
	//return;
	// if in repair mode, do not do daily merge
	if ( g_repairMode ) return;
	// or if in read only mode
	if ( g_conf.m_readOnlyMode ) return;
	// skip if proxy, a proxy can be hostid 0!
	if ( g_proxy.isProxy() ) return;
	// wait for clock to be synced with host #0
	if ( ! isClockInSync() ) return;
	// get local time
	int64_t nowLocalMS = gettimeofdayInMillisecondsLocal();
	// get our hostid
	int32_t hid = g_hostdb.m_myHost->m_hostId;
	// if process only recently started (1 min ago or less)
	// then do not immediately do this...
	if (hid==0 && nowLocalMS - g_process.m_processStartTime < 1*60*1000)
		return;
	// wait until the right time (this is in UTC)
	time_t nowSynced = getTimeSynced();

	// get time since midnight
	struct tm *tt ;
	// how many MINUTES into the day are we? (in UTC)
	tt = gmtime ( &nowSynced );
	int32_t elapsedMins = tt->tm_hour * 60 + tt->tm_min ;

	// what collnum to merge?
	collnum_t i ;

	// . if we are not 0, just use host #0's collnum
	// . an error here will screw up the whole daily merge process
	if ( hid != 0 && m_mergeMode == 0 ) {
		// get host #0
		Host *h = &g_hostdb.m_hosts[0];
		// must have got a ping reply from him
		if ( ! h->m_gotPingReply ) return;
		// hostid #0 must NOT be in mode 0
		if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 ) return;
		// get the collnum that host #0 is currently daily merging
		i = g_hostdb.m_hosts[0].m_pingInfo.m_dailyMergeCollnum;
		// this means host #0 is not daily merging a collnum now
		if ( i < 0 ) return;
		// if it is valid, the CollectionRec MUST be there
		CollectionRec *cr = g_collectiondb.getRec ( i );
		if ( ! cr ) { 
			log("daily: host #0 bad collnum %"INT32"",(int32_t)i);return;}
		// if valid, use it
		m_cr = cr;
		// we set m_cr, go to next mode
		m_mergeMode = 1;
		// set the start time here, but don't commit to m_cr just yet
		m_savedStartTime = nowSynced;
	}

	// . only host #0 should do this loop!!!
	// . loop through each collection to check the time
	for (i=0; hid==0&&m_mergeMode==0 && i<g_collectiondb.m_numRecs; i++) {
		// get collection rec for collnum #i
		CollectionRec *cr = g_collectiondb.getRec ( i );
		// skip if empty, it was deleted at some point
		if ( ! cr ) continue;
		// skip if daily merge trigger is < 0 (do not do dailies)
		if ( cr->m_dailyMergeTrigger < 0 ) continue;
		// . skip if not time yet
		// . !!!!!THIS IS IN MINUTES!!!!!!!!
		if ( (int32_t)elapsedMins < (int32_t)cr->m_dailyMergeTrigger ) 
			continue;
		// do not start more than 15 mins after the trigger time,
		// if we miss that cuz we are down, then too bad
		if ( (int32_t)elapsedMins > (int32_t)cr->m_dailyMergeTrigger + 15 )
			continue;
 		// . how long has it been (in seconds)
		// . !!!!!THIS IS IN SECONDS!!!!!!!!
		int32_t diff = nowSynced - cr->m_dailyMergeStarted;
		// crazy?
		if ( diff < 0 ) continue;
		// if less than 24 hours ago, we already did it
		if ( diff < 24*3600 ) continue;
		// . we must now match the day of week
		// . use <= 0 to do it every day
		// . 0 = sunday ... 6 = saturday
		// . comma separated list is ok ("0,1, 6")
		// . leave blank or at least no numbers to do every day
		char *s = cr->m_dailyMergeDOWList;
		char dowCounts[8];
		memset(dowCounts,0,8);
		for ( ; *s ; s++ ) {
			if ( ! is_digit(*s) ) continue;
			int32_t num = atoi(s);
			if ( num < 0 ) continue;
			if ( num > 6 ) continue;
			dowCounts[num]++;
		}
		// get our dow
		int32_t todayDOW = tt->tm_wday + 1;
		// make sure 1 to 7
		if ( todayDOW < 0 || todayDOW > 6 ) { 
			log("merge: bad today dow of %i for coll %s",
			    (int)todayDOW,cr->m_coll);
			return;
		}
		//if ( todayDOW > 6 ) { char *xx=NULL;*xx=0; }
		// skip if not a dayofweek to merge on
		if ( dowCounts [ todayDOW ] == 0 ) continue;

		// set the start time here, but don't commit to m_cr just yet
		m_savedStartTime = nowSynced;
		// . wait for everyone to be in mode #0 in case they just
		//   finished another daily merge. only host #0 does this loop.
		// . PROBLEM: if host #0 crashes before everyone can get into 
		//   mode 1+ and then host #0 is brought back up, then 
		//   obviously, we will not be able to meet this condition,
		//   therefore only check to see if this condition is 
		//   satisfied our "second time around" (so we must complete
		//   one daily merge before checking this again). that is why
		//   i added "m_didDaily". -- MDW
		for ( int32_t i = 0 ; m_didDaily && i<g_hostdb.m_numHosts ; i++){
			// skip ourselves, obviously we are in merge mode 2
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// that's good if he is in mode 0
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_MERGEMODE0 )
				continue;
			// oops, someone is not mode 0
			return;
		}
		// got one, save it
		m_cr = cr;
		// if we were hostid 0, go into merge mode 1 now
		m_mergeMode = 1;
		// bust out of loop
		break;
	}

	// can we advance to merge mode 1?
	if ( m_mergeMode == 1 ) {
		// no candidates, go back to mode 0 now, we are done
		if ( ! m_cr ) {
			log("daily: Could not get coll rec.");
			m_mergeMode = 0; return; 
		}
		// ok, we got a collection that needs it so turn off spiders
		m_mergeMode = 2;
		// turn spiders off to keep query latency down
		m_spideringEnabled = g_conf.m_spideringEnabled;
		//m_injectionEnabled = g_conf.m_injectionEnabled;
		g_conf.m_spideringEnabled = false;
		//g_conf.m_injectionEnabled = false;
		// log it
		log("daily: Starting daily merge for %s.",m_cr->m_coll);
		log("daily: Waiting for other hosts to enter merge mode.");
	}

	// wait for everyone to make it to mode 1+ before going on
	if ( m_mergeMode == 2 ) {
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// get the host
			Host *h = &g_hostdb.m_hosts[i];
			// skip ourselves, obviously we are in merge mode 2
			if ( h == g_hostdb.m_myHost ) 
				continue;
			// skip dead hosts
			if ( g_hostdb.isDead(h) )
				continue;
			// return if a host still in merge mode 0. wait for it.
			if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 )
				return;
		}
		// ok, everyone is out of mode 0 now
		m_mergeMode = 3;
		// log it
		log("daily: Waiting for all hosts to have 0 "
		    "spiders out.");
	}

	// wait for ALL spiders in network to clear
	if ( m_mergeMode == 3 ) {
		// return if we got spiders out!
		if ( g_spiderLoop.m_numSpidersOut > 0 )
			return;
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// skip ourselves, obviously we are in merge mode 2
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// if host still has spiders out, we can't go to mode 4
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_HASSPIDERS ) 
				return;
		}
		// ok, nobody has spiders now
		m_mergeMode = 4;
		// log it
		log("daily: Dumping trees.");
	}

	// start the dumps
	if ( m_mergeMode == 4 ) {
		// . set when we did it last, save that to disk to avoid thrash
		// . TODO: BUT do not allow it to be set in the spider 
		//   controls!
		// . THIS IS IN SECONDS!!!!!!!
		// . use the time we started, otherwise the merge time keeps
		//   getting pushed back.
		m_cr->m_dailyMergeStarted = m_savedStartTime; // nowSynced;
		// tell it to save, otherwise this might not get saved
		m_cr->m_needsSave = true;
		// initiate dumps
		g_indexdb.getRdb  ()->dumpTree(1); // niceness = 1
		//g_datedb.getRdb   ()->dumpTree(1); // niceness = 1
		g_spiderdb.getRdb ()->dumpTree(1); // niceness = 1
		g_linkdb.getRdb   ()->dumpTree(1); // niceness = 1
		// if neither has recs in tree, go to next mode
		if(g_indexdb .getRdb()->getNumUsedNodes()>0) return;
		//if(g_datedb  .getRdb()->getNumUsedNodes()>0) return;
		if(g_spiderdb.getRdb()->getNumUsedNodes()>0) return;
		if(g_linkdb  .getRdb()->getNumUsedNodes()>0) return;
		// ok, all trees are clear and dumped
		m_mergeMode = 5;
		// log it
		log("daily: Merging indexdb and datedb files.");
	}

	// start the merge
	if ( m_mergeMode == 5 ) {
		// kick off the merges if not already going
		//g_indexdb.getRdb()->attemptMerge(1,true,false);
		//g_datedb .getRdb()->attemptMerge(1,true,false);
		// if has more than one file, bail on it
		RdbBase *base;

		base = g_indexdb .getRdb()->getBase(m_cr->m_collnum);
		// . niceness,forced?,doLog?,minFilesToMerge
		// . only does a merge if there are 2 or more "big" indexdb 
		//   files present. Merges so that there are LESS THAN 2 files.
		//   just another way of describing a tight merge.
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		//base = g_datedb  .getRdb()->getBase(m_cr->m_collnum);
		//base->attemptMerge (1,true,false,2);
		//if ( base->getNumFiles() >= 2 ) return;

		base = g_spiderdb.getRdb()->getBase(m_cr->m_collnum);
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		base = g_linkdb  .getRdb()->getBase(m_cr->m_collnum);
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		// . minimize titledb merging at spider time, too
		// . will perform a merge IFF there are 200 or more titledb 
		//   files present, otherwise, it will not. will do the merge
		//   such that LESS THAN 200 titledb files will be present
		//   AFTER the merge is completed.
		// . do NOT force merge ALL files on this one, we just want
		//   to make sure there are not 200+ titledb files
		base = g_titledb .getRdb()->getBase(m_cr->m_collnum);
		// we seem to dump about 70 per day at a decent spider rate
		// so merge enough so that we don't have to merge while 
		// spidering
		base->attemptMerge (1,false,false,230-70);
		if ( base->getNumFiles() >= 230-70 ) return;

		// set m_cr to NULL up here, so that the last guy to
		// complete the daily merge, does not "cycle back" and
		// try to re-daily merge the same collection!
		m_cr = NULL;
		// ok, merges are done
		m_mergeMode = 6;
		// log it
		log("daily: Waiting for all hosts to finish merging.");
	}

	// wait for all to finish before re-enabling spiders
	if ( m_mergeMode == 6 ) {
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// skip ourselves, obviously we are ok
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// if host in mode 6 or 0, that's good
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_MERGEMODE0OR6)
				continue;
			// otherwise, wait for it to be in 6 or 0
			return;
		}
		// ok, nobody has spiders now, everyone is 6 or 0
		m_mergeMode = 0;
		// no coll rec now
		m_cr = NULL;
		// spiders back on
		g_conf.m_spideringEnabled = m_spideringEnabled;
		//g_conf.m_injectionEnabled = m_injectionEnabled;
		// log it
		log("daily: Daily merge completed.");
		// now the next time we do a daily we must make sure all hosts
		// are in merge mode #0 before we start
		m_didDaily  = true;
	}		
}

Example #13

Show file

File: RdbBaseTest.cpp Project: privacore/open-source-search-engine

TEST_F(RdbBaseTest, PosdbUpdateIndex) {
	const collnum_t collNum = 0;

	RdbBase *base = g_posdb.getRdb()->getBase(collNum);

	ASSERT_EQ(0, base->addNewFile());
	RdbIndex *index0 = base->getIndex(0);
	GbTest::addPosdbKey(index0, 'A', 1, 0);
	index0->writeIndex();

	ASSERT_EQ(1, base->addNewFile());
	RdbIndex *index1 = base->getIndex(1);
	GbTest::addPosdbKey(index1, 'B', 2, 0);
	index1->writeIndex();

	ASSERT_EQ(2, base->addNewFile());
	RdbIndex *index2 = base->getIndex(2);
	GbTest::addPosdbKey(index2, 'C', 3, 0);
	index2->writeIndex();

	ASSERT_EQ(3, base->addNewFile());
	RdbIndex *index3 = base->getIndex(3);
	GbTest::addPosdbKey(index3, 'D', 4, 0);
	index3->writeIndex();

	ASSERT_EQ(4, base->addNewFile());
	RdbIndex *index4 = base->getIndex(4);
	GbTest::addPosdbKey(index4, 'E', 5, 0);
	index4->writeIndex();

	ASSERT_EQ(5, base->addNewFile());
	RdbIndex *index5 = base->getIndex(5);
	GbTest::addPosdbKey(index5, 'F', 6, 0);
	index5->writeIndex();

	base->generateGlobalIndex();
	{
		auto globalIndex = base->getGlobalIndex();
		ASSERT_EQ(6, globalIndex->size());

		EXPECT_EQ((((1 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 0, globalIndex->at(0));
		EXPECT_EQ((((2 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 1, globalIndex->at(1));
		EXPECT_EQ((((3 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 2, globalIndex->at(2));
		EXPECT_EQ((((4 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 3, globalIndex->at(3));
		EXPECT_EQ((((5 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 4, globalIndex->at(4));
		EXPECT_EQ((((6 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 5, globalIndex->at(5));
	}

	static const int32_t mergeFilePos = 2;
	static const int32_t startFilePos = 3;
	static const int32_t fileMergeCount = 2;


	// insert file
	base->updateGlobalIndexInsertFile(mergeFilePos);

	{
		SCOPED_TRACE("insert file");
		auto globalIndex = base->getGlobalIndex();
		ASSERT_EQ(6, globalIndex->size());

		EXPECT_EQ((((1 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 0, globalIndex->at(0));
		EXPECT_EQ((((2 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 1, globalIndex->at(1));
		EXPECT_EQ((((3 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 3, globalIndex->at(2));
		EXPECT_EQ((((4 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 4, globalIndex->at(3));
		EXPECT_EQ((((5 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 5, globalIndex->at(4));
		EXPECT_EQ((((6 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 6, globalIndex->at(5));
	}

	// update file
	base->updateGlobalIndexUpdateFile(mergeFilePos, startFilePos, fileMergeCount);

	{
		SCOPED_TRACE("update file");
		auto globalIndex = base->getGlobalIndex();
		ASSERT_EQ(6, globalIndex->size());

		EXPECT_EQ((((1 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 0, globalIndex->at(0));
		EXPECT_EQ((((2 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 1, globalIndex->at(1));
		EXPECT_EQ((((3 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 2, globalIndex->at(2));
		EXPECT_EQ((((4 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 2, globalIndex->at(3));
		EXPECT_EQ((((5 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 5, globalIndex->at(4));
		EXPECT_EQ((((6 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 6, globalIndex->at(5));
	}

	// delete file
	base->updateGlobalIndexDeleteFile(mergeFilePos, fileMergeCount);

	{
		SCOPED_TRACE("delete file");
		auto globalIndex = base->getGlobalIndex();
		ASSERT_EQ(6, globalIndex->size());

		EXPECT_EQ((((1 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 0, globalIndex->at(0));
		EXPECT_EQ((((2 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 1, globalIndex->at(1));
		EXPECT_EQ((((3 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 2, globalIndex->at(2));
		EXPECT_EQ((((4 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 2, globalIndex->at(3));
		EXPECT_EQ((((5 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 3, globalIndex->at(4));
		EXPECT_EQ((((6 << RdbIndex::s_docIdOffset) | 1) << RdbBase::s_docIdFileIndex_docIdOffset) | 4, globalIndex->at(5));
	}
}

Example #14

Show file

File: RdbMerge.cpp Project: lemire/open-source-search-engine

bool RdbMerge::getAnotherList ( ) {
	log(LOG_DEBUG,"db: Getting another list for merge.");
	// clear it up in case it was already set
	g_errno = 0;
	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base = getRdbBase( m_rdbId, m_collnum );
	if ( ! base ) {
		return true;
	}
	// if merging titledb files, we must adjust m_endKey so we do
	// not have to read a huge 200MB+ tfndb list
	//key_t newEndKey = m_endKey;
	char newEndKey[MAX_KEY_BYTES];
	KEYSET(newEndKey,m_endKey,m_ks);

	// . this returns false if blocked, true otherwise
	// . sets g_errno on error
	// . we return false if it blocked
	// . m_maxBufSize may be exceeded by a rec, it's just a target size
	// . niceness is usually MAX_NICENESS, but reindex.cpp sets to 0
	// . this was a call to Msg3, but i made it call Msg5 since
	//   we now do the merging in Msg5, not in msg3 anymore
	// . this will now handle truncation, dup and neg rec removal
	// . it remembers last termId and count so it can truncate even when
	//   IndexList is split between successive reads
	// . IMPORTANT: when merging titledb we could be merging about 255
	//   files, so if we are limited to only X fds it can have a cascade
	//   affect where reading from one file closes the fd of another file
	//   in the read (since we call open before spawning the read thread)
	//   and can therefore take 255 retries for the Msg3 to complete 
	//   because each read gives a EFILCLOSED error.
	//   so to fix it we allow one retry for each file in the read plus
	//   the original retry of 25
	int32_t nn = base->getNumFiles();
	if ( m_numFiles > 0 && m_numFiles < nn ) nn = m_numFiles;
	// don't access any biased page caches
	bool usePageCache = true;
	if ( m_rdbId == RDB_CLUSTERDB )
		usePageCache = false;
	// . i don't trust page cache too much (mdw)... well, give it a shot
	// . see if ths helps fix WD corruption... i doubt it
	usePageCache = false;
	// for now force to 100k
	int32_t bufSize = 100000; // g_conf.m_mergeBufSize , // minRecSizes
	// get it
	return m_msg5.getList ( m_rdbId        ,
				m_collnum           ,
				&m_list        ,
				m_startKey     ,
				newEndKey      , // usually is maxed!
				bufSize        ,
				false          , // includeTree?
				false          , // add to cache?
				0              , // max cache age for lookup
				m_startFileNum , // startFileNum
				m_numFiles     ,
				this           , // state 
				gotListWrapper , // callback
				m_niceness     , // niceness
				true           , // do error correction?
				NULL           , // cache key ptr
				0              , // retry #
				nn + 75        , // max retries (mk it high)
				false          , // compensate for merge?
				-1LL           , // sync point
				true           , // isRealMerge? absolutely!
				usePageCache   );
}