C++ (Cpp) isClockInSync示例

示例#1

0

显示文件

文件： Statsdb.cpp 项目： automatedtendencies/open-source-search-engine

void Statsdb::addDocsIndexed ( ) {

	if ( ! isClockInSync() ) return;

	// only once per five seconds
	long now = getTimeLocal();
	static long s_lastTime = 0;
	if ( now - s_lastTime < 5 ) return;
	s_lastTime = now;

	long long total = 0LL;
	static long long s_lastTotal = 0LL;
	// every 5 seconds update docs indexed count
	for ( long i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
		Host *h = &g_hostdb.m_hosts[i];
		// must have something
		if ( h->m_docsIndexed <= 0 ) return;
		// add it up
		total += h->m_docsIndexed;
	}
	// divide by # of groups
	total /= g_hostdb.getNumGroups();
	// skip if no change
	if ( total == s_lastTotal ) return;

	s_lastTotal = total;

	// add it if changed though
	long long nowms = gettimeofdayInMillisecondsGlobal();
	addStat ( MAX_NICENESS,"docs_indexed", nowms, nowms, (float)total );
}

示例#2

0

显示文件

文件： Statsdb.cpp 项目： exename/open-source-search-engine

void Statsdb::addDocsIndexed ( ) {

	if ( ! isClockInSync() ) return;
	if ( g_hostdb.hasDeadHost() ) return;


	// only host #0 needs this
	if ( g_hostdb.m_hostId != 0 ) return;

	// only once per five seconds
	int32_t now = getTimeLocal();
	static int32_t s_lastTime = 0;
	if ( now - s_lastTime < 5 ) return;
	int32_t interval = now - s_lastTime;
	s_lastTime = now;

	int64_t total = 0LL;
	static int64_t s_lastTotal = 0LL;
	// every 5 seconds update docs indexed count
	for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
		Host *h = &g_hostdb.m_hosts[i];
		// must have something
		if ( h->m_pingInfo.m_totalDocsIndexed <= 0 ) continue;
		// add it up
		total += h->m_pingInfo.m_totalDocsIndexed;
	}
	// divide by # of groups
	total /= g_hostdb.getNumHostsPerShard();
	// skip if no change

	if ( total == s_lastTotal ) return;

    int32_t docsIndexedInInterval = total - s_lastTotal;
    float docsPerSecond = docsIndexedInInterval / (float)interval;

	log("build: total docs indexed: %f. docs per second %f %i %i", (float)total, docsPerSecond, docsIndexedInInterval, interval);

	// add it if changed though
	int64_t nowms = gettimeofdayInMillisecondsGlobal();
	addStat ( MAX_NICENESS,"docs_indexed", nowms, nowms, (float)total );
    // Prevent a datapoint which adds all of the docs indexed to date.
    if( s_lastTotal != 0 ) {
        addStat ( MAX_NICENESS,"docs_per_second", nowms, nowms, docsPerSecond );
    }

	s_lastTotal = total;
}

示例#3

0

显示文件

文件： Statsdb.cpp 项目： automatedtendencies/open-source-search-engine

void flushStatsWrapper ( int fd , void *state ) {
	g_statsdb.addDocsIndexed();

	// force a statsdb tree dump if running out of room
	Rdb     *rdb  = &g_statsdb.m_rdb;
	RdbTree *tree = &rdb->m_tree;
	// if we got 20% room left and 50k available mem, do not dump
	if ( (float)tree->getNumUsedNodes() * 1.2 < 
	     (float)tree->getNumAvailNodes () &&
	     //tree->getNumAvailNodes () > 1000 &&
	     rdb-> m_mem.getAvailMem() > 50000 )
		return;

	if ( ! isClockInSync() ) return;

	// force a dump
	rdb->dumpTree ( 1 );
}

示例#4

0

显示文件

文件： fctypes.cpp 项目： privacore/open-source-search-engine

// Returns the wall-clock time in milliseconds plus s_adjustment. When
// isClockInSync() is false a warning is logged on every 100th such call
// (starting with the first), but a value is still returned.
int64_t gettimeofdayInMillisecondsSynced() {
	if ( ! isClockInSync() ) {
		// rate-limit the warning: calls 0, 100, 200, ... log it
		static int s_printed = 0;
		const bool shouldLog = ( (s_printed % 100) == 0 );
		s_printed++;
		if ( shouldLog )
			log("xml: clock not in sync with host #0 yet!!!!!!");
	}

	// read the system clock
	struct timeval tv;
	gettimeofday ( &tv , NULL );

	// whole seconds and leftover microseconds, both as milliseconds
	const int64_t fromSecs  = ((int64_t)tv.tv_sec)*1000;
	const int64_t fromUsecs = (int64_t)(tv.tv_usec/1000);

	int64_t syncedMs = fromUsecs + fromSecs;
	// apply the adjustment from Msg0x11 time adjustments
	syncedMs += s_adjustment;
	return syncedMs;
}

示例#5

0

显示文件

文件： Msg12.cpp 项目： lemire/open-source-search-engine

// Handles a msg12 spider-lock request arriving on "udpSlot". The request kind
// is inferred purely from its size:
// . sizeof(ConfirmRequest): marks an existing lock as confirmed, adds a
//   negative (delete) record to doledb, and tries to re-add the first IP to
//   the waiting tree.
// . sizeof(LockRequest): removes a lock (m_removeLock) or tries to grant a
//   new one.
// Any other size gets an EBADREQUEST error reply.
// The reply is a single byte written into the slot's m_tmpBuf: 1 means
// success/granted, 0 means the lock was refused. Failures are reported with
// sendErrorReply() instead.
void handleRequest12 ( UdpSlot *udpSlot , int32_t niceness ) {
	// get request
	char *request = udpSlot->m_readBuf;
	int32_t  reqSize = udpSlot->m_readBufSize;
	// shortcut
	UdpServer *us = &g_udpServer;
	// breathe
	QUICKPOLL ( niceness );

	// shortcut: the one-byte reply is built in the slot's own buffer
	char *reply = udpSlot->m_tmpBuf;

	//
	// . is it confirming that he got all the locks?
	// . if so, remove the doledb record and dock the doleiptable count
	//   before adding a waiting tree entry to re-pop the doledb record
	//
	if ( reqSize == sizeof(ConfirmRequest) ) {
		char *msg = NULL;
		ConfirmRequest *cq = (ConfirmRequest *)request;

		// confirm the lock
		HashTableX *ht = &g_spiderLoop.m_lockTable;
		int32_t slot = ht->getSlot ( &cq->m_lockKeyUh48 );
		// the lock being confirmed is no longer in the table
		if ( slot < 0 ) { 
			log("spider: got a confirm request for a key not "
			    "in the table! coll must have been deleted "
			    " or reset "
			    "while lock request was outstanding.");
			g_errno = EBADENGINEER;
			
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
			return;
			//char *xx=NULL;*xx=0; }
		}
		UrlLock *lock = (UrlLock *)ht->getValueFromSlot ( slot );
		lock->m_confirmed = true;

		// note that
		if ( g_conf.m_logDebugSpider ) // Wait )
			log("spider: got confirm lock request for ip=%s",
			    iptoa(lock->m_firstIp));

		// get it (may be NULL; both uses below check for that)
		SpiderColl *sc = g_spiderCache.getSpiderColl(cq->m_collnum);
		// make it negative by clearing the low bit of the key
		cq->m_doledbKey.n0 &= 0xfffffffffffffffeLL;
		// and add the negative rec to doledb (deletion operation)
		Rdb *rdb = &g_doledb.m_rdb;
		if ( ! rdb->addRecord ( cq->m_collnum,
					(char *)&cq->m_doledbKey,
					NULL , // data
					0    , //dataSize
					1 )){ // niceness
			// tree is dumping or something, probably ETRYAGAIN.
			// only errors other than ETRYAGAIN are logged here,
			// but every failure gets an error reply below.
			if ( g_errno != ETRYAGAIN ) {msg = "error adding neg rec to doledb";	log("spider: %s %s",msg,mstrerror(g_errno));
			}
			//char *xx=NULL;*xx=0;
			
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
			return;
		}
		// now remove from doleiptable since we removed from doledb
		if ( sc ) sc->removeFromDoledbTable ( cq->m_firstIp );

		// how many spiders outstanding for this coll and IP?
		//int32_t out=g_spiderLoop.getNumSpidersOutPerIp ( cq->m_firstIp);

		// DO NOT add back to waiting tree if max spiders
		// out per ip was 1 OR there was a crawldelay. but better
		// yet, take care of that in the winReq code above.

		// . now add to waiting tree so we add another spiderdb
		//   record for this firstip to doledb
		// . true = callForScan
		// . do not add to waiting tree if we have enough outstanding
		//   spiders for this ip. we will add to waiting tree when
		//   we receive a SpiderReply in addSpiderReply()
		// . a false return only counts as a failure when g_errno is
		//   also set
		if ( sc && //out < cq->m_maxSpidersOutPerIp &&
		     // this will just return true if we are not the 
		     // responsible host for this firstip
		    // DO NOT populate from this!!! say "false" here...
		     ! sc->addToWaitingTree ( 0 , cq->m_firstIp, false ) &&
		     // must be an error...
		     g_errno ) {
			msg = "FAILED TO ADD TO WAITING TREE";
			log("spider: %s %s",msg,mstrerror(g_errno));
			
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
			return;
		}
		// success!!
		reply[0] = 1;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}



	// sanity check: anything else must be exactly a LockRequest
	if ( reqSize != sizeof(LockRequest) ) {
		log("spider: bad msg12 request size of %" PRId32,reqSize);
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , EBADREQUEST );
		return;
	}
	// deny it if we are not synced yet! otherwise we core in 
	// getTimeGlobal() below
	if ( ! isClockInSync() ) { 
		// log it so we can debug it
		//log("spider: clock not in sync with host #0. so "
		//    "returning etryagain for lock reply");
		// let admin know why we are not spidering
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , ETRYAGAIN );
		return;
	}

	LockRequest *lr = (LockRequest *)request;
	//uint64_t lockKey = *(int64_t *)request;
	//int32_t lockSequence = *(int32_t *)(request+8);
	// is this a remove operation? assume not
	//bool remove = false;
	// get top bit
	//if ( lockKey & 0x8000000000000000LL ) remove = true;

	// mask it out
	//lockKey &= 0x7fffffffffffffffLL;
	// sanity check, just 6 bytes! (48 bits). deliberately crashes via a
	// NULL write if any of the top 16 bits are set.
	if ( lr->m_lockKeyUh48 &0xffff000000000000LL ) { char *xx=NULL;*xx=0; }
	// note it
	if ( g_conf.m_logDebugSpider )
		log("spider: got msg12 request uh48=%" PRId64" remove=%" PRId32,
		    lr->m_lockKeyUh48, (int32_t)lr->m_removeLock);
	// get time
	int32_t nowGlobal = getTimeGlobal();
	// shortcut
	HashTableX *ht = &g_spiderLoop.m_lockTable;

	// identify the requesting host from the slot's ip and port
	int32_t hostId = g_hostdb.getHostId ( udpSlot->m_ip , udpSlot->m_port );
	// this must be legit - sanity check (deliberate crash otherwise)
	if ( hostId < 0 ) { char *xx=NULL;*xx=0; }

	// remove expired locks from locktable
	removeExpiredLocks ( hostId );

	int64_t lockKey = lr->m_lockKeyUh48;

	// check tree
	int32_t slot = ht->getSlot ( &lockKey ); // lr->m_lockKeyUh48 );
	// put it here
	UrlLock *lock = NULL;
	// if there say no no
	if ( slot >= 0 ) lock = (UrlLock *)ht->getValueFromSlot ( slot );

	// if doing a remove operation and that was our hostid then unlock it.
	// the lock sequence must match too.
	if ( lr->m_removeLock && 
	     lock && 
	     lock->m_hostId == hostId &&
	     lock->m_lockSequence == lr->m_lockSequence ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: removing lock for lockkey=%" PRIu64" hid=%" PRId32,
			    lr->m_lockKeyUh48,hostId);
		// unlock it
		ht->removeSlot ( slot );
		// it is gone
		lock = NULL;
	}
	// ok, at this point all remove ops return. they reply 1 whether or
	// not a lock was actually removed above.
	if ( lr->m_removeLock ) {
		reply[0] = 1;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}

	/////////
	//
	// add new lock
	//
	/////////


	// if lock is older than MAX_LOCK_AGE then remove it automatically!!
	if ( lock && nowGlobal - lock->m_timestamp > MAX_LOCK_AGE ) {
		// note it for now
		log("spider: removing lock after %" PRId32" seconds "
		    "for lockKey=%" PRIu64" hid=%" PRId32,
		    (nowGlobal - lock->m_timestamp),
		    lr->m_lockKeyUh48,hostId);
		// unlock it
		ht->removeSlot ( slot );
		// it is gone
		lock = NULL;
	}
	// if lock still there, do not grant another lock
	if ( lock ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: refusing lock for lockkey=%" PRIu64" hid=%" PRId32,
			    lr->m_lockKeyUh48,hostId);
		// 0 = lock refused
		reply[0] = 0;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}
	// make the new lock
	UrlLock tmp;
	tmp.m_hostId       = hostId;
	tmp.m_lockSequence = lr->m_lockSequence;
	tmp.m_timestamp    = nowGlobal;
	tmp.m_expires      = 0;
	tmp.m_firstIp      = lr->m_firstIp;
	tmp.m_collnum      = lr->m_collnum;

	// when the spider returns we remove its lock on reception of the
	// spiderReply, however, we actually just set the m_expires time
	// to 5 seconds into the future in case there is a current request
	// to get a lock for that url in progress. but, we do need to
	// indicate whether the spider is still running via
	// m_spiderOutstanding. this way, addToWaitingTree() will
	// not count it towards a "max spiders per IP" quota when deciding
	// on if it should add a new entry for this IP.
	// NOTE(review): the flag starts out true for a new lock; presumably
	// it is cleared elsewhere when the spider completes - confirm.
	tmp.m_spiderOutstanding = true;
	// this is set when all hosts in the group (shard) have granted the
	// lock and the host sends out a confirmLockAcquisition() request.
	// until then we do not know if the lock will be granted by all hosts
	// in the group (shard)
	tmp.m_confirmed    = false;

	// put it into the table
	if ( ! ht->addKey ( &lockKey , &tmp ) ) {
		// return error if that failed!
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , g_errno );
		return;
	}
	// note it for now
	if ( g_conf.m_logDebugSpider )
		log("spider: granting lock for lockKey=%" PRIu64" hid=%" PRId32,
		    lr->m_lockKeyUh48,hostId);
	// grant the lock
	reply[0] = 1;
	us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
	return;
}

示例#6

0

显示文件

文件： Statsdb.cpp 项目： automatedtendencies/open-source-search-engine

// . m_key bitmap in statsdb:
//   tttttttt tttttttt tttttttt tttttttt  t = time in milliseconds, t1
//   tttttttt tttttttt tttttttt tttttttt
//   hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh  h = hash32 of m_title
// . returns false if could not add stat, true otherwise
// . also returns true, without adding anything, when statsdb is turned
//   off or disabled, the clock is not in sync, the tree is still loading,
//   or the operation lasted 30 seconds or more
// . do not set g_errno if we return false just to keep things simple
// . we only add the stat to our local statsdb rdb, but because
//   we might be dumping statsdb to disk or something it is possible
//   we get an ETRYAGAIN error, so we try to accumulate stats in a
//   local buffer in that case
// . "niceness" is passed through to Rdb::addRecord()
// . "label" is something like "queryLatency" or whatever. it must not be
//   NULL. it is hashed to form the key unless "parmHash" is non-zero, in
//   which case "parmHash" is used instead
// . [t1,t2] are the time endpoints for the operation being measured
// . "value" is usually "numBytes", or a quantity indicator of whatever
//   was processed.
// . oldVal, newVal reflect a state change, like maybe changing the
//   value of a parm. typically for such things t1 equals t2. they are
//   only stored when "parmHash" is non-zero
// . "userId32", when non-zero, is folded into the key so stats can be
//   shown per user
bool Statsdb::addStat ( long        niceness ,
			char       *label    ,
			long long   t1Arg    ,
			long long   t2Arg    ,
			float       value    , // y-value really, "numBytes"
			long        parmHash ,
			float       oldVal   ,
			float       newVal   ,
			long        userId32 ) {

	if ( ! g_conf.m_useStatsdb ) return true;

	// so Process.cpp can turn it off when dumping core
	if ( m_disabled ) return true;

	// not thread safe!
	//if ( g_threads.amThread() ) { 
	//	log("statsdb: called from thread");
	//	char *xx=NULL;*xx=0; 
	//}

	// . for now we can only add stats if we are synced with host #0 clock
	// . this is kinda a hack and it would be nice to not miss stats!
	if ( ! isClockInSync() ) return true;

	RdbTree *tree = &m_rdb.m_tree;
	// do not add stats to our tree if it is loading
	if ( tree->m_isLoading ) return true;

	// convert into host #0 synced time
	t1Arg = localToGlobalTimeMilliseconds ( t1Arg );
	t2Arg = localToGlobalTimeMilliseconds ( t2Arg );

	// sanity check (deliberate crash on a NULL label)
	if ( ! label ) { char *xx=NULL;*xx=0; }

	long labelHash;
	if ( parmHash ) labelHash = parmHash;
	else            labelHash = hash32n ( label );

	// fix it for parm changes, and docs_indexed stat, etc.
	// a zero-length interval would otherwise skip the loop below and
	// divide by zero when computing fractionTime.
	if ( t1Arg == t2Arg ) t2Arg++;

	// how many SECONDS did the op take? (convert from ms to secs)
	float dtms   = (t2Arg - t1Arg);
	float dtSecs = dtms / 1000.0;

	// we have already flushed stats 30+ seconds old, so if this op took
	// 30 seconds, discard it!
	if ( dtSecs >= 30 ) {
		//log("statsdb: stat is %li secs > 30 secs old, discarding.",
		//   (long)dtSecs);
		return true;
	}

	long long nextup;

	// loop over all "second" buckets
	for ( long long tx = t1Arg ; tx < t2Arg ; tx = nextup ) {
		// get next second-aligned point in milliseconds
		nextup = ((tx +1000)/ 1000) * 1000;
		// truncate if we need to
		if ( nextup > t2Arg ) nextup = t2Arg;
		// . how much of the stat is in this time interval?
		// . like if operation took 3 seconds, we might cover
		//   50% of the first 1-second interval. so we use this
		//   as a weight for the stats we keep for that particular
		//   second. then we can plot a point for each second
		//   in time which is an average of all the queries that
		//   were in progress at that second.
		float fractionTime = ((float)(nextup - tx)) / dtms;

		// . get the time point bucket in which this stat belongs
		// . every "second" in time has a bucket
		unsigned long t1 = tx / 1000;

		StatKey sk;
		sk.m_zero      = 0x01; // make it a positive key
		sk.m_time1     = t1;
		sk.m_labelHash = labelHash;

		// so we can show just the stats for a particular user...
		if ( userId32 ) {
			sk.m_zero = userId32;
			// make it positive
			sk.m_zero |= 0x01; 
		}

		// if we already have added a bucket for this "second" then
		// get it from the tree so we can add to its accumulated stats.
		long node1 = tree->getNode ( 0 , (char *)&sk );
		long node2;

		StatData *sd;

		// get that stat, see if we are accumulating it already
		if ( node1 >= 0 ) 
			sd = (StatData *)tree->getData ( node1 );

		// make a new one if not there
		else {
			StatData tmp;
			// init it
			tmp.m_totalOps      = 0.0;
			tmp.m_totalQuantity = 0.0;
			tmp.m_totalTime     = 0.0;
			// m_newVal is only assigned below for parm changes,
			// so zero it here; otherwise indeterminate stack
			// bytes get copied into the tree by addRecord()
			tmp.m_newVal        = 0.0;

			// save this
			long saved = g_errno;
			// need to add using rdb so it can memcpy the data
			if ( ! m_rdb.addRecord ( (collnum_t)0 ,
						 (char *)&sk,
						 (char *)&tmp,
						 sizeof(StatData),
						 niceness ) ) {
				if ( g_errno != ETRYAGAIN )
				log("statsdb: add rec failed: %s",
				    mstrerror(g_errno));
				// caller does not care about g_errno
				g_errno = saved;
				return false;
			}
			// caller does not care about g_errno
			g_errno = saved;
			// get the node in the tree
			//sd = (StatData *)tree->getData ( node1 );
			// must be there!
			node2 = tree->getNode ( 0 , (char *)&sk );
			// must be there! (deliberate crash otherwise)
			if ( node2 < 0 ) { char *xx=NULL;*xx=0; }
			// point to it
			sd = (StatData *)tree->getData ( node2 );
		}

		// use the milliseconds elapsed as the value if none given
		//if ( value == 0 && ! parmHash )
		//	value = t2Arg - t1Arg;

		// accumulate into this second's bucket, weighted by the
		// fraction of the operation that fell inside it
		sd->m_totalOps      += 1      * fractionTime;
		sd->m_totalQuantity += value  * fractionTime;
		sd->m_totalTime     += dtSecs * fractionTime;
		
		if ( ! parmHash ) continue;

		// a parm change overwrites the accumulated numbers with the
		// old and new parm values
		sd->m_totalOps = 0;
		sd->m_totalQuantity = oldVal;
		sd->m_newVal        = newVal;
		// no fractions for this!
		break;
	}

	//logf(LOG_DEBUG,"statsdb: sp=0x%lx",(long)sp);

	return true;
}

示例#7

0

显示文件

文件： DailyMerge.cpp 项目： BlaBlaNet/open-source-search-engine

// Drives the daily merge state machine; each call advances m_mergeMode as
// far as current conditions allow and then returns.
// . mode 0: idle. host #0 looks for a collection whose trigger time has
//   arrived; other hosts copy the collnum host #0 reports in its ping info
// . mode 1: a collection was picked; spidering is switched off
// . mode 2: wait for every live host to leave mode 0
// . mode 3: wait until no host has spiders out
// . mode 4: dump the indexdb, spiderdb and linkdb trees
// . mode 5: merge the indexdb, spiderdb, linkdb and titledb files
// . mode 6: wait for all hosts to reach mode 6 or 0, then restore the
//   saved spidering setting and go back to mode 0
void DailyMerge::dailyMergeLoop ( ) {
	// disable for now!
	//return;
	// if in repair mode, do not do daily merge
	if ( g_repairMode ) return;
	// or if in read only mode
	if ( g_conf.m_readOnlyMode ) return;
	// skip if proxy, a proxy can be hostid 0!
	if ( g_proxy.isProxy() ) return;
	// wait for clock to be synced with host #0
	if ( ! isClockInSync() ) return;
	// get local time
	int64_t nowLocalMS = gettimeofdayInMillisecondsLocal();
	// get our hostid
	int32_t hid = g_hostdb.m_myHost->m_hostId;
	// if process only recently started (1 min ago or less)
	// then do not immediately do this...
	if (hid==0 && nowLocalMS - g_process.m_processStartTime < 1*60*1000)
		return;
	// wait until the right time (this is in UTC)
	time_t nowSynced = getTimeSynced();

	// get time since midnight
	struct tm *tt ;
	// how many MINUTES into the day are we? (in UTC)
	tt = gmtime ( &nowSynced );
	// gmtime() returns NULL on failure; try again on the next call
	if ( ! tt ) return;
	int32_t elapsedMins = tt->tm_hour * 60 + tt->tm_min ;
	// . day of week, 0 = sunday ... 6 = saturday, which is the same
	//   numbering the per-collection day-of-week list uses
	// . read it now because gmtime() hands back a shared static buffer
	//   that a later time call could overwrite
	int32_t todayDOW = tt->tm_wday;

	// what collnum to merge?
	collnum_t i ;

	// . if we are not 0, just use host #0's collnum
	// . an error here will screw up the whole daily merge process
	if ( hid != 0 && m_mergeMode == 0 ) {
		// get host #0
		Host *h = &g_hostdb.m_hosts[0];
		// must have got a ping reply from him
		if ( ! h->m_gotPingReply ) return;
		// hostid #0 must NOT be in mode 0
		if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 ) return;
		// get the collnum that host #0 is currently daily merging
		i = g_hostdb.m_hosts[0].m_pingInfo.m_dailyMergeCollnum;
		// this means host #0 is not daily merging a collnum now
		if ( i < 0 ) return;
		// if it is valid, the CollectionRec MUST be there
		CollectionRec *cr = g_collectiondb.getRec ( i );
		if ( ! cr ) { 
			log("daily: host #0 bad collnum %"INT32"",(int32_t)i);return;}
		// if valid, use it
		m_cr = cr;
		// we set m_cr, go to next mode
		m_mergeMode = 1;
		// set the start time here, but don't commit to m_cr just yet
		m_savedStartTime = nowSynced;
	}

	// . only host #0 should do this loop!!!
	// . loop through each collection to check the time
	for (i=0; hid==0&&m_mergeMode==0 && i<g_collectiondb.m_numRecs; i++) {
		// get collection rec for collnum #i
		CollectionRec *cr = g_collectiondb.getRec ( i );
		// skip if empty, it was deleted at some point
		if ( ! cr ) continue;
		// skip if daily merge trigger is < 0 (do not do dailies)
		if ( cr->m_dailyMergeTrigger < 0 ) continue;
		// . skip if not time yet
		// . !!!!!THIS IS IN MINUTES!!!!!!!!
		if ( (int32_t)elapsedMins < (int32_t)cr->m_dailyMergeTrigger ) 
			continue;
		// do not start more than 15 mins after the trigger time,
		// if we miss that cuz we are down, then too bad
		if ( (int32_t)elapsedMins > (int32_t)cr->m_dailyMergeTrigger + 15 )
			continue;
 		// . how long has it been (in seconds)
		// . !!!!!THIS IS IN SECONDS!!!!!!!!
		int32_t diff = nowSynced - cr->m_dailyMergeStarted;
		// crazy?
		if ( diff < 0 ) continue;
		// if less than 24 hours ago, we already did it
		if ( diff < 24*3600 ) continue;
		// . we must now match the day of week
		// . 0 = sunday ... 6 = saturday
		// . comma separated list is ok ("0,1, 6")
		// . leave blank or at least no numbers to do every day
		char *s = cr->m_dailyMergeDOWList;
		char dowCounts[8];
		memset(dowCounts,0,8);
		// did the list contain any digit at all?
		bool sawDigit = false;
		for ( ; *s ; s++ ) {
			if ( ! is_digit(*s) ) continue;
			sawDigit = true;
			int32_t num = atoi(s);
			if ( num < 0 ) continue;
			if ( num > 6 ) continue;
			dowCounts[num]++;
		}
		// make sure 0 to 6 so it is a safe index into dowCounts[]
		if ( todayDOW < 0 || todayDOW > 6 ) { 
			log("merge: bad today dow of %i for coll %s",
			    (int)todayDOW,cr->m_coll);
			return;
		}
		//if ( todayDOW > 6 ) { char *xx=NULL;*xx=0; }
		// . skip if not a dayofweek to merge on
		// . a list with no numbers in it means merge every day
		if ( sawDigit && dowCounts [ todayDOW ] == 0 ) continue;

		// set the start time here, but don't commit to m_cr just yet
		m_savedStartTime = nowSynced;
		// . wait for everyone to be in mode #0 in case they just
		//   finished another daily merge. only host #0 does this loop.
		// . PROBLEM: if host #0 crashes before everyone can get into 
		//   mode 1+ and then host #0 is brought back up, then 
		//   obviously, we will not be able to meet this condition,
		//   therefore only check to see if this condition is 
		//   satisfied our "second time around" (so we must complete
		//   one daily merge before checking this again). that is why
		//   i added "m_didDaily". -- MDW
		for ( int32_t j = 0 ; m_didDaily && j<g_hostdb.m_numHosts ; j++){
			// skip ourselves, obviously we are in merge mode 2
			if ( &g_hostdb.m_hosts[j] == g_hostdb.m_myHost )
				continue;
			// that's good if he is in mode 0
			if ( g_hostdb.m_hosts[j].m_pingInfo.m_flags & 
			     PFLAG_MERGEMODE0 )
				continue;
			// oops, someone is not mode 0
			return;
		}
		// got one, save it
		m_cr = cr;
		// if we were hostid 0, go into merge mode 1 now
		m_mergeMode = 1;
		// bust out of loop
		break;
	}

	// can we advance to merge mode 1?
	if ( m_mergeMode == 1 ) {
		// no candidates, go back to mode 0 now, we are done
		if ( ! m_cr ) {
			log("daily: Could not get coll rec.");
			m_mergeMode = 0; return; 
		}
		// ok, we got a collection that needs it so turn off spiders
		m_mergeMode = 2;
		// turn spiders off to keep query latency down, remembering
		// the old setting so mode 6 can restore it
		m_spideringEnabled = g_conf.m_spideringEnabled;
		//m_injectionEnabled = g_conf.m_injectionEnabled;
		g_conf.m_spideringEnabled = false;
		//g_conf.m_injectionEnabled = false;
		// log it
		log("daily: Starting daily merge for %s.",m_cr->m_coll);
		log("daily: Waiting for other hosts to enter merge mode.");
	}

	// wait for everyone to make it to mode 1+ before going on
	if ( m_mergeMode == 2 ) {
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// get the host
			Host *h = &g_hostdb.m_hosts[i];
			// skip ourselves, obviously we are in merge mode 2
			if ( h == g_hostdb.m_myHost ) 
				continue;
			// skip dead hosts
			if ( g_hostdb.isDead(h) )
				continue;
			// return if a host still in merge mode 0. wait for it.
			if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 )
				return;
		}
		// ok, everyone is out of mode 0 now
		m_mergeMode = 3;
		// log it
		log("daily: Waiting for all hosts to have 0 "
		    "spiders out.");
	}

	// wait for ALL spiders in network to clear
	if ( m_mergeMode == 3 ) {
		// return if we got spiders out!
		if ( g_spiderLoop.m_numSpidersOut > 0 )
			return;
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// skip ourselves, we checked our own spiders above
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// if host still has spiders out, we can't go to mode 4
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_HASSPIDERS ) 
				return;
		}
		// ok, nobody has spiders now
		m_mergeMode = 4;
		// log it
		log("daily: Dumping trees.");
	}

	// start the dumps
	if ( m_mergeMode == 4 ) {
		// . set when we did it last, save that to disk to avoid thrash
		// . TODO: BUT do not allow it to be set in the spider 
		//   controls!
		// . THIS IS IN SECONDS!!!!!!!
		// . use the time we started, otherwise the merge time keeps
		//   getting pushed back.
		m_cr->m_dailyMergeStarted = m_savedStartTime; // nowSynced;
		// tell it to save, otherwise this might not get saved
		m_cr->m_needsSave = true;
		// initiate dumps
		g_indexdb.getRdb  ()->dumpTree(1); // niceness = 1
		//g_datedb.getRdb   ()->dumpTree(1); // niceness = 1
		g_spiderdb.getRdb ()->dumpTree(1); // niceness = 1
		g_linkdb.getRdb   ()->dumpTree(1); // niceness = 1
		// if none has recs in tree, go to next mode. otherwise
		// return and check again on a later call.
		if(g_indexdb .getRdb()->getNumUsedNodes()>0) return;
		//if(g_datedb  .getRdb()->getNumUsedNodes()>0) return;
		if(g_spiderdb.getRdb()->getNumUsedNodes()>0) return;
		if(g_linkdb  .getRdb()->getNumUsedNodes()>0) return;
		// ok, all trees are clear and dumped
		m_mergeMode = 5;
		// log it
		log("daily: Merging indexdb and datedb files.");
	}

	// start the merge
	if ( m_mergeMode == 5 ) {
		// kick off the merges if not already going
		//g_indexdb.getRdb()->attemptMerge(1,true,false);
		//g_datedb .getRdb()->attemptMerge(1,true,false);
		// if has more than one file, bail on it
		RdbBase *base;

		base = g_indexdb .getRdb()->getBase(m_cr->m_collnum);
		// . niceness,forced?,doLog?,minFilesToMerge
		// . only does a merge if there are 2 or more "big" indexdb 
		//   files present. Merges so that there are LESS THAN 2 files.
		//   just another way of describing a tight merge.
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		//base = g_datedb  .getRdb()->getBase(m_cr->m_collnum);
		//base->attemptMerge (1,true,false,2);
		//if ( base->getNumFiles() >= 2 ) return;

		base = g_spiderdb.getRdb()->getBase(m_cr->m_collnum);
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		base = g_linkdb  .getRdb()->getBase(m_cr->m_collnum);
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		// . minimize titledb merging at spider time, too
		// . will perform a merge IFF there are 160 (230-70) or more
		//   titledb files present, otherwise, it will not. will do
		//   the merge such that LESS THAN that many titledb files
		//   will be present AFTER the merge is completed.
		// . do NOT force merge ALL files on this one, we just want
		//   to keep the titledb file count down
		base = g_titledb .getRdb()->getBase(m_cr->m_collnum);
		// we seem to dump about 70 per day at a decent spider rate
		// so merge enough so that we don't have to merge while 
		// spidering
		base->attemptMerge (1,false,false,230-70);
		if ( base->getNumFiles() >= 230-70 ) return;

		// set m_cr to NULL up here, so that the last guy to
		// complete the daily merge, does not "cycle back" and
		// try to re-daily merge the same collection!
		m_cr = NULL;
		// ok, merges are done
		m_mergeMode = 6;
		// log it
		log("daily: Waiting for all hosts to finish merging.");
	}

	// wait for all to finish before re-enabling spiders
	if ( m_mergeMode == 6 ) {
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// skip ourselves, obviously we are ok
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// if host in mode 6 or 0, that's good
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_MERGEMODE0OR6)
				continue;
			// otherwise, wait for it to be in 6 or 0
			return;
		}
		// ok, everyone is 6 or 0
		m_mergeMode = 0;
		// no coll rec now
		m_cr = NULL;
		// spiders back to whatever they were set to before the merge
		g_conf.m_spideringEnabled = m_spideringEnabled;
		//g_conf.m_injectionEnabled = m_injectionEnabled;
		// log it
		log("daily: Daily merge completed.");
		// now the next time we do a daily we must make sure all hosts
		// are in merge mode #0 before we start
		m_didDaily  = true;
	}		
}

示例#8

0

显示文件

文件： HttpMime.cpp 项目： BKJackson/open-source-search-engine

// Builds an HTTP/1.0 response header into m_buf and sets m_bufLen to its
// length.
// . totalContentLen: value for Content-Length, except in the partial
//   content case where "bytesToSend" is used
// . cacheTime: -1 means browser should not cache at all (Pragma + Expires
//   lines), -2 additionally emits "Cache-Control: no-cache", 0 emits no
//   caching lines, anything else emits an Expires date of now + cacheTime
// . lastModified: used for Last-Modified; 0 means use the current time
// . offset, bytesToSend: a partial content reply is produced when
//   offset > 0 or bytesToSend != -1 (and this is not a POST reply)
// . ext: file extension used to look up the content type (when
//   "contentType" is NULL) and the content encoding
// . POSTReply: selects the shorter header used for replies to a POST
// . contentType: explicit content type, or NULL to derive it from "ext"
// . charset: when non-NULL it is appended to Content-Type as
//   "; charset=..." in the normal (non-POST, non-partial) reply only
// . httpStatus: -1 picks the default (200, or 206 for partial content)
// . cookie: optional text written after the headers; "Set-Cookie: " is
//   prepended unless the text already starts with "Set-Cookie"
void HttpMime::makeMime  ( long    totalContentLen    , 
			   long    cacheTime          ,
			   time_t  lastModified       ,
			   long    offset             , 
			   long    bytesToSend        ,
			   char   *ext                ,
			   bool    POSTReply          ,
			   char   *contentType        ,
			   char   *charset            ,
			   long    httpStatus         ,
			   char   *cookie             ) {
	// assume UTF-8
	//if ( ! charset ) charset = "utf-8";
	// . make the content type line
	// . uses a static buffer
	if ( ! contentType ) 
		contentType = (char *)getContentTypeFromExtension ( ext );

	// do not cache plug ins
	if ( contentType && strcmp(contentType,"application/x-xpinstall")==0)
		cacheTime = -2;

	// assume UTF-8, but only if content type is text
	// . No No No!!!  
	// . This prevents charset specification in html files
	// . -partap

	//if ( ! charset && contentType && strncmp(contentType,"text",4)==0) 
	//	charset = "utf-8";
	// this is used for bz2 and gz files (mp3?)
	const char *contentEncoding = getContentEncodingFromExtension ( ext );
	// the Content-Encoding header line, or empty if there is none.
	// bounded so an unexpectedly long encoding name cannot overrun enc[]
	char enc[128];
	if ( contentEncoding ) 
		snprintf ( enc , sizeof(enc) ,
			   "Content-Encoding: %s\r\n", contentEncoding );
	else
		enc[0] = '\0';
	// get the time now, preferring the synced clock when available
	//time_t now = getTimeGlobal();
	time_t now;
	if ( isClockInSync() ) now = getTimeGlobal();
	else                   now = getTimeLocal();
	// get the greenwich mean time (GMT)
	char ns[128];
	struct tm *timeStruct = gmtime ( &now );
	// Wed, 20 Mar 2002 16:47:30 GMT
	strftime ( ns , 126 , "%a, %d %b %Y %T GMT" , timeStruct );
	// if lastModified is 0 use now
	if ( lastModified == 0 ) lastModified = now;
	// convert lastModified to greenwich mean time (GMT)
	char lms[128];
	timeStruct = gmtime ( &lastModified );
	// Wed, 20 Mar 2002 16:47:30 GMT
	strftime ( lms , 126 , "%a, %d %b %Y %T GMT" , timeStruct );
	// . the pragma no cache string (used just for proxy servers?)
	// . also use cache-control: for the browser itself (HTTP1.1, though)
	// . pns = "Pragma: no-cache\nCache-Control: no-cache\nExpires: -1\n";
	char tmp[128];
	// points at a string literal or at tmp[], so it is never written to
	const char *pns ;
	// with cache-control on, when you hit the back button, it reloads
	// the page, this is bad for most things... so we only avoid the
	// cache for index.html and PageAddUrl.cpp (the main and addurl page)
	if      ( cacheTime == -2 ) pns =  "Cache-Control: no-cache\r\n"
					   "Pragma: no-cache\r\n"
					   "Expires: -1\r\n";
	// so when we click on a control link, it responds correctly.
	// like turning spiders on.
	else if  ( cacheTime == -1 ) pns = "Pragma: no-cache\r\n"
					   "Expires: -1\r\n";
	// don't specify cache times if it's 0 (let browser regulate it)
	else if ( cacheTime == 0 ) pns = "";
	// otherwise, expire tag: "Expires: Wed, 23 Dec 2001 10:23:01 GMT"
	else {
		time_t  expDate = now + cacheTime;
		timeStruct = gmtime ( &expDate );
		strftime ( tmp , 100 , "Expires: %a, %d %b %Y %T GMT\r\n", 
			   timeStruct );
		pns = tmp;
	}
	// . set httpStatus
	// . a reply to a POST (not a GET or HEAD) should be 201
	char *p = m_buf;
	// reason phrase; only filled in for a 200
	const char *smsg = "";
	if ( POSTReply ) {
		if ( httpStatus == -1 ) httpStatus = 200;
		if ( httpStatus == 200 ) smsg = " OK";
		// NOTE(review): this format already ends the header block
		// with a blank line, so a cookie written below lands after
		// it - confirm whether that is intended
		//sprintf ( m_buf , 
		p += sprintf ( p,
			  "HTTP/1.0 %li%s\r\n"
			  "Date: %s\r\n"
			       //"P3P: CP=\"CAO PSA OUR\"\r\n"
			  "Server: Gigablast/1.0\r\n"
			  "Content-Length: %li\r\n"
			  //"Expires: Wed, 23 Dec 2003 10:23:01 GMT\r\n"
			  //"Expires: -1\r\n"
			  "Connection: Close\r\n"
			  "%s"
			  "Content-Type: %s\r\n\r\n",
			  //"Connection: Keep-Alive\r\n"
			  //"%s"
			  //"Location: f**k\r\n"
			  //"Location: http://192.168.0.4:8000/cgi/3.cgi\r\n"
			  //"Last-Modified: %s\r\n\r\n" ,
			  httpStatus , smsg ,
			  ns , totalContentLen , enc , contentType  );
			  //pns ,
	                  //ns );
			  //lms );
	}
	// . is it partial content?
	// . if bytesToSend is < 0 it means "totalContentLen"
	else if ( offset > 0 || bytesToSend != -1 ) {
		if ( httpStatus == -1 ) httpStatus = 206;
		// NOTE(review): the Content-Range value below is written as
		// "start-end(total)" with no "bytes" unit; left as is since
		// existing clients may depend on it - confirm
		//sprintf ( m_buf , 
		p += sprintf( p,
			      "HTTP/1.0 %li Partial content\r\n"
			      "%s"
			      "Content-Length: %li\r\n"
			      "Content-Range: %li-%li(%li)\r\n"// added "bytes"
			      "Connection: Close\r\n"
			      //"P3P: CP=\"CAO PSA OUR\"\r\n"
			      "Server: Gigablast/1.0\r\n"
			      "%s"
			      "Date: %s\r\n"
			      "Last-Modified: %s\r\n" 
			      "Content-Type: %s\r\n",
			      httpStatus ,
			      enc ,bytesToSend ,
			      offset , offset + bytesToSend , 
			      totalContentLen ,
			      pns ,
			      ns , 
			      lms , contentType );
		// otherwise, do a normal mime
	}
	else {
		if ( httpStatus == -1 ) httpStatus = 200;
		if ( httpStatus == 200 ) smsg = " OK";
		//sprintf ( m_buf , 
		p += sprintf( p,
			      "HTTP/1.0 %li%s\r\n"
			      // make it at least 4 spaces so we can change
			      // the length of the content should we insert
			      // a login bar in Proxy::storeLoginBar()
			      "Content-Length: %04li\r\n"
			      "%s"
			      "Content-Type: %s",
			      httpStatus , smsg ,
			      totalContentLen , enc , contentType );
		// the charset, if any, goes straight into the output
		if ( charset ) p += sprintf ( p , "; charset=%s", charset );
		p += sprintf ( p , "\r\n");
		p += sprintf ( p ,
			       //"Connection: Keep-Alive\r\n"
			       "Connection: Close\r\n"
			       //"P3P: CP=\"CAO PSA OUR\"\r\n"
			       "Server: Gigablast/1.0\r\n"
			       "%s"
			       "Date: %s\r\n"
			       "Last-Modified: %s\r\n" ,
			       pns ,
			       ns , 
			       lms );
	}
	// write the cookie if we have one
	if (cookie) {
		// now it is a list of Set-Cookie: x=y\r\n lines
		//p += sprintf ( p, "Set-Cookie: %s\r\n", cookie);
		if ( strncmp(cookie,"Set-Cookie",10 ) )
			p += sprintf(p,"Set-Cookie: ");
		p += sprintf ( p, "%s", cookie);
		// terminate the line unless the cookie text already ended
		// with a newline. p[-1] is always inside m_buf because a
		// status line was written above.
		if ( p[-1] != '\n' ) {
			*p++ = '\r';
			*p++ = '\n';
		}
	}
			
	// write another line to end the mime
	p += sprintf(p, "\r\n");
	// set the mime's length
	//m_bufLen = gbstrlen ( m_buf );
	m_bufLen = p - m_buf;
}