// . send as many zids with a msg4d request as you can // . that way we check a bunch off all at once bool Syncdb::gotList ( ) { // bigLoop() can be called again now m_outstanding = false; // error? if ( g_errno ) { log("sync: had error in msg5: %s",mstrerror(g_errno)); return false; } // shortcut RdbList *m = &m_list; // just in case m->resetListPtr(); // get the rec char *rec = m->getCurrentRec(); // get key key128_t k = *(key128_t *)rec; // sanity check if ( k != m_addMe[m_ia] ) { char *xx=NULL;*xx=0;} // . add it using msg4.cpp::addMetaList() // . sets g_errno and returns false on error if ( ! addMetaList ( rec ) ) return false; // we no longer have to add it! m_qt.deleteNode ( 0 , (char *)&k , true ); // point to next m_ia++; // free it m_list.reset(); // success return true; }
void gotList33 ( void *state ) { long *rdbId = (long *)state; if ( ! s_list.isEmpty() ) { log("qa: delete failed. list is not empty rdbid=%li.",*rdbId); s_failures++; } // resume main loop qatest(); }
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
// . callback for the Msg5::getList() issued while handling a Msg0 request;
//   sends the fetched RdbList back to the requester over udp
// . "state" is the State00 allocated by the request handler; it is freed
//   by doneSending_ass() on success, or here on error
// . "listb"/"msg5xx" are unused -- the real list/msg5 live inside st0
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN" );
	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5
	UdpSlot *slot = st0->m_slot;
	RdbList *list = &st0->m_list;
	Msg5 *msg5 = &st0->m_msg5;
	UdpServer *us = st0->m_us;
	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %" PRIu64,gettimeofdayInMilliseconds());
		int32_t size = -1;
		if ( list ) size = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%" PRIu64" size=%" PRId32
		    " transId=%" PRId32" ip=%s port=%i took=%" PRId64" "
		    "(niceness=%" PRId32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}
	// on error nuke the list and it's data
	// (st0's destructor frees the list data it still owns)
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno );
		return;
	}
	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data = list->getList();
	int32_t dataSize = list->getListSize();
	char *alloc = list->getAlloc();
	int32_t allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???
	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point: warn when we read noticeably more than requested
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 )
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%" PRId32" "
		    "minRecSizes=%" PRId32".",dataSize,oldSize);
	//
	// for linkdb lists, remove all the keys that have the same IP32
	// and store a count of what we removed somewhere
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// store compressed list on itself (dst never outruns the
		// read cursor, so in-place compaction is safe)
		char *dst = list->m_list;
		// keep stats
		int32_t totalOrigLinks = 0;
		int32_t ipDups = 0;
		int32_t lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before?
			if ( ip32 == lastIp32 &&
			     // are we the last rec? include that for
			     // advancing the m_nextKey in Linkdb more
			     // efficiently.
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			gbmemcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// . if we removed one key, store the stats
		// . caller should recognize reply is not a multiple of
		//   the linkdb key size LDBKS and no its there!
		if ( ipDups ) {
			//*(int32_t *)dst = totalOrigLinks;
			//dst += 4;
			//*(int32_t *)dst = ipDups;
			//dst += 4;
		}
		// update list parms to reflect the compacted size
		list->m_listSize = dst - list->m_list;
		list->m_listEnd = list->m_list + list->m_listSize;
		data = list->getList();
		dataSize = list->getListSize();
	}
	//log("sending replySize=%" PRId32" min=%" PRId32,dataSize,msg5->m_minRecSizes);
	// . TODO: dataSize may not equal list->getListMaxSize() so
	//   Mem class may show an imblanace
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal handler
	//   if need be
	st0->m_us->sendReply_ass( data, dataSize, alloc, allocSize, slot, st0, doneSending_ass, -1, -1, true );
	logTrace( g_conf.m_logTraceMsg0, "END" );
}
// . callback with the datedb list for the turk page
// . picks the first positive, unlocked docid, locks it for this user
//   and kicks off loading its title rec for editing
// . must only run on host #0 so there is a single lock table
void gotDatedbList ( State60 *st ) {
	// must only be run on host #0 since we need just one lock table
	if ( g_hostdb.m_myHost->m_hostId != 0 ) { char *xx=NULL;*xx=0; }
	// . load turk lock table if we need to
	// . FIX: must be static, otherwise this "init once" guard re-inits
	//   and re-loads the table on every single callback
	static bool s_init = false;
	if ( ! s_init ) {
		s_init = true;
		if ( ! g_turkLocks.set(8,sizeof(TurkLock),256) )
			log("turk: failed to init turk lock table");
		if ( ! g_turkLocks.load(g_conf.m_dir,"turkdir/docidlocks.dat"))
			log("turk: failed to load turk lock table");
	}
	time_t now = getTimeGlobal();
	// int16_tcut
	RdbList *list = &st->m_list;
	// the best docid
	int64_t best = 0LL;
	// scan the list to get urls/docids to turk out
	for ( ; ! list->isExhausted() ; ) {
		// get rec
		char *k = list->getCurrentKey();
		// skip that
		list->skipCurrentRecord();
		// skip if negative
		if ( (k[0] & 0x01) == 0x00 ) continue;
		// get the docid
		int64_t docid = g_datedb.getDocId ( k );
		// . skip if locked
		// . FIX: was "g_turkLock" / "docId" -- identifier mismatches
		//   with the table and variable declared above
		TurkLock *tt = (TurkLock *)g_turkLocks.getValue(&docid);
		// if there, expire locks older than an hour
		if ( tt && now - tt->m_lockTime > 3600 ) {
			// remove it
			g_turkLocks.removeKey(&docid);
			// nuke tt
			tt = NULL;
		}
		// if still there, skip it and try next one
		if ( tt ) continue;
		// ok, we got a good docid to dish out
		best = docid;
		break;
	}
	SafeBuf sb;
	// print description so they can clikc a button to start the turk
	sb.safePrintf("<html>\n"
		      "<title>Event Editor</title>\n"
		      "<body>\n"
		      "<table width=\"100%%\" border=\"0\">\n"
		      "<tr><td style=\"background-color:#0079ba;\">\n"
		      "<center><font color=#00000>"
		      "<h2>Event Editor</h2>\n"
		      "</font></center></td>"
		      "</tr></table>");
	// if we had no docid, give user an empty msg
	if ( ! best ) {
		sb.safePrintf("<center>Nothing currently available to edit. "
			      "Please try again later.</center>"
			      "</body></html>\n");
		sendReply ( &sb );
		return;
	}
	// lock it!
	TurkLock tt;
	strcpy ( tt.m_user , st->m_user );
	tt.m_lockTime = now;
	// NOTE(review): "g_lockTable" here vs "g_turkLocks" above -- looks
	// like they should be the same table; confirm before relying on it
	if ( ! g_lockTable.addLock ( &tt ) ) {
		sendErrorReply ( st , g_errno );
		return;
	}
	// . fetch the TitleRec
	// . a max cache age of 0 means not to read from the cache
	XmlDoc *xd = &st->m_xd;
	// . when getTitleRec() is called it will load the old one
	//   since XmlDoc::m_setFromTitleRec will be true
	// . niceness is 0
	xd->set3 ( best , st->m_coll , 0 );
	// if it blocks while it loads title rec, it will re-call this routine
	xd->setCallback ( st , processLoopWrapper );
	// good to go!
	return processLoop ( st );
}
// . displays the stats for a username // . show stats for every day we have them for // . in a big list // . if they click the day display all docids evaluated for that day // . show the accuracy for that day too // . how many docs they edited // . how many of those docs were verified by another // . and if there was consensus void gotTransdbList ( State60 *st ) { // get today's time range time_t now = getTimeGlobal(); // get start of today time_t dayStart = now / (24*3600); SafeBuf sb; // int16_tcut TcpSocket *s = st->m_s; // make about 200k of mem to write into if ( ! sb.reserve ( 200000 ) ) return g_httpServer.sendErrorReply(s,500,mstrerrno(g_errno)); // print description so they can clikc a button to start the turk sb.safePrintf("<html>\n" "<title>Event Editor</title>\n" "<body>\n" "<table width=\"100%%\" border=\"0\">\n" "<tr><td style=\"background-color:#0079ba;\">\n" "<center><font color=#00000>" "<h2>Event Editor</h2>\n" "</font></center></td>" "</tr></table>"); // print the content sb.safePrintf("<center><font size=4><blink>" "<b><a href=\"/pageturk?c=%s&edit=1\">" "Click here to start editing.</a></b></blink>" "</font><br><i>Please take your " "time to read the information below before you begin" "</i><br><font color=\"red\" size=2> Warning: Adult " "content might be presented to you." " You should be above 18 years of age to continue." "</center></font>",st->m_coll); sb.safePrintf("<font face=arial,sans-serif color=black size=3>" "<p>By clicking <i>Start Voting</i>, you will be " "presented with an interface for editing events. " "The editor will display a modified web page that " "contains one or more events. Each event's description " "will be highlight with a blue background. You can " "toggle whether a particular event is displayed by " "clicking on that event's ID. You can highlight one or " "multiple event descriptions at the same time. 
" "</p><p>" "By clicking on the section icons in the web page you " "can tell the editor that a virtual fence should be " "erected around that section. The fence will make sure " "that event descriptions can not span across it. Each " "event description must be fully contained either " "inside or outside the fence. However, you can also " "declare a section as a title section, which means that " "the text that the title section contains is free to be " "used by any event description." "</p>\n" "<p>When you are done erecting section fences, you " "submit your changes. The more changes you make the " "more points you earn. Other users may evaluate " "your edits for accuracy. You will be paid based on the " "points you earn as well as your accuracy. All " "transactions are listed in the table below.</p>" "<p>You may not change your username or password " "but you can change your email address. Your email " "address will be used to pay you with PayPal every " "Friday. Paypal fees will be deducted on your end. By " "using this service you agree to all stated Terms & " "Conditions.</p>" "</font>\n"); // get the user record User *uu = g_users.getUser ( username ); // print out their info, like paypal email sb.safePrintf("<table>\n" "<tr><td colspan=10><center>Your Info</center>" "</td></tr>\n" "<tr>" "<td>Email</td>" "<td><input type=text value=%s></td>" "<td>email address used to pay with paypal</td>" "</tr>\n" "<tr><td colspan=10><input type=submit value=update>" "</td></tr>\n" "</table>\n" , uu->m_payPalEmail ); // print your stats here now sb.safePrintf("<table>\n" "<tr><td colspan=10><center>Your Stats</center>" "</td></tr>\n" "<tr>" "<td>date</td>" "<td>action</td>" "<td>amount</td>" "<td>desc</td>" "</tr>\n"); // int16_tcut RdbList *list = &st->m_list; int32_t lastDay = -1; int32_t totalReceives = 0; int32_t totalSubmits = 0; int32_t totalPasses = 0; int32_t totalFails = 0; // scan the list for ( ; ! 
list->isExhausted() ; ) { // get rec char *rec = list->getCurrentRecord(); char *data = list->getCurrentData(); int32_t dataSize = list->getCurrentDataSize(); // skip that list->skipCurrentRecord(); // skip if negative if ( (rec[0] & 0x01) == 0x00 ) continue; // get the time (global time - sync'd with host #0) time_t tt = g_transdb.getTimeStamp ( rec ); // get day # int32_t daynum = tt / (24*3600); // is it today? bool isToday = ( daynum >= dayStart ); // point to the Transaction Trans *trans = (Trans *)data; // if is today, print it out verbatim if ( isToday ) { // print it in html row format to match table above //printTrans ( &sb , rec ); sb.safePrintf("<tr>"); // make it into a nice date time_t dd = lastDay * 86400; struct tm *timeStruct = localtime ( &dd ); char ppp[100]; strftime(ppp,100,"%H:%M:%S",timeStruct); // print last days stats first sb.safePrintf("<td>%s</td>",ppp); // then stats if ( trans->m_actionType == AT_RECEIVE_DOC ) sb.safePrintf("<td>receive</td>" "<td>%"INT32" pts</td>" "<td>docid=%"UINT64"</td>", (int32_t)trans->m_number, trans->m_docId); else if ( trans->m_actionType == AT_SUBMIT_DOC ) sb.safePrintf("<td>submit</td>" "<td>%"INT32" pts</td>" "<td>docid=%"UINT64"</td>", (int32_t)trans->m_number, trans->m_docId); else if ( trans->m_actionType == AT_PASS_DOC ) sb.safePrintf("<td>verify</td>" "<td>%"INT32" pts</td>" "<td>docid=%"UINT64" was verified " "by user=\"%s\"</td>", (int32_t)trans->m_number, trans->m_docId, trans->m_desc); else if ( trans->m_actionType == AT_FAIL_DOC ) sb.safePrintf("<td>verify</td>" "<td>%"INT32" pts</td>" "<td>docid=%"UINT64" was deemed to " "be incorrect " "by user=\"%s\"</td>", (int32_t)trans->m_number, trans->m_docId, trans->m_desc); else if ( trans->m_actionType == AT_ACCURACY_EVAL) sb.safePrintf("<td>accuracy eval</td>" "<td>%.02f</td>" "<td>docid=%"UINT64"</td>", trans->m_number, trans->m_docId); else if ( trans->m_actionType == AT_CHARGE) sb.safePrintf("<td>credit</td>" "<td>%.02f</td>" "<td>You made 
money.</td>", trans->m_number); else if ( trans->m_actionType == AT_PAYMENT) sb.safePrintf("<td>payment</td>" "<td>%.02f</td>" "<td>We paid you.</td>", trans->m_number); else if ( trans->m_actionType == AT_LOGIN) sb.safePrintf("<td>login</td>" "<td>-</td>" "<td>You logged in.</td>"); else if ( trans->m_actionType == AT_LOGOUT) sb.safePrintf("<td>logout</td>" "<td>-</td>" "<td>You logged out.</td>"); else if ( trans->m_actionType == AT_AUTO_LOGOUT) sb.safePrintf("<td>logout</td>" "<td>-</td>" "<td>You were auto " "logged out.</td>"); else { char *xx=NULL;*xx=0; } sb.safePrintf("</tr>\n"); continue; } // if does not match last day, print out that last day's stats // and reset for next guy if ( daynum != lastDay && lastDay != -1 ) { // make it into a nice date time_t dd = lastDay * 86400; struct tm *timeStruct = localtime ( &dd ); char ppp[100]; strftime(ppp,100,"%b-%d-%Y",timeStruct); // print last days stats first sb.safePrintf("<td>%s</td>",ppp); // then stats sb.safePrintf("<tr>" "<td>receive</td>" "<td>%"INT32"</td>" "<td>Total received</td>" "</tr>\n", totalReceives); sb.safePrintf("<tr>" "<td>submit</td>" "<td>%"INT32"</td>" "<td>Total submitted</td>" "</tr>\n", totalSubmits); sb.safePrintf("<tr>" "<td>pass</td>" "<td>%"INT32"</td>" "<td>Total accuracy tests passed</td>" "</tr>\n", totalPasses); sb.safePrintf("<tr>" "<td>fail</td>" "<td>%"INT32"</td>" "<td>Total accuracy tests failed</td>" "</tr>\n", totalFails); // reset as well totalReceived = 0; totalSubmits = 0; totalPasses = 0; totalFails = 0; } // remember last day # we processed for accumulating stats lastDay = daynum; // accum stats if ( trans->m_actionType == AT_RECEIVE_DOC ) totalReceives++; if ( trans->m_actionType == AT_SUBMIT_DOC ) totalSubmits++; if ( trans->m_actionType == AT_PASS_DOC ) totalPasses++; if ( trans->m_actionType == AT_FAIL_DOC ) totalFails++; } sb.safePrintf("</body></html>\n"); sendReply ( &sb ); }
bool Clusterdb::verify ( char *coll ) { log ( LOG_DEBUG, "db: Verifying Clusterdb for coll %s...", coll ); g_jobScheduler.disallow_new_jobs(); Msg5 msg5; RdbList list; key_t startKey; key_t endKey; startKey.setMin(); endKey.setMax(); //int32_t minRecSizes = 64000; CollectionRec *cr = g_collectiondb.getRec(coll); if ( ! msg5.getList ( RDB_CLUSTERDB , cr->m_collnum , &list , startKey , endKey , 64000 , // minRecSizes , true , // includeTree , false , // add to cache? 0 , // max cache age 0 , // startFileNum , -1 , // numFiles , NULL , // state NULL , // callback 0 , // niceness false , // err correction? NULL , 0 , -1 , true , -1LL , true )) { g_jobScheduler.allow_new_jobs(); log("db: HEY! it did not block"); return false; } int32_t count = 0; int32_t got = 0; for ( list.resetListPtr() ; ! list.isExhausted() ; list.skipCurrentRecord() ) { key_t k = list.getCurrentKey(); // skip negative keys if ( (k.n0 & 0x01) == 0x00 ) continue; count++; //uint32_t groupId = getGroupId ( RDB_CLUSTERDB , &k ); //if ( groupId == g_hostdb.m_groupId ) got++; uint32_t shardNum = getShardNum( RDB_CLUSTERDB , &k ); if ( shardNum == getMyShardNum() ) got++; } if ( got != count ) { // tally it up g_rebalance.m_numForeignRecs += count - got; log ("db: Out of first %" PRId32" records in clusterdb, " "only %" PRId32" belong to our group.",count,got); // exit if NONE, we probably got the wrong data if ( got == 0 ) log("db: Are you sure you have the " "right " "data in the right directory? " "Exiting."); log ( "db: Exiting due to Clusterdb inconsistency." ); g_jobScheduler.allow_new_jobs(); return g_conf.m_bypassValidation; } log ( LOG_DEBUG, "db: Clusterdb passed verification successfully for " "%" PRId32" recs.", count ); // DONE g_jobScheduler.allow_new_jobs(); return true; }
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
// . older variant of the Msg0 list-read callback (INT32/UINT64 format
//   macros, 11-arg sendReply_ass with a 60s timeout)
// . sends the RdbList fetched by Msg5 back over the requesting UdpSlot;
//   st0 is freed by doneSending_ass() on success, or here on error
// . "listb"/"msg5xx" are unused -- the real list/msg5 live inside st0
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5
	UdpSlot *slot = st0->m_slot;
	RdbList *list = &st0->m_list;
	Msg5 *msg5 = &st0->m_msg5;
	UdpServer *us = st0->m_us;
	// sanity check -- ensure they match
	//if ( niceness != st0->m_niceness )
	//	log("Msg0: niceness mismatch");
	// debug msg
	//if ( niceness != 0 )
	//	log("HEY! niceness is not 0");
	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %"UINT64"",gettimeofdayInMilliseconds());
		int32_t size = -1;
		if ( list ) size = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%"UINT64" size=%"INT32""
		    " transId=%"INT32" ip=%s port=%i took=%"INT64" "
		    "(niceness=%"INT32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}
	// debug
	//if ( ! msg5->m_includeTree )
	//	log("hotit\n");
	// on error nuke the list and it's data
	// (st0's destructor frees the list data it still owns)
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		us->sendErrorReply ( slot , g_errno );
		return;
	}
	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data = list->getList();
	int32_t dataSize = list->getListSize();
	char *alloc = list->getAlloc();
	int32_t allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???
	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point: warn when we read noticeably more than requested
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 )
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%"INT32" "
		    "minRecSizes=%"INT32".",dataSize,oldSize);
	/*
	// always compress these lists
	if ( st0->m_rdbId == RDB_SECTIONDB ) { // && 1 == 3) {
		// get sh48, the sitehash
		key128_t *startKey = (key128_t *)msg5->m_startKey ;
		int64_t sh48 = g_datedb.getTermId(startKey);
		// debug
		//log("msg0: got sectiondblist from disk listsize=%"INT32"",
		//    list->getListSize());
		if ( dataSize > 50000 )
			log("msg0: sending back list rdb=%"INT32" "
			    "listsize=%"INT32" sh48=0x%"XINT64"",
			    (int32_t)st0->m_rdbId,
			    dataSize,
			    sh48);
		// save it
		int32_t origDataSize = dataSize;
		// store compressed list on itself
		char *dst = list->m_list;
		// warn if niceness is 0!
		if ( st0->m_niceness == 0 )
			log("msg0: compressing sectiondb list at niceness 0!");
		// compress the list
		uint32_t lastVoteHash32 = 0LL;
		SectionVote *lastVote = NULL;
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// get rec
			char *rec = list->getCurrentRec();
			// for ehre
			key128_t *key = (key128_t *)rec;
			// the score is the bit which is was set in
			// Section::m_flags for that docid
			int32_t secType = g_indexdb.getScore ( (char *)key );
			// 0 means it probably used to count # of voters
			// from this site, so i don't think xmldoc uses
			// that any more
			if ( secType == SV_SITE_VOTER ) continue;
			// treat key like a datedb key and get the taghash
			uint32_t h32 = g_datedb.getDate ( key );
			// get data/vote from the current record in the
			// sectiondb list
			SectionVote *sv=(SectionVote *)list->getCurrentData ();
			// get the average score for this doc
			float avg = sv->m_score ;
			if ( sv->m_numSampled > 0.0 ) avg /= sv->m_numSampled;
			// if same as last guy, add to it
			if ( lastVoteHash32 == h32 && lastVote ) {
				// turn possible multi-vote into single docid
				// into a single vote, with the score averaged.
				lastVote->m_score += avg;
				lastVote->m_numSampled++;
				continue;
			}
			// otherwise, add in a new guy!
			*(key128_t *)dst = *key;
			dst += sizeof(key128_t);
			// the new vote
			SectionVote *dsv = (SectionVote *)dst;
			dsv->m_score = avg;
			dsv->m_numSampled = 1;
			// set this
			lastVote = dsv;
			lastVoteHash32 = h32;
			// skip over
			dst += sizeof(SectionVote);
		}
		// update the list size now for sending back
		dataSize = dst - data;
		// if the list was over the requested minrecsizes we need
		// to set a flag so that the caller will do a re-call.
		// so making the entire odd, will be the flag.
		if ( origDataSize > msg5->m_minRecSizes &&
		     dataSize < origDataSize ) {
			*dst++ = '\0';
			dataSize++;
		}
		// debug
		//log("msg0: compressed sectiondblist from disk "
		//    "newlistsize=%"INT32"", dataSize);
		// use this timestamp
		int32_t now = getTimeLocal();//Global();
		// finally, cache this sucker
		s_sectiondbCache.addRecord ( msg5->m_coll,
					     (char *)startKey,//(char *)&sh48
					     data,
					     dataSize ,
					     now );
		// ignore errors
		g_errno = 0;
	}
	*/
	//
	// for linkdb lists, remove all the keys that have the same IP32
	// and store a count of what we removed somewhere
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// store compressed list on itself (dst never outruns the
		// read cursor, so in-place compaction is safe)
		char *dst = list->m_list;
		// keep stats
		int32_t totalOrigLinks = 0;
		int32_t ipDups = 0;
		int32_t lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before?
			if ( ip32 == lastIp32 &&
			     // are we the last rec? include that for
			     // advancing the m_nextKey in Linkdb more
			     // efficiently.
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			gbmemcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// . if we removed one key, store the stats
		// . caller should recognize reply is not a multiple of
		//   the linkdb key size LDBKS and no its there!
		if ( ipDups ) {
			//*(int32_t *)dst = totalOrigLinks;
			//dst += 4;
			//*(int32_t *)dst = ipDups;
			//dst += 4;
		}
		// update list parms to reflect the compacted size
		list->m_listSize = dst - list->m_list;
		list->m_listEnd = list->m_list + list->m_listSize;
		data = list->getList();
		dataSize = list->getListSize();
	}
	//log("sending replySize=%"INT32" min=%"INT32"",dataSize,msg5->m_minRecSizes);
	// . TODO: dataSize may not equal list->getListMaxSize() so
	//   Mem class may show an imblanace
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal handler
	//   if need be
	st0->m_us->sendReply_ass ( data ,
				   dataSize ,
				   alloc , // alloc
				   allocSize , // alloc size
				   slot ,
				   60 ,
				   st0 ,
				   doneSending_ass ,
				   -1 ,
				   -1 ,
				   true );
}
// . return false if blocked, true otherwise // . sets g_errno on error bool Msg1::sendData ( unsigned long shardNum, char *listData , long listSize) { // debug msg //log("sendData: mcast=%lu listSize=%li", // (long)&m_mcast,(long)listSize); // bail if this is an interface machine, don't write to the main if ( g_conf.m_interfaceMachine ) return true; // return true if no data if ( listSize == 0 ) return true; // how many hosts in this group //long numHosts = g_hostdb.getNumHostsPerShard(); // . NOTE: for now i'm removing this until I handle ETRYAGAIN errors // properly... by waiting and retrying... // . if this is local data just for us just do an addList to OUR rdb /* if ( groupId == g_hostdb.m_groupId && numHosts == 1 ) { // this sets g_errno on error Msg0 msg0; Rdb *rdb = msg0.getRdb ( (char) m_rdbId ); if ( ! rdb ) return true; // make a list from this data RdbList list; list.set (listData,listSize,listSize,rdb->getFixedDataSize(), false) ; // ownData? // this returns false and sets g_errno on error rdb->addList ( &list ); // . if we got a ETRYAGAIN cuz the buffer we add to was full // then we should sleep and try again! // . return false cuz this blocks for a period of time // before trying again if ( g_errno == ETRYAGAIN ) { // try adding again in 1 second registerSleepCallback ( 1000, slot, tryAgainWrapper1 ); // return now return false; } // . always return true cuz we did not block // . g_errno may be set return true; } */ // if the data is being added to our group, don't send ourselves // a msg1, if we can add it right now bool sendToSelf = true; if ( shardNum == getMyShardNum() && ! g_conf.m_interfaceMachine ) { // get the rdb to which it belongs, use Msg0::getRdb() Rdb *rdb = getRdbFromId ( (char) m_rdbId ); if ( ! rdb ) goto skip; // key size long ks = getKeySizeFromRdbId ( m_rdbId ); // reset g_errno g_errno = 0; // . make a list from this data // . skip over the first 4 bytes which is the rdbId // . 
TODO: embed the rdbId in the msgtype or something... RdbList list; // set the list list.set ( listData , listSize , listData , listSize , rdb->getFixedDataSize() , false , // ownData? rdb->useHalfKeys() , ks ); // note that //log("msg1: local addlist niceness=%li",m_niceness); // this returns false and sets g_errno on error rdb->addList ( m_coll , &list , m_niceness ); // if titledb, add tfndb recs to map the title recs //if ( ! g_errno && rdb == g_titledb.getRdb() && m_injecting ) // // this returns false and sets g_errno on error // updateTfndb ( m_coll , &list , true , m_niceness); // if no error, no need to use a Msg1 UdpSlot for ourselves if ( ! g_errno ) sendToSelf = false; else { log("rdb: msg1 had error: %s",mstrerror(g_errno)); // this is messing up generate catdb's huge rdblist add // why did we put it in there??? from msg9b.cpp //return true; } QUICKPOLL(m_niceness); // if we're the only one in the group, bail, we're done if ( ! sendToSelf && g_hostdb.getNumHostsPerShard() == 1 ) return true; } skip: // . make an add record request to multicast to a bunch of machines // . this will alloc new space, returns NULL on failure //char *request = makeRequest ( listData, listSize, groupId , //m_rdbId , &requestLen ); long collLen = gbstrlen ( m_coll ); // . returns NULL and sets g_errno on error // . calculate total size of the record // . 1 byte for rdbId, 1 byte for flags, // then collection NULL terminated, then list long requestLen = 1 + 1 + collLen + 1 + listSize ; // make the request char *request = (char *) mmalloc ( requestLen ,"Msg1" ); if ( ! 
request ) return true; char *p = request; // store the rdbId at top of request *p++ = m_rdbId; // then the flags *p = 0; if ( m_injecting ) *p |= 0x80; p++; // then collection name memcpy ( p , m_coll , collLen ); p += collLen; *p++ = '\0'; // sanity check if ( collLen <= 0 ) { log(LOG_LOGIC,"net: No collection specified for list add."); //char *xx = NULL; *xx = 0; g_errno = ENOCOLLREC; return true; } //if ( m_deleteRecs ) request[1] |= 0x80; //if ( m_overwriteRecs ) request[1] |= 0x40; // store the list after coll memcpy ( p , listData , listSize ); QUICKPOLL(m_niceness); // debug msg //if ( ! m_waitForReply ) // (m_rdbId == RDB_SPIDERDB || //m_rdbId == RDB_TFNDB) ) // // if we don't get here we lose it!!!!!!!!!!!!!!!!!!!!! // log("using mcast=%lu rdbId=%li listData=%lu listSize=%lu " // "gid=%lu", // (long)&m_mcast,(long)m_rdbId,(long)listData,(long)listSize, // groupId); // for small packets //long niceness = 2; //if ( requestLen < TMPBUFSIZE - 32 ) niceness = 0; //log("msg1: sending mcast niceness=%li",m_niceness); // . multicast to all hosts in group "groupId" // . multicast::send() returns false and sets g_errno on error // . we return false if we block, true otherwise // . will loop indefinitely if a host in this group is down key_t k; k.setMin(); if ( m_mcast.send ( request , // sets mcast->m_msg to this requestLen , // sets mcast->m_msgLen to this 0x01 , // msgType for add rdb record true , // does multicast own msg? shardNum , // group to send to (groupKey) true , // send to whole group? 
0 , // key is useless for us this , // state data NULL , // state data gotReplyWrapper1 , 60 , // timeout in secs m_niceness , // niceness false , // realtime -1 , // first host to try NULL , // replyBuf = NULL , 0 , // replyBufMaxSize = 0 , true , // freeReplyBuf = true , false , // doDiskLoadBalancing = false , -1 , // no max cache age limit //(key_t)0 , // cache key k , // cache key RDB_NONE , // bogus rdbId -1 , // unknown minRecSizes read size sendToSelf )) return false; QUICKPOLL(m_niceness); // g_errno should be set log("net: Had error when sending request to add data to %s in shard " "#%lu: %s.", getDbnameFromId(m_rdbId),shardNum,mstrerror(g_errno)); return true; }
void Indexdb::deepVerify ( char *coll ) { log ( LOG_INFO, "db: Deep Verifying Indexdb for coll %s...", coll ); g_threads.disableThreads(); Msg5 msg5; Msg5 msg5b; RdbList list; key_t startKey; key_t endKey; startKey.setMin(); endKey.setMax(); //long minRecSizes = 64000; collnum_t collnum = g_collectiondb.getCollnum(coll); RdbBase *rdbBase = g_indexdb.m_rdb.getBase(collnum); long numFiles = rdbBase->getNumFiles(); long currentFile = 0; deepLoop: // done after scanning all files if ( currentFile >= numFiles ) { g_threads.enableThreads(); log ( LOG_INFO, "db: Finished deep verify for %li files.", numFiles ); return; } // scan this file if ( ! msg5.getList ( RDB_INDEXDB , coll , &list , startKey , endKey , 64000 , // minRecSizes , true , // includeTree , false , // add to cache? 0 , // max cache age currentFile , // startFileNum , 1 , // numFiles , NULL , // state NULL , // callback 0 , // niceness false , // err correction? NULL , 0 , -1 , true , -1LL , &msg5b , false )) { g_threads.enableThreads(); log("db: HEY! it did not block"); return; } long count = 0; long got = 0; for ( list.resetListPtr() ; ! list.isExhausted() ; list.skipCurrentRecord() ) { key_t k = list.getCurrentKey(); count++; //unsigned long groupId = k.n1 & g_hostdb.m_groupMask; unsigned long groupId = getGroupId ( RDB_INDEXDB , &k ); if ( groupId == g_hostdb.m_groupId ) got++; } if ( got != count ) { BigFile *f = rdbBase->getFile(currentFile); log ("db: File %s: Out of first %li records in indexdb, " "only %li belong to our group.", f->getFilename(),count,got ); } //else // log ( LOG_INFO, "db: File %li: Indexdb passed verification " // "successfully for %li recs.",currentFile,count ); // next file currentFile++; goto deepLoop; }
bool Syncdb::verify ( char *coll ) { log ( LOG_INFO, "db: Verifying Syncdb for coll %s...", coll ); g_threads.disableThreads(); Msg5 msg5; Msg5 msg5b; RdbList list; key_t startKey; key_t endKey; startKey.setMin(); endKey.setMax(); CollectionRec *cr = g_collectiondb.getRec(coll); if ( ! msg5.getList ( RDB_SYNCDB , cr->m_collnum , &list , startKey , endKey , 64000 , // minRecSizes , true , // includeTree , false , // add to cache? 0 , // max cache age 0 , // startFileNum , -1 , // numFiles , NULL , // state NULL , // callback 0 , // niceness false , // err correction? NULL , 0 , -1 , true , -1LL , &msg5b , true )) { g_threads.enableThreads(); return log("db: HEY! it did not block"); } long count = 0; long got = 0; for ( list.resetListPtr() ; ! list.isExhausted() ; list.skipCurrentRecord() ) { key_t k = list.getCurrentKey(); count++; //unsigned long groupId = getGroupId ( RDB_SYNCDB , &k ); //if ( groupId == g_hostdb.m_groupId ) got++; uint32_t shardNum = getShardNum ( RDB_SYNCDB , (char *)&k ); if ( shardNum == getMyShardNum() ) got++; } if ( got != count ) { log ("db: Out of first %li records in syncdb, " "only %li belong to our group.",count,got); // exit if NONE, we probably got the wrong data if ( got == 0 ) log("db: Are you sure you have the " "right " "data in the right directory? " "Exiting."); log ( "db: Exiting due to Syncdb inconsistency." ); g_threads.enableThreads(); return g_conf.m_bypassValidation; } log ( LOG_INFO, "db: Syncdb passed verification successfully for " "%li recs.", count ); // DONE g_threads.enableThreads(); return true; }
main ( int argc , char *argv[] ) { // seed with same value so we get same rand sequence for all srand ( 1945687 ); // # of keys to in each list long nk = 200000; // # keys wanted long numKeysWanted = 200000; // get # lists to merge long numToMerge = atoi ( argv[1] ); // print start time fprintf (stderr,"smt:: randomizing begin. %li lists of %li keys.\n", numToMerge, nk); // make a list of compressed (6 byte) docIds key_t *keys0 = (key_t *) malloc ( sizeof(key_t) * nk ); key_t *keys1 = (key_t *) malloc ( sizeof(key_t) * nk ); key_t *keys2 = (key_t *) malloc ( sizeof(key_t) * nk ); key_t *keys3 = (key_t *) malloc ( sizeof(key_t) * nk ); // store radnom docIds in this list unsigned long *p = (unsigned long *) keys0; // random docIds for ( long i = 0 ; i < nk ; i++ ) { *p++ = rand() ; *p++ = rand() ; *p++ = rand() ; } p = (unsigned long *) keys1; for ( long i = 0 ; i < nk ; i++ ) { *p++ = rand() ; *p++ = rand() ; *p++ = rand() ; } p = (unsigned long *) keys2; for ( long i = 0 ; i < nk ; i++ ) { *p++ = rand() ; *p++ = rand() ; *p++ = rand() ; } p = (unsigned long *) keys3; for ( long i = 0 ; i < nk ; i++ ) { *p++ = rand() ; *p++ = rand() ; *p++ = rand() ; } // sort em up gbsort ( keys0 , nk , sizeof(key_t) , cmp ); gbsort ( keys1 , nk , sizeof(key_t) , cmp ); gbsort ( keys2 , nk , sizeof(key_t) , cmp ); gbsort ( keys3 , nk , sizeof(key_t) , cmp ); // set lists RdbList list0; RdbList list1; RdbList list2; RdbList list3; key_t minKey; minKey.n0 = 0LL; minKey.n1 = 0LL; key_t maxKey; maxKey.setMax(); list0.set ( (char *)keys0 , nk * sizeof(key_t), nk * sizeof(key_t), minKey , maxKey , 0 , false ); list1.set ( (char *)keys1 , nk * sizeof(key_t), nk * sizeof(key_t), minKey , maxKey , 0 , false ); list2.set ( (char *)keys2 , nk * sizeof(key_t), nk * sizeof(key_t), minKey , maxKey , 0 , false ); list3.set ( (char *)keys3 , nk * sizeof(key_t), nk * sizeof(key_t), minKey , maxKey , 0 , false ); // mergee RdbList list; RdbList *lists[2]; lists[0] = &list0; lists[1] = &list1; 
lists[2] = &list2; lists[3] = &list3; //list.prepareForMerge ( lists , 3 , numKeysWanted * sizeof(key_t)); list.prepareForMerge (lists,numToMerge,numKeysWanted * sizeof(key_t)); // start time fprintf(stderr,"starting merge\n"); long long t = gettimeofdayInMilliseconds(); // do it if ( numToMerge == 2 ) list.superMerge2 ( &list0 , &list1 , minKey , maxKey , false ); if ( numToMerge == 3 ) list.superMerge3 ( &list0 , &list1 , &list2 , minKey , maxKey ); // completed long long now = gettimeofdayInMilliseconds(); fprintf(stderr,"smt:: %li list NEW MERGE took %llu ms\n", numToMerge,now-t); // time per key long size = list.getListSize() / sizeof(key_t); double tt = ((double)(now - t))*1000000.0 / ((double)size); fprintf (stderr,"smt:: %f nanoseconds per key\n", tt); // stats //double d = (1000.0*(double)nk*2.0) / ((double)(now - t)); double d = (1000.0*(double)(size)) / ((double)(now - t)); fprintf (stderr,"smt:: %f cycles per final key\n" , 400000000.0 / d ); fprintf (stderr,"smt:: we can do %li adds per second\n" ,(long)d); fprintf (stderr,"smt:: final list size = %li\n",list.getListSize()); // now get list from the old merge routine RdbList listOld; listOld.prepareForMerge (lists,numToMerge,numKeysWanted*sizeof(key_t)); t = gettimeofdayInMilliseconds(); listOld.merge_r ( lists , numToMerge , true , minKey , maxKey , false , numKeysWanted * sizeof(key_t)); now = gettimeofdayInMilliseconds(); fprintf(stderr,"smt:: %li list OLD MERGE took %llu ms\n", numToMerge,now-t); // then compare // exit gracefully exit ( 0 ); }
// . callback for one completed clusterdb record lookup
// . records the cluster rec for slot->m_ci on success (CR_OK), or marks it
//   CR_ERROR_CLUSTERDB on error/short reply
// . sets m_errno to g_errno if not already set
// . always leaves g_errno cleared on return
void Msg51::gotClusterRec(Slot *slot) {
	// count it
	m_numReplies++;
	// free up the slot so it can be reused for another lookup
	slot->m_inUse = false;
	RdbList *list = slot->m_msg0.m_list;
	// update m_errno if we had an error
	if ( ! m_errno ) m_errno = g_errno;
	if ( g_errno )
		// print error
		// NOTE(review): "docId=d" looks like a broken format
		// specifier — no docid argument is passed; confirm intent
		log(LOG_DEBUG,
		    "query: Had error getting cluster info got docId=d: "
		    "%s.",mstrerror(g_errno));
	// this doubles as a ptr to a cluster rec
	int32_t ci = slot->m_ci;
	// get docid
	int64_t docId = m_docIds[ci];
	// assume error!
	m_clusterLevels[ci] = CR_ERROR_CLUSTERDB;
	// bail on error, or if the list is too small to hold a 12-byte key
	if ( g_errno || list->getListSize() < 12 ) {
		//log(LOG_DEBUG,
		//    "build: clusterdb rec for d=%" PRId64" dptr=%" PRIu32" "
		//    "not found. where is it?", docId, (int32_t)ci);
		g_errno = 0;
		return;
	}
	// . steal rec from this multicast
	// . point to cluster rec, a int32_t
	key96_t *rec = &m_clusterRecs[ci];
	// store the cluster rec itself
	*rec = *(key96_t *)(list->getList());
	// debug note
	log(LOG_DEBUG,
	    "build: had clusterdb SUCCESS for d=%" PRId64" dptr=%" PRIu32" "
	    "rec.n1=%" PRIx32",%016" PRIx64" sitehash26=0x%" PRIx32".",
	    (int64_t)docId, (int32_t)ci,
	    rec->n1,rec->n0,
	    g_clusterdb.getSiteHash26((char *)rec));
	// check for docid mismatch: the rec we got must really be for docId
	int64_t docId2 = g_clusterdb.getDocId ( rec );
	if ( docId != docId2 ) {
		logf(LOG_DEBUG,"query: docid mismatch in clusterdb.");
		return;
	}
	// it is legit, set to CR_OK
	m_clusterLevels[ci] = CR_OK;
	// . lazily init the static quick cache the first time we get here
	// . use ~200k of memory
	if ( ! s_cacheInit &&
	     s_clusterdbQuickCache.init(200*1024        , // maxMem
					sizeof(key96_t) , // fixedDataSize (clusterdb rec)
					false           , // support lists
					10000           , // max recs
					false           , // use half keys?
					"clusterdbQuickCache" ,
					false           , // load from disk?
					sizeof(key96_t) , // cache key size
					sizeof(key96_t) )) // cache key size
		// only init once if successful
		s_cacheInit = true;
	// debug msg
	//logf(LOG_DEBUG,"query: msg51 addRec k.n0=%" PRIu64" rec.n0=%" PRIu64,
	//     docId, rec->n0);
	// . add the record to our quick cache keyed by docid
	// . ignore any error
	if ( s_cacheInit )
		s_clusterdbQuickCache.addRecord(m_collnum,
						(key96_t)docId, // docid is key
						(char *)rec,
						sizeof(key96_t), // recSize
						0);
	// clear it in case the cache set it, we don't care
	g_errno = 0;
}
// . udp handler for "add rdb record" messages (registered in Msg1::set())
// . request layout: [rdbId:1][flags:1 (0x80 = injecting)][collnum:2][recs...]
// . destroys the slot if false is returned
// . seems like we should always send back a reply so we don't leave the
//   requester's slot hanging, unless he can kill it after transmit success???
// . TODO: need we send a reply back on success????
// . NOTE: Must always call g_udpServer::sendReply or sendErrorReply() so
//   read/send bufs can be freed
void handleRequest1 ( UdpSlot *slot , int32_t netnice ) {
	// extract what we read
	char   *readBuf     = slot->m_readBuf;
	int32_t readBufSize = slot->m_readBufSize;
	int32_t niceness    = slot->m_niceness;
	// select udp server based on niceness
	UdpServer *us = &g_udpServer;
	// must at least have an rdbId (header is 4 bytes)
	if ( readBufSize <= 4 ) {
		g_errno = EREQUESTTOOSHORT;
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Request too short", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno );
		return;
	}
	// walk the request buffer
	char *p    = readBuf;
	char *pend = readBuf + readBufSize;
	// extract rdbId
	char rdbId = *p++;
	// get the rdb to which it belongs, use Msg0::getRdb()
	Rdb *rdb = getRdbFromId ( (char) rdbId );
	if ( ! rdb ) {
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Bad rdbid", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot, EBADRDBID );
		return;
	}
	// keep track of stats
	rdb->readRequestAdd ( readBufSize );
	// reset g_errno
	g_errno = 0;
	// are we injecting some title recs?
	// NOTE(review): "injecting" is parsed but unused below — the tfndb
	// update it once gated is commented out
	bool injecting;
	if ( *p & 0x80 ) injecting = true;
	else             injecting = false;
	p++;
	// then collection
	//char *coll = p;
	//p += strlen (p) + 1;
	collnum_t collnum = *(collnum_t *)p;
	p += sizeof(collnum_t);
	// . make a list from this data
	// . skip over the first 4 bytes which is the rdbId
	// . TODO: embed the rdbId in the msgtype or something...
	RdbList list;
	// set the list; ownData=false since UdpServer owns the read buffer
	list.set ( p        , // readBuf     + 4 ,
		   pend - p , // readBufSize - 4 ,
		   p        , // readBuf     + 4 ,
		   pend - p , // readBufSize - 4 ,
		   rdb->getFixedDataSize() ,
		   false                   , // ownData?
		   rdb->useHalfKeys()      ,
		   rdb->getKeySize ()      );
	// note it
	//log("msg1: handlerequest1 calling addlist niceness=%" PRId32,niceness);
	//log("msg1: handleRequest1 niceness=%" PRId32,niceness);
	// this returns false and sets g_errno on error
	rdb->addList ( collnum , &list , niceness);
	// if titledb, add tfndb recs to map the title recs
	//if ( ! g_errno && rdb == g_titledb.getRdb() && injecting )
	//	updateTfndb ( coll , &list , true, 0);
	// but if deleting a "new" and unforced record from spiderdb
	// then only delete tfndb record if it was tfn=255
	//if ( ! g_errno && rdb == g_spiderdb.getRdb() )
	//	updateTfndb2 ( coll , &list , false );
	// send ok/error reply back; retry on some errors
	addedList ( slot , rdb );
}
/*
bool Revdb::addColl ( char *coll, bool doVerify ) {
	if ( ! m_rdb.addColl ( coll ) ) return false;
	if ( ! doVerify ) return true;
	// verify
	if ( verify(coll) ) return true;
	// if not allowing scale, return false
	if ( ! g_conf.m_allowScale ) return false;
	// otherwise let it go
	log ( "db: Verify failed, but scaling is allowed, passing." );
	return true;
}
*/
// . verify that the revdb records on disk belong to this host's shard
// . reads one synchronous batch (threads disabled) and checks every key
//   with getShardNum()
// . returns true on pass, g_conf.m_bypassValidation on inconsistency,
//   false if the read unexpectedly blocked
bool Revdb::verify ( char *coll ) {
	log ( LOG_INFO, "db: Verifying Revdb for coll %s...", coll );
	g_threads.disableThreads();

	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	// full key range
	key_t startKey;
	key_t endKey;
	startKey.setMin();
	endKey.setMax();
	//int32_t minRecSizes = 64000;
	CollectionRec *cr = g_collectiondb.getRec(coll);

	// synchronous read of the first ~1MB of records
	if ( ! msg5.getList ( RDB_REVDB     ,
			      cr->m_collnum ,
			      &list         ,
			      startKey      ,
			      endKey        ,
			      1024*1024     , // minRecSizes ,
			      true          , // includeTree ,
			      false         , // add to cache?
			      0             , // max cache age
			      0             , // startFileNum ,
			      -1            , // numFiles ,
			      NULL          , // state
			      NULL          , // callback
			      0             , // niceness
			      false         , // err correction?
			      NULL          , // cache key ptr
			      0             , // retry num
			      -1            , // maxRetries
			      true          , // compensate for merge
			      -1LL          , // sync point
			      &msg5b        ,
			      false         )) {
		g_threads.enableThreads();
		return log("db: HEY! it did not block");
	}

	// count sampled records and those belonging to our shard
	int32_t count = 0;
	int32_t got   = 0;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		//uint32_t groupId = getGroupId ( RDB_REVDB , &k );
		//if ( groupId == g_hostdb.m_groupId ) got++;
		uint32_t shardNum = getShardNum( RDB_REVDB , &k );
		if ( shardNum == getMyShardNum() ) got++;
	}
	if ( got != count ) {
		log ("db: Out of first %"INT32" records in revdb, "
		     "only %"INT32" belong to our group.",count,got);
		// exit if NONE, we probably got the wrong data
		if ( count > 10 && got == 0 )
			log("db: Are you sure you have the right "
			    "data in the right directory? "
			    "Exiting.");
		log ( "db: Exiting due to Revdb inconsistency." );
		g_threads.enableThreads();
		return g_conf.m_bypassValidation;
	}
	log ( LOG_INFO, "db: Revdb passed verification successfully for %"INT32""
	      " recs.", count );
	// DONE
	g_threads.enableThreads();
	return true;
}
/*
bool Placedb::addColl ( char *coll, bool doVerify ) {
	if ( ! m_rdb.addColl ( coll ) ) return false;
	if ( ! doVerify ) return true;
	// verify
	if ( verify(coll) ) return true;
	// if not allowing scale, return false
	if ( ! g_conf.m_allowScale ) return false;
	// otherwise let it go
	log ( "db: Verify failed, but scaling is allowed, passing." );
	return true;
}
*/
// . verify that placedb records on disk belong to this host's shard
// . zero keys (n1==0 && n0==0) are tolerated and counted as passing
// . a difference of at most one record is let slide
// . returns true on pass, g_conf.m_bypassValidation on inconsistency,
//   false if the read unexpectedly blocked
bool Placedb::verify ( char *coll ) {
	log ( LOG_INFO, "db: Verifying Placedb for coll %s...", coll );
	g_threads.disableThreads();

	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	// full key range
	key_t startKey;
	startKey.setMin();
	key_t endKey;
	endKey.setMax();

	// synchronous read of the first batch of records
	if ( ! msg5.getList ( RDB_PLACEDB ,
			      coll        ,
			      &list       ,
			      startKey    ,
			      endKey      ,
			      64000       , // minRecSizes ,
			      true        , // includeTree ,
			      false       , // add to cache?
			      0           , // max cache age
			      0           , // startFileNum ,
			      -1          , // numFiles ,
			      NULL        , // state
			      NULL        , // callback
			      0           , // niceness
			      false       , // err correction?
			      NULL        ,
			      0           ,
			      -1          ,
			      true        ,
			      -1LL        ,
			      &msg5b      ,
			      true        ,
			      false       )) { // allow page cache?
		g_threads.enableThreads();
		return log("db: HEY! it did not block");
	}

	long count = 0;
	long got   = 0;
	// only log each anomaly kind once to avoid log spam
	bool printedKey     = false;
	bool printedZeroKey = false;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		// verify the group
		uint32_t shardNum = getShardNum ( RDB_PLACEDB , (char *)&k );
		if ( shardNum == getMyShardNum() ) got++;
		else if ( !printedKey ) {
			log ("db: Found bad key in list (only printing once): "
			     "%lx %llx", k.n1, k.n0 );
			printedKey = true;
		}
		// zero keys are tolerated: count them as passing if they
		// were not already counted above
		if ( k.n1 == 0 && k.n0 == 0 ) {
			if ( !printedZeroKey ) {
				log ( "db: Found Zero key in list, passing. "
				      "(only printing once)." );
				printedZeroKey = true;
			}
			// pass if we didn't match above
			if ( shardNum != getMyShardNum() ) got++;
		}
	}
	if ( got != count ) {
		log("db: Out of first %li records in placedb, only %li passed "
		    "verification.",count,got);
		// exit if NONE, we probably got the wrong data
		if ( got == 0 )
			log("db: Are you sure you have the "
			    "right "
			    "data in the right directory? "
			    "Exiting.");
		g_threads.enableThreads();
		// if only one let it slide, i saw this happen on gb1 cluster
		if ( got - count >= -1 && got - count <= 1 ) return true;
		log ( "db: Exiting due to Placedb inconsistency." );
		return g_conf.m_bypassValidation;
	}
	log ( LOG_INFO, "db: Placedb passed verification successfully for %li "
	      "recs.", count );
	// DONE
	g_threads.enableThreads();
	return true;
}
/*
bool Titledb::addColl ( char *coll, bool doVerify ) {
	if ( ! m_rdb.addColl ( coll ) ) return false;
	if ( ! doVerify ) return true;
	// verify
	if ( verify(coll) ) return true;
	// if not allowing scale, return false
	if ( ! g_conf.m_allowScale ) return false;
	// otherwise let it go
	log ( "db: Verify failed, but scaling is allowed, passing." );
	return true;
}
*/
// . verify that titledb records on disk belong to this host's shard
// . negative (delete) keys are skipped; foreign records are tallied into
//   g_rebalance.m_numForeignRecs and logged per-docid
// . never fails the startup for an imbalance anymore — it logs and
//   returns true so the rebalancer can fix it
bool Titledb::verify(const char *coll) {
	log ( LOG_DEBUG, "db: Verifying Titledb for coll %s...", coll );

	Msg5 msg5;
	RdbList list;
	// full key range
	key96_t startKey;
	key96_t endKey;
	startKey.setMin();
	endKey.setMax();
	//int32_t minRecSizes = 64000;
	const CollectionRec *cr = g_collectiondb.getRec(coll);

	// read the first ~1MB of records
	if ( ! msg5.getList ( RDB_TITLEDB   ,
			      cr->m_collnum ,
			      &list         ,
			      startKey      ,
			      endKey        ,
			      1024*1024     , // minRecSizes ,
			      true          , // includeTree ,
			      0             , // max cache age
			      0             , // startFileNum ,
			      -1            , // numFiles ,
			      NULL          , // state
			      NULL          , // callback
			      0             , // niceness
			      false         , // err correction?
			      NULL          , // cache key ptr
			      0             , // retry num
			      -1            , // maxRetries
			      -1LL          , // sync point
			      false         , // isRealMerge
			      true))          // allowPageCache
	{
		log(LOG_DEBUG, "db: HEY! it did not block");
		return false;
	}

	int32_t count = 0;
	int32_t got   = 0;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key96_t k = list.getCurrentKey();
		// skip negative keys (low bit clear = delete record)
		if ( (k.n0 & 0x01) == 0x00 ) continue;
		count++;
		//uint32_t groupId = getGroupId ( RDB_TITLEDB , &k );
		//if ( groupId == g_hostdb.m_groupId ) got++;
		uint32_t shardNum = getShardNum ( RDB_TITLEDB, &k );
		if ( shardNum == getMyShardNum() ) got++;
	}
	if ( got != count ) {
		// tally it up for the rebalancer
		g_rebalance.m_numForeignRecs += count - got;
		log ("db: Out of first %" PRId32" records in titledb, "
		     "only %" PRId32" belong to our shard. c=%s",count,got,coll);
		// exit if NONE, we probably got the wrong data
		if ( count > 10 && got == 0 )
			log("db: Are you sure you have the right "
			    "data in the right directory? "
			    "coll=%s "
			    "Exiting.", coll);
		// repeat with log: dump each foreign docid and its shard
		for ( list.resetListPtr() ; ! list.isExhausted() ;
		      list.skipCurrentRecord() ) {
			key96_t k = list.getCurrentKey();
			//uint32_t groupId = getGroupId ( RDB_TITLEDB,&k);
			//int32_t groupNum = g_hostdb.getGroupNum(groupId);
			int32_t shardNum = getShardNum ( RDB_TITLEDB, &k );
			log("db: docid=%" PRId64" shard=%" PRId32,
			    getDocId(&k),shardNum);
		}
		//if ( g_conf.m_bypassValidation ) return true;
		//if ( g_conf.m_allowScale ) return true;
		// don't exit any more, allow it, but do not delete
		// recs that belong to different shards when we merge now!
		log ( "db: db shards unbalanced. "
		      "Click autoscale in master controls.");
		//return false;
		return true;
	}

	log ( LOG_DEBUG, "db: Titledb passed verification successfully for %" PRId32
	      " recs.", count );
	// DONE
	return true;
}
// . verify that indexdb records on disk belong to this host's group
// . NOTE: the "return true" on the first line DISABLES this check — all
//   of the code below it is intentionally unreachable until that line is
//   removed; kept as-is so the check can be re-enabled easily
bool Indexdb::verify ( char *coll ) {
	return true;
	log ( LOG_INFO, "db: Verifying Indexdb for coll %s...", coll );
	g_threads.disableThreads();

	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	// full key range
	key_t startKey;
	key_t endKey;
	startKey.setMin();
	endKey.setMax();
	//long minRecSizes = 64000;

	// synchronous read of the first batch of records
	if ( ! msg5.getList ( RDB_INDEXDB ,
			      coll        ,
			      &list       ,
			      startKey    ,
			      endKey      ,
			      64000       , // minRecSizes ,
			      true        , // includeTree ,
			      false       , // add to cache?
			      0           , // max cache age
			      0           , // startFileNum ,
			      -1          , // numFiles ,
			      NULL        , // state
			      NULL        , // callback
			      0           , // niceness
			      false       , // err correction?
			      NULL        ,
			      0           ,
			      -1          ,
			      true        ,
			      -1LL        ,
			      &msg5b      ,
			      true        )) {
		g_threads.enableThreads();
		return log("db: HEY! it did not block");
	}

	long count = 0;
	long got   = 0;
	// only log each anomaly kind once to avoid log spam
	bool printedKey     = false;
	bool printedZeroKey = false;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
		unsigned long groupId = getGroupId ( RDB_INDEXDB , &k );
		if ( groupId == g_hostdb.m_groupId ) got++;
		else if ( !printedKey ) {
			log ( "db: Found bad key in list (only printing once): "
			      "%lx %llx", k.n1, k.n0 );
			printedKey = true;
		}
		// zero keys are tolerated: count as passing if not already
		if ( k.n1 == 0 && k.n0 == 0 ) {
			if ( !printedZeroKey ) {
				log ( "db: Found Zero key in list, passing. "
				      "(only printing once)." );
				printedZeroKey = true;
			}
			if ( groupId != g_hostdb.m_groupId ) got++;
		}
	}
	if ( got != count ) {
		log ("db: Out of first %li records in indexdb, only %li belong "
		     "to our group.",count,got);
		// exit if NONE, we probably got the wrong data
		if ( got == 0 )
			log("db: Are you sure you have the "
			    "right "
			    "data in the right directory? "
			    "Exiting.");
		log ( "db: Exiting due to Indexdb inconsistency." );
		g_threads.enableThreads();
		return g_conf.m_bypassValidation;
	}
	log ( LOG_INFO, "db: Indexdb passed verification successfully for %li "
	      "recs.", count );
	// DONE
	g_threads.enableThreads();
	return true;
}
// . msg5 callback for a Msg22 titledb lookup
// . scans the returned titlerec list for the requested url/docid and sends
//   back over the udp slot either: an empty reply (justCheckTfndb match),
//   the matching titlerec, or an 8-byte available docid
// . on any error jumps to hadError: sends an error reply and frees state
void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {
	State22 *st = (State22 *)state;
	// if niceness is 0, use the higher priority udpServer
	UdpServer *us = &g_udpServer;
	// shortcut
	Msg22Request *r = st->m_r;
	// breathe
	QUICKPOLL(r->m_niceness);
	// send error reply on error
	if ( g_errno ) {
	hadError:
		log("db: Had error getting title record from titledb: %s.",
		    mstrerror(g_errno));
		// must not reach this label without g_errno set
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		us->sendErrorReply ( st->m_slot , g_errno );
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		return ;
	}
	// convenience var
	RdbList *tlist = &st->m_tlist;
	// set probable docid from the url, and cross-check against the one
	// computed when the request was received
	long long pd = 0LL;
	if ( r->m_url[0] ) {
		pd = g_titledb.getProbableDocId(r->m_url);
		if ( pd != st->m_pd ) {
			log("db: crap probable docids do not match! u=%s",
			    r->m_url);
			g_errno = EBADENGINEER;
			goto hadError;
		}
		// sanity
		//if ( pd != st->m_pd ) { char *xx=NULL;*xx=0; }
	}
	// the probable docid is the PREFERRED docid in this case
	if ( r->m_getAvailDocIdOnly ) pd = st->m_r->m_docId;
	// . these are both meant to be available docids
	// . if ad2 gets exhausted we use ad1
	long long ad1 = st->m_docId1;
	long long ad2 = pd;
	bool docIdWasFound = false;
	// scan the titleRecs in the list
	for ( ; ! tlist->isExhausted() ; tlist->skipCurrentRecord ( ) ) {
		// breathe
		QUICKPOLL ( r->m_niceness );
		// get the rec
		char *rec     = tlist->getCurrentRec();
		long  recSize = tlist->getCurrentRecSize();
		// get that key
		key_t *k = (key_t *)rec;
		// skip negative recs, first one should not be negative however
		if ( ( k->n0 & 0x01 ) == 0x00 ) continue;
		// get docid of that titlerec
		long long dd = g_titledb.getDocId(k);
		if ( r->m_getAvailDocIdOnly ) {
			// make sure our available docids are availble!
			if ( dd == ad1 ) ad1++;
			if ( dd == ad2 ) ad2++;
			continue;
		}
		// if we had a url make sure uh48 matches
		else if ( r->m_url[0] ) {
			// get it
			long long uh48 = g_titledb.getUrlHash48(k);
			// sanity check
			if ( st->m_uh48 == 0 ) { char *xx=NULL;*xx=0; }
			// make sure our available docids are availble!
			if ( dd == ad1 ) ad1++;
			if ( dd == ad2 ) ad2++;
			// we must match this exactly
			if ( uh48 != st->m_uh48 ) continue;
		}
		// otherwise, check docid
		else {
			// compare that
			if ( r->m_docId != dd ) continue;
		}
		// flag that we matched m_docId
		docIdWasFound = true;
		// do not set back titlerec if just want avail docid
		//if ( r->m_getAvailDocIdOnly ) continue;
		// ok, if just "checking tfndb" no need to go further
		if ( r->m_justCheckTfndb ) {
			// send back a good reply (empty means found!)
			us->sendReply_ass ( NULL,0,NULL,0,st->m_slot);
			// don't forget to free the state
			mdelete ( st , sizeof(State22) , "Msg22" );
			delete ( st );
			return;
		}
		// use rec as reply
		char *reply = rec;
		// . send this rec back, it's a match
		// . if only one rec in list, steal the list's memory;
		//   otherwise copy the rec out since the list will be freed
		if ( recSize != tlist->getAllocSize() ) {
			// otherwise, alloc space for the reply
			reply = (char *)mmalloc (recSize, "Msg22");
			if ( ! reply ) goto hadError;
			memcpy ( reply , rec , recSize );
		}
		// otherwise we send back the whole list!
		else {
			// we stole this from list: UdpServer frees it
			tlist->m_ownData = false;
		}
		// off ya go
		us->sendReply_ass(reply,recSize,reply,recSize,st->m_slot);
		// don't forget to free the state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		// all done
		return;
	}
	// maybe no available docid if we breached our range
	if ( ad1 >= pd           ) ad1 = 0LL;
	if ( ad2 > st->m_docId2  ) ad2 = 0LL;
	// get best
	long long ad = ad2;
	// but wrap around if we need to
	if ( ad == 0LL ) ad = ad1;
	// if "docId" was unmatched that should be the preferred available
	// docid then...
	//if(! docIdWasFound && r->m_getAvailDocIdOnly && ad != r->m_docId ) {
	//	char *xx=NULL;*xx=0; }
	// remember it. this might be zero if none exist!
	st->m_availDocId = ad;
	// note it
	if ( ad == 0LL && (r->m_getAvailDocIdOnly || r->m_url[0]) )
		log("msg22: avail docid is 0 for pd=%lli!",pd);
	// . ok, return an available docid (8 bytes in the slot's tmp buf)
	if ( r->m_url[0] || r->m_justCheckTfndb || r->m_getAvailDocIdOnly ) {
		// store docid in reply
		char *p = st->m_slot->m_tmpBuf;
		// send back the available docid
		*(long long *)p = st->m_availDocId;
		// send it
		us->sendReply_ass ( p , 8 , p , 8 , st->m_slot );
		// don't forget to free state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		return;
	}
	// not found! and it was a docid based request...
	log("msg22: could not find title rec for docid %llu",r->m_docId);
	g_errno = ENOTFOUND;
	goto hadError;
}
/*
bool Monitordb::addColl ( char *coll, bool doVerify ) {
	if ( ! m_rdb.addColl ( coll ) ) return false;
	if ( ! doVerify ) return true;
	// verify
	if ( verify(coll) ) return true;
	// if not allowing scale, return false
	if ( ! g_conf.m_allowScale ) return false;
	// otherwise let it go
	log ( "db: Verify failed, but scaling is allowed, passing." );
	return true;
}
*/
// . verify that monitordb records on disk belong to this host's shard
// . uses 28-byte (key224_t) keys, fetched via the char* getCurrentKey()
// . returns true on pass, g_conf.m_bypassValidation on inconsistency,
//   false if the read unexpectedly blocked
bool Monitordb::verify ( char *coll ) {
	log ( LOG_INFO, "db: Verifying Monitordb for coll %s...", coll );
	g_threads.disableThreads();

	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	// full key range
	key224_t startKey;
	key224_t endKey;
	startKey.setMin();
	endKey.setMax();
	long minRecSizes = 64000;
	CollectionRec *cr = g_collectiondb.getRec(coll);

	// synchronous read of the first batch of records
	if ( ! msg5.getList ( RDB_MONITORDB ,
			      cr->m_collnum ,
			      &list         ,
			      (char*)&startKey ,
			      (char*)&endKey   ,
			      minRecSizes   ,
			      true          , // includeTree ,
			      false         , // add to cache?
			      0             , // max cache age
			      0             , // startFileNum ,
			      -1            , // numFiles ,
			      NULL          , // state
			      NULL          , // callback
			      0             , // niceness
			      false         , // err correction?
			      NULL          ,
			      0             ,
			      -1            ,
			      true          ,
			      -1LL          ,
			      &msg5b        ,
			      true          )) {
		g_threads.enableThreads();
		return log("db: HEY! it did not block");
	}

	long count = 0;
	long got   = 0;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key224_t k;
		list.getCurrentKey((char*)&k);
		count++;
		uint32_t shardNum = getShardNum ( RDB_MONITORDB , &k );
		if ( shardNum == getMyShardNum() ) got++;
	}
	if ( got != count ) {
		log ("db: Out of first %li records in Monitordb , "
		     "only %li belong to our group.",count,got);
		/*
		// repeat with log
		for ( list.resetListPtr() ; ! list.isExhausted() ;
		      list.skipCurrentRecord() ) {
			key224_t k;
			list.getCurrentKey((char*)&k);
			uint32_t shardNum = getShardNum ( RDB_MONITORDB , &k );
			long groupNum = g_hostdb.getGroupNum(groupId);
			unsigned long sh32 ;
			sh32 = g_monitordb.getLinkeeSiteHash32_uk(&k);
			uint16_t sh16 = sh32 >> 19;
			log("db: sh16=0x%lx group=%li", (long)sh16,groupNum);
		}
		*/
		// exit if NONE, we probably got the wrong data
		if ( got == 0 )
			log("db: Are you sure you have the "
			    "right "
			    "data in the right directory? "
			    "Exiting.");
		log ( "db: Exiting due to inconsistency.");
		g_threads.enableThreads();
		return g_conf.m_bypassValidation;
	}
	log ( LOG_INFO, "db: Monitordb passed verification successfully for "
	      "%li recs.", count );
	// DONE
	g_threads.enableThreads();
	return true;
}
int main(int argc, char **argv) { if (argc < 3) { print_usage(argv[0]); return 1; } if (strcmp(argv[1], "--h") == 0 || strcmp(argv[1], "--help") == 0 ) { print_usage(argv[0]); return 1; } g_log.m_disabled = true; // initialize library g_mem.init(); hashinit(); // current dir char path[PATH_MAX]; realpath(argv[1], path); size_t pathLen = strlen(path); if (path[pathLen] != '/') { strcat(path, "/"); } g_hostdb.init(-1, NULL, false, false, path); g_conf.init(path); ucInit(); // initialize rdbs g_loop.init(); g_collectiondb.loadAllCollRecs(); g_statsdb.init(); g_posdb.init(); g_titledb.init(); g_tagdb.init(); g_spiderdb.init(); g_doledb.init(); g_spiderCache.init(); g_clusterdb.init(); g_linkdb.init(); g_collectiondb.addRdbBaseToAllRdbsForEachCollRec(); g_log.m_disabled = false; g_log.m_logPrefix = false; uint64_t docId = strtoul(argv[2], NULL, 10); logf(LOG_TRACE, "Getting titlerec for docId=%" PRIu64, docId); Msg5 msg5; RdbList list; key96_t startKey = Titledb::makeFirstKey(docId); key96_t endKey = Titledb::makeLastKey(docId); msg5.getList(RDB_TITLEDB, 0, &list, startKey, endKey, 500000000, true, 0, 0, -1, NULL, NULL, 0, true, NULL, 0, -1, -1LL, false, true); if (list.getNumRecs() != 1) { logf(LOG_TRACE, "Unable to find titlerec for docId=%" PRIu64, docId); cleanup(); exit(1); } XmlDoc xmlDoc; if (!xmlDoc.set2(list.getCurrentRec(), list.getCurrentRecSize(), "main", NULL, 0)) { logf(LOG_TRACE, "Unable to set XmlDoc for docId=%" PRIu64, docId); cleanup(); exit(1); } logf(LOG_TRACE, "XmlDoc info"); logf(LOG_TRACE, "\tfirstUrl : %.*s", xmlDoc.size_firstUrl, xmlDoc.ptr_firstUrl); logf(LOG_TRACE, "\tredirUrl : %.*s", xmlDoc.size_redirUrl, xmlDoc.ptr_redirUrl); logf(LOG_TRACE, "\trootTitle : %.*s", xmlDoc.size_rootTitleBuf, xmlDoc.ptr_rootTitleBuf); // logf(LOG_TRACE, "\timageData :"); logf(LOG_TRACE, "\t"); loghex(LOG_TRACE, xmlDoc.ptr_utf8Content, xmlDoc.size_utf8Content, "\tutf8Content:"); logf(LOG_TRACE, "\tsite : %.*s", xmlDoc.size_site, xmlDoc.ptr_site); 
logf(LOG_TRACE, "\tlinkInfo"); LinkInfo* linkInfo = xmlDoc.getLinkInfo1(); logf(LOG_TRACE, "\t\tm_numGoodInlinks : %d", linkInfo->m_numGoodInlinks); logf(LOG_TRACE, "\t\tm_numInlinksInternal : %d", linkInfo->m_numInlinksInternal); logf(LOG_TRACE, "\t\tm_numStoredInlinks : %d", linkInfo->m_numStoredInlinks); int i = 0; for (Inlink *inlink = linkInfo->getNextInlink(NULL); inlink; inlink = linkInfo->getNextInlink(inlink)) { logf(LOG_TRACE, "\t\tinlink #%d", i++); logf(LOG_TRACE, "\t\t\tdocId : %" PRIu64, inlink->m_docId); logf(LOG_TRACE, "\t\t\turl : %s", inlink->getUrl()); logf(LOG_TRACE, "\t\t\tlinktext : %s", inlink->getLinkText()); logf(LOG_TRACE, "\t\t\tcountry : %s", getCountryCode(inlink->m_country)); logf(LOG_TRACE, "\t\t\tlanguage : %s", getLanguageAbbr(inlink->m_language)); } loghex(LOG_TRACE, xmlDoc.ptr_linkdbData, xmlDoc.size_linkdbData, "\tlinkdbData"); logf(LOG_TRACE, "\ttagRec"); TagRec *tagRec = xmlDoc.getTagRec(); for (Tag *tag = tagRec->getFirstTag(); tag; tag = tagRec->getNextTag(tag)) { SafeBuf sb; tag->printDataToBuf(&sb); logf(LOG_TRACE, "\t\t%-12s: %s", getTagStrFromType(tag->m_type), sb.getBufStart()); } logf(LOG_TRACE, "\t"); logf(LOG_TRACE, "Links info"); g_log.m_disabled = true; Links *links = xmlDoc.getLinks(); g_log.m_disabled = false; for (int i = 0; i < links->getNumLinks(); ++i) { logf(LOG_TRACE, "\tlink : %.*s", links->getLinkLen(i), links->getLinkPtr(i)); } cleanup(); return 0; }