// . returns false if blocked, true otherwise
// . sets g_errno on error
bool sendPageGet ( TcpSocket *s , HttpRequest *r ) {
	// get the collection
	long collLen = 0;
	char *coll = r->getString("c",&collLen);
	if ( ! coll || ! coll[0] ) {
		//coll = g_conf.m_defaultColl;
		coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
		collLen = gbstrlen(coll);
	}
	// ensure collection not too big
	if ( collLen >= MAX_COLL_LEN ) {
		g_errno = ECOLLTOOBIG;
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// get the collection rec
	CollectionRec *cr = g_collectiondb.getRec ( coll );
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		log("query: Archived copy retrieval failed. "
		    "No collection record found for "
		    "collection \"%s\".",coll);
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// does this collection ban this IP?
	if ( ! cr->hasSearchPermission ( s ) ) {
		g_errno = ENOPERM;
		//log("PageGet::sendDynamicReply0: permission denied for %s",
		//    iptoa(s->m_ip) );
		g_msg = " (error: permission denied)";
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// . get fields from cgi field of the requested url
	// . get the search query
	long qlen = 0;
	char *q = r->getString ( "q" , &qlen , NULL /*default*/);
	// ensure query not too big
	if ( qlen >= MAX_QUERY_LEN-1 ) {
		g_errno = EQUERYTOOBIG;
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// the docId
	long long docId = r->getLongLong ( "d" , 0LL /*default*/ );
	// get url
	char *url = r->getString ( "u" , NULL );
	if ( docId == 0 && ! url ) {
		g_errno = EMISSINGINPUT;
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// . should we do a sequential lookup?
	// . we need to match summary here so we need to know this
	//bool seq = r->getLong ( "seq" , false );
	// restrict to root file?
	bool rtq = r->getLong ( "rtq" , false );
	// . get the titleRec
	// . TODO: redirect client to a better http server to save bandwidth
	State2 *st;
	try { st = new (State2); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log("PageGet: new(%i): %s",
		    (int)sizeof(State2),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	mnew ( st , sizeof(State2) , "PageGet1" );
	// save the socket and if Host: is local in the Http request Mime
	st->m_socket  = s;
	st->m_isAdmin = g_conf.isCollAdmin ( s , r );
	st->m_isLocal = r->isLocal();
	st->m_docId   = docId;
	st->m_printed = false;
	// include header ... "this page cached by Gigablast on..."
	st->m_includeHeader     = r->getLong ("ih"    , true  );
	st->m_includeBaseHref   = r->getLong ("ibh"   , false );
	st->m_queryHighlighting = r->getLong ("qh"    , true  );
	st->m_strip             = r->getLong ("strip" , 0     );
	st->m_clickAndScroll    = r->getLong ("cas"   , true  );
	st->m_cnsPage           = r->getLong ("cnsp"  , true  );
	char *langAbbr = r->getString("qlang",NULL);
	st->m_langId = langUnknown;
	if ( langAbbr ) {
		uint8_t langId = getLangIdFromAbbr ( langAbbr );
		st->m_langId = langId;
	}
	strncpy ( st->m_coll , coll , MAX_COLL_LEN+1 );
	// store query for query highlighting
	st->m_netTestResults = r->getLong ("rnettest", false );
	//if ( st->m_netTestResults ) {
	//	mdelete ( st , sizeof(State2) , "PageGet1" );
	//	delete ( st );
	//	return sendPageNetResult( s );
	//}
	if ( q && qlen > 0 ) strcpy ( st->m_q , q );
	else                 st->m_q[0] = '\0';
	st->m_qlen = qlen;
	//st->m_seq = seq;
	st->m_rtq = rtq;
	st->m_boolFlag  = r->getLong ("bq", 2 /*default is 2*/ );
	st->m_isBanned  = false;
	st->m_noArchive = false;
	st->m_socket    = s;
	st->m_format    = r->getReplyFormat();
	// default to 0 niceness
	st->m_niceness  = 0;
	st->m_r.copy ( r );
	//st->m_cr = cr;
	st->m_printDisclaimer = true;
	if ( st->m_cnsPage )
		st->m_printDisclaimer = false;
	if ( st->m_strip ) // ! st->m_evbits.isEmpty() )
		st->m_printDisclaimer = false;
	// should we cache it?
	char useCache = r->getLong ( "usecache" , 1 );
	char rcache   = r->getLong ( "rcache"   , 1 );
	char wcache   = r->getLong ( "wcache"   , 1 );
	long cacheAge = r->getLong ( "cacheAge" , 60*60 ); // default one hour
	if ( useCache == 0 ) { cacheAge = 0; wcache = 0; }
	if ( rcache   == 0 )   cacheAge = 0;
	// . fetch the TitleRec
	// . a max cache age of 0 means not to read from the cache
	XmlDoc *xd = &st->m_xd;
	// url based?
	if ( url ) {
		SpiderRequest sreq;
		sreq.reset();
		strcpy ( sreq.m_url , url );
		sreq.setDataSize();
		// this returns false if "coll" is invalid
		if ( ! xd->set4 ( &sreq , NULL , coll , NULL , st->m_niceness ) )
			goto hadSetError;
	}
	// . when getTitleRec() is called it will load the old one
	//   since XmlDoc::m_setFromTitleRec will be true
	// . niceness is 0
	// . use st->m_coll since XmlDoc just points to it!
	// . this returns false if "coll" is invalid
	else if ( ! xd->set3 ( docId , st->m_coll , 0 ) ) {
	hadSetError:
		mdelete ( st , sizeof(State2) , "PageGet1" );
		delete ( st );
		// set3()/set4() set g_errno on failure; report that error
		// rather than clobbering it with ENOMEM
		if ( ! g_errno ) g_errno = EBADENGINEER;
		log("PageGet: set3: %s", mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// if it blocks while it loads title rec, it will re-call this routine
	xd->setCallback ( st , processLoopWrapper );
	// good to go!
	return processLoop ( st );
}
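// Illustrative sketch (not compiled): xd->setCallback() above registers a
// wrapper that re-enters processLoop() with the same State2 once the
// blocked title-rec read completes. The real wrapper lives elsewhere in
// this file; this only shows the assumed shape of that callback contract,
// where processLoop() is assumed to send the reply and free the state.
#if 0
static void processLoopWrapper ( void *state ) {
	// "state" is the State2 allocated in sendPageGet()
	processLoop ( state );
}
#endif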
// . reset a collection
// . returns false if failed
bool Collectiondb::resetColl ( char *coll , bool resetTurkdb ) {
	// ensure it's not NULL
	if ( ! coll ) {
		log(LOG_LOGIC,"admin: Collection name to delete is NULL.");
		return false;
	}
	// now must be "test" only for now
	if ( strcmp(coll,"test") ) { char *xx=NULL;*xx=0; }
	// no spiders can be out. they may be referencing the CollectionRec
	// in XmlDoc.cpp... quite likely.
	if ( g_conf.m_spideringEnabled ||
	     g_spiderLoop.m_numSpidersOut > 0 ) {
		log("admin: Can not delete collection while "
		    "spiders are enabled or active.");
		return false;
	}
	// do not allow this if in repair mode
	if ( g_repairMode > 0 ) {
		log("admin: Can not delete collection while in repair mode.");
		return false;
	}
	// get the CollectionRec for "test"
	CollectionRec *cr = getRec ( "test" );
	// must be there. if not, we create test i guess
	if ( ! cr ) {
		log("db: could not get test coll rec");
		char *xx=NULL;*xx=0;
	}
	// make sure an update not in progress
	if ( cr->m_inProgress ) { char *xx=NULL;*xx=0; }
	CollectionRec tmp;
	// copy it to "tmp"
	long size = (char *)&(cr->m_END_COPY) - (char *)cr;
	// do not copy the hashtable crap since you will have to re-init it!
	memcpy ( &tmp , cr , size ); // sizeof(CollectionRec) );
	// delete the test coll now
	if ( ! deleteRec ( "test" , resetTurkdb ) )
		return log("admin: reset coll failed");
	// re-add a fresh "test" collection so we can copy the saved parms
	// back into it
	bool status = addRec ( "test" ,
			       NULL ,
			       0 ,
			       true ,           // bool isNew
			       (collnum_t) -1 , // not a dump
			       false ,          // do not save it!
			       false );
	// bail on error
	if ( ! status ) return log("admin: failed to add new coll for reset");
	// get its rec
	CollectionRec *nr = getRec ( "test" );
	// must be there
	if ( ! nr ) { char *xx=NULL;*xx=0; }
	// save this though, this might have changed!
	collnum_t cn = nr->m_collnum;
	// overwrite its rec
	memcpy ( nr , &tmp , size ); // sizeof(CollectionRec) );
	// put that collnum back
	nr->m_collnum = cn;
	// set the flag
	m_needsSave = true;
	// save it again after copy
	nr->save();
	// and clear the robots.txt cache in case we recently spidered a
	// robots.txt, we don't want to use it, we want to use the one we
	// have in the test-parser subdir so we are consistent
	RdbCache *robots = Msg13::getHttpCacheRobots();
	RdbCache *others = Msg13::getHttpCacheOthers();
	robots->clear ( cn );
	others->clear ( cn );
	//g_templateTable.reset();
	//g_templateTable.save( g_hostdb.m_dir , "turkedtemplates.dat" );
	// repopulate CollectionRec::m_sortByDateTable. should be empty
	// since we are resetting here.
	//initSortByDateTable ( coll );
	// done
	return true;
}
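// Illustrative usage sketch (not compiled): resetColl() is meant to be
// driven from an admin code path after spidering is turned off. The
// caller name and surrounding checks here are assumptions for
// illustration; only the resetColl() signature comes from the routine
// above.
#if 0
static void handleAdminResetTestColl ( ) {
	// resetColl() refuses to run while spiders are enabled or active
	if ( g_conf.m_spideringEnabled ) return;
	// wipe the "test" collection but keep its parms
	if ( ! g_collectiondb.resetColl ( "test" , true /*resetTurkdb*/ ) )
		log("admin: reset of test collection failed");
}
#endif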
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool sendPageCatdb ( TcpSocket *s , HttpRequest *r ) {
	// are we the admin?
	bool isAdmin = g_collectiondb.isAdmin ( r , s );
	// get the collection record
	CollectionRec *cr = g_collectiondb.getRec ( r );
	if ( ! cr ) {
		log("admin: No collection record found "
		    "for specified collection name. Could not add sites to "
		    "tagdb. Returning HTTP status of 500.");
		return g_httpServer.sendErrorReply ( s , 500 ,
						"collection does not exist");
	}
	bool isAssassin = cr->isAssassin ( s->m_ip );
	if ( isAdmin ) isAssassin = true;
	// bail if permission denied
	if ( ! isAssassin && ! cr->hasPermission ( r , s ) ) {
		log("admin: Bad collection name or password. Could not add "
		    "sites to tagdb. Permission denied.");
		return sendPageLogin ( s , r , "Collection name or "
				       "password is incorrect");
	}
	// get the collection
	long collLen = 0;
	char *coll = r->getString("c", &collLen, NULL);
	// check for generate catdb command
	long genCatdb = r->getLong("gencatdb", 0);
	// check for a lookup url
	long urlLen = 0;
	char *url = r->getString("caturl", &urlLen, NULL);
	// create the State
	StateCatdb *st;
	try { st = new (StateCatdb); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log("catdb: Unable to allocate %i bytes for StateCatdb",
		    (int)sizeof(StateCatdb) );
		return true;
	}
	mnew ( st, sizeof(StateCatdb), "PageCatdb" );
	// fill the state
	st->m_socket = s;
	st->m_r.copy(r);
	// copy collection
	if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN - 1;
	memcpy ( st->m_coll, coll, collLen );
	st->m_coll[collLen] = '\0';
	st->m_collLen = collLen;
	// defaults
	st->m_catLookup = false;
	st->m_genCatdb  = false;
	st->m_startTime = gettimeofdayInMilliseconds();
	// generate catdb if requested
	if ( genCatdb == 1 ) {
		st->m_genCatdb = true;
		if ( ! st->m_msg2a.makeCatdb ( st->m_coll,
					       st->m_collLen,
					       false,
					       st,
					       sendReplyWrapper ) )
			return false;
	}
	// update catdb from .new files
	else if ( genCatdb == 2 ) {
		st->m_genCatdb = true;
		if ( ! st->m_msg2a.makeCatdb ( st->m_coll,
					       st->m_collLen,
					       true,
					       st,
					       sendReplyWrapper ) )
			return false;
	}
	// lookup a url if requested
	else if ( url && urlLen > 0 ) {
		st->m_catLookup = true;
		// set the url
		st->m_url.set ( url, urlLen );
		// call msg8b to lookup in catdb
		if ( ! st->m_msg8b.getCatRec ( &st->m_url,
					       NULL, //st->m_coll,
					       0,    //st->m_collLen,
					       true,
					       1,
					       &st->m_catRec,
					       st,
					       gotCatInfoWrapper ) )
					       //RDB_CATDB ) )
					       //RDB_TAGDB ) )
			return false;
	}
	// otherwise return the regular page
	return sendReply ( st );
}
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
	// . get fields from cgi field of the requested url
	// . get the search query
	long  urlLen = 0;
	char *url = r->getString ( "u" , &urlLen , NULL /*default*/);
	// see if they provided a url of a file of urls if they did not
	// provide a url to add directly
	//bool isAdmin = g_collectiondb.isAdmin ( r , s );
	bool isAdmin = r->getIsLocal();
	long  ufuLen = 0;
	char *ufu = NULL;
	if ( isAdmin )
		// get the url of a file of urls (ufu)
		ufu = r->getString ( "ufu" , &ufuLen , NULL );
	// can't be too long, that's obnoxious
	if ( urlLen > MAX_URL_LEN || ufuLen > MAX_URL_LEN ) {
		g_errno = EBUFTOOSMALL;
		g_msg = " (error: url too long)";
		return g_httpServer.sendErrorReply(s,500,"url too long");
	}
	// get the collection
	long  collLen = 0;
	char *coll = r->getString("c",&collLen);
	if ( ! coll || ! coll[0] ) {
		//coll = g_conf.m_defaultColl;
		coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
		collLen = gbstrlen(coll);
	}
	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( coll );
	// bitch if no collection rec found
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		g_msg = " (error: no collection)";
		return g_httpServer.sendErrorReply(s,500,"no coll rec");
	}
	// . make sure the ip is not banned
	// . we may also have an exclusive list of IPs for private collections
	if ( ! cr->hasSearchPermission ( s ) ) {
		g_errno = ENOPERM;
		g_msg = " (error: permission denied)";
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	// make a new state
	State1 *st1;
	try { st1 = new (State1); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log("PageAddUrl: new(%i): %s",
		    (int)sizeof(State1),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	mnew ( st1 , sizeof(State1) , "PageAddUrl" );
	// save socket and isAdmin
	st1->m_socket  = s;
	st1->m_isAdmin = isAdmin;
	// assume no url buf yet, set below
	//st1->m_ubuf      = NULL;
	//st1->m_ubufAlloc = NULL;
	//st1->m_metaList  = NULL;
	// save the url
	st1->m_url[0] = '\0';
	if ( url ) {
		// normalize and add www. if it needs it
		Url uu;
		uu.set ( url , gbstrlen(url) , true );
		// remove >'s i guess and store in st1->m_url[] buffer
		st1->m_urlLen = cleanInput ( st1->m_url,
					     MAX_URL_LEN,
					     uu.getUrl(),
					     uu.getUrlLen() );
		// point to that as the url "buf" to add
		//st1->m_ubuf      = st1->m_url;
		//st1->m_ubufSize  = urlLen;
		//st1->m_ubufAlloc = NULL; // do not free it!
	}
	// save the "ufu" (url of file of urls)
	st1->m_ufu[0] = '\0';
	st1->m_ufuLen = ufuLen;
	memcpy ( st1->m_ufu , ufu , ufuLen );
	st1->m_ufu[ufuLen] = '\0';
	st1->m_doTuringTest = cr->m_doTuringTest;
	char *username = g_users.getUsername(r);
	if ( username ) strcpy ( st1->m_username , username );
	//st1->m_user = g_pages.getUserType ( s , r );
	st1->m_spiderLinks = true;
	st1->m_strip       = true;
	//st1->m_raw = r->getLong("raw",0);
	// init state2
	for ( long i = 0; i < 5; i++ ) {
		st1->m_state2[i].m_buf       = NULL;
		st1->m_state2[i].m_bufLen    = 0;
		st1->m_state2[i].m_bufMaxLen = 0;
	}
	// save the collection name in the State1 class
	if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
	strncpy ( st1->m_coll , coll , collLen );
	st1->m_coll [ collLen ] = '\0';
	// assume they answered turing test correctly
	st1->m_goodAnswer = true;
	// if addurl is turned off, just print "disabled" msg
	if ( ! g_conf.m_addUrlEnabled ) return sendReply ( st1 , false );
	// can also be turned off in the collection rec
	if ( ! cr->m_addUrlEnabled    ) return sendReply ( st1 , false );
	// or if in read-only mode
	if (   g_conf.m_readOnlyMode  ) return sendReply ( st1 , false );
	// cannot add if another Msg10 from here is still in progress
	if ( s_inprogress ) return sendReply ( st1 , true );
	// use now as the spiderTime
	// get ip of submitter
	//unsigned long h = ipdom ( s->m_ip );
	// . use top 2 bytes now, some isps have large blocks
	// . if this causes problems, then they can do pay for inclusion
	unsigned long h = iptop ( s->m_ip );
	long  codeLen;
	char *code = r->getString("code", &codeLen);
	if ( g_autoBan.hasCode(code, codeLen, s->m_ip) ) {
		long  uipLen = 0;
		char *uip = r->getString("uip",&uipLen);
		long  hip = 0;
		// use the uip when we have a raw query to test if
		// we can submit
		if ( uip ) {
			hip = atoip(uip, uipLen);
			h = iptop( hip );
		}
	}
	st1->m_strip = r->getLong("strip",0);
	// Remember, for cgi, if the box is not checked, then it is not
	// reported in the request, so set default return value to 0
	long spiderLinks = r->getLong("spiderLinks",-1);
	// also support all lowercase like PageInject.cpp uses
	if ( spiderLinks == -1 )
		spiderLinks = r->getLong("spiderlinks",0);
	// . should we force it into spiderdb even if already in there
	// . use to manually update spider times for a url
	// . however, will not remove old scheduled spider times
	// . mdw: made force on the default
	st1->m_forceRespider = r->getLong("force",1); // 0);
	long now = getTimeGlobal();
	// . allow 1 submit every 1 hour
	// . restrict by submitter domain ip
	if ( ! st1->m_isAdmin &&
	     ! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
		// return error page
		g_errno = ETOOEARLY;
		return sendReply ( st1 , true );
	}
	//st1->m_query = r->getString( "qts", &st1->m_queryLen );
	// check it, if turing test is enabled for this collection
	if ( ! st1->m_isAdmin && cr->m_doTuringTest &&
	     ! g_turingTest.isHuman(r) ) {
		// log note so we know it didn't make it
		g_msg = " (error: bad answer)";
		//log("PageAddUrl:: addurl failed for %s : bad answer",
		//    iptoa(s->m_ip));
		st1->m_goodAnswer = false;
		return sendReply ( st1 , true /*addUrl enabled?*/ );
	}
	//if ( st1->m_queryLen > 0 )
	//	return getPages( st1 );
	// if no url given, just print a blank page
	if ( ! url ) return sendReply ( st1 , true );
	//
	// make a SpiderRequest
	//
	SpiderRequest *sreq = &st1->m_sreq;
	// reset it
	sreq->reset();
	// make the probable docid
	long long probDocId = g_titledb.getProbableDocId ( st1->m_url );
	// make one up, like we do in PageReindex.cpp
	long firstIp = (probDocId & 0xffffffff);
	// . now fill it up
	// . TODO: calculate the other values... lazy!!! (m_isRSSExt,
	//   m_siteNumInlinks,...)
	sreq->m_isNewOutlink = 1;
	sreq->m_isAddUrl     = 1;
	sreq->m_addedTime    = now;
	sreq->m_fakeFirstIp  = 1;
	sreq->m_probDocId    = probDocId;
	sreq->m_firstIp      = firstIp;
	sreq->m_hopCount     = 0;
	// its valid if root
	Url uu;
	uu.set ( st1->m_url );
	if ( uu.isRoot() ) sreq->m_hopCountValid = true;
	// too big?
	//long len = st1->m_urlLen;
	// the url! includes \0
	strcpy ( sreq->m_url , st1->m_url );
	// call this to set sreq->m_dataSize now
	sreq->setDataSize();
	// make the key dude -- after setting url
	sreq->setKey ( firstIp , 0LL, false );
	// need a fake first ip lest we core!
	//sreq->m_firstIp = (pdocId & 0xffffffff);
	// how to set m_firstIp? i guess addurl can be throttled independently
	// of the other urls??? use the hash of the domain for it!
	long  dlen;
	char *dom = getDomFast ( st1->m_url , &dlen );
	// fake it for this...
	//sreq->m_firstIp = hash32 ( dom , dlen );
	// sanity
	if ( ! dom ) {
		g_errno = EBADURL;
		return sendReply ( st1 , true );
	}
	// shortcut
	Msg4 *m = &st1->m_msg4;
	// now add that to spiderdb using msg4
	if ( ! m->addMetaList ( (char *)sreq ,
				sreq->getRecSize() ,
				coll ,
				st1 , // state
				addedStuff ,
				MAX_NICENESS ,
				RDB_SPIDERDB ) )
		// we blocked
		return false;
	// send back the reply
	return sendReply ( st1 , true );
}
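// Illustrative sketch (not compiled): addMetaList() above is handed
// "addedStuff" as its completion callback, so when the Msg4 add to
// spiderdb finishes asynchronously the reply is sent from that callback.
// The body below is an assumption that mirrors the non-blocking return
// path above; only the callback name and the sendReply() signature come
// from this file.
#if 0
static void addedStuff ( void *state ) {
	// the State1 we allocated in sendPageAddUrl()
	State1 *st1 = (State1 *)state;
	// send the same confirmation page used on the non-blocking path
	sendReply ( st1 , true );
}
#endif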