C++ (Cpp) XmlDoc::indexDocの例

プログラミング言語: C++ (Cpp)

クラス/型: XmlDoc

メソッド/関数: indexDoc

hotexamples.comのコード掲載数: 2

C++ (Cpp) XmlDoc::indexDoc - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたC++ (Cpp)のXmlDoc::indexDocの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

setCallback(10)

set3(5)

Children(5)

set4(4)

loadFromOldTitleRec(4)

LoadXmlFile(4)

getMetaList(3)

root(3)

printDocForProCog(2)

getUtf8Content(2)

printDoc(2)

indexDoc(2)

getMsg20Reply(2)

injectDoc(2)

set2(2)

getInlineSectionVotingBuf(2)

set20(1)

logIt(1)

clear(1)

doConsistencyTest(1)

getTitleRecBuf(1)

first_node(1)

getTitleRec(1)

getTagRec(1)

getSections(1)

getLinks(1)

getLinkInfo1(1)

getIsNoArchive(1)

getFirstUrl(1)

getCollRec(1)

getTitle(1)

コード例 #1

ファイルを表示

ファイル: PageInject.cpp プロジェクト: chushuai/open-source-search-engine

void handleRequest7 ( UdpSlot *slot , long netnice ) {

	//m_state = state;
	//m_callback = callback;

	// shortcut
	XmlDoc *xd;
	try { xd = new (XmlDoc); }
	catch ( ... ) { 
		g_errno = ENOMEM;
		log("PageInject: import failed: new(%i): %s", 
		    (int)sizeof(XmlDoc),mstrerror(g_errno));
		sendReply(slot);
		return;
	}
	mnew ( xd, sizeof(XmlDoc) , "PageInject" );

	//xd->reset();
	char *titleRec = slot->m_readBuf;
	long titleRecSize = slot->m_readBufSize;

	long collnum = *(long *)titleRec;

	titleRec += 4;
	titleRecSize -= 4;

	CollectionRec *cr = g_collectiondb.m_recs[collnum];
	if ( ! cr ) {
		sendReply(slot);
		return;
	}

	// if injecting a titlerec from an import operation use set2()
	//if ( m_sbuf.length() > 0 ) {
	xd->set2 ( titleRec,//m_sbuf.getBufStart() ,
		   titleRecSize,//m_sbuf.length() ,
		   cr->m_coll ,
		   NULL, // pbuf
		   MAX_NICENESS ,
		   NULL ); // sreq
	// log it i guess
	log("inject: importing %s",xd->m_firstUrl.getUrl());
	// call this when done indexing
	//xd->m_masterState = this;
	//xd->m_masterLoop  = doneInjectingWrapper9;
	xd->m_state = xd;//this;
	xd->m_callback1  = doneInjectingWrapper10;
	xd->m_isImporting = true;
	xd->m_isImportingValid = true;
	// hack this
	xd->m_slot = slot;
	// then index it
	if ( ! xd->indexDoc() )
		// return if would block
		return;

	// all done?
	//return true;
	sendReply ( slot );
}

コード例 #2

ファイルを表示

ファイル: PageInject.cpp プロジェクト: BKJackson/open-source-search-engine

bool Msg7::inject ( char *url ,
		    long  forcedIp ,
		    char *content ,
		    long  contentLen ,
		    bool  recycleContent,
		    uint8_t contentType,
		    char *coll ,
		    bool  quickReply ,
		    char *username ,
		    char *pwd ,
		    long  niceness,
		    void *state ,
		    void (*callback)(void *state),
		    long firstIndexed,
		    long lastSpidered,
		    long hopCount,
		    char newOnly,
		    short charset,
		    char spiderLinks,
		    char deleteIt,
		    char hasMime,
		    bool doConsistencyTesting
		    ) {

	m_quickReply = quickReply;

	// store coll
	if ( ! coll ) { g_errno = ENOCOLLREC; return true; }
        long collLen = gbstrlen ( coll );
	if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
	strncpy ( m_coll , coll , collLen );
	m_coll [ collLen ] = '\0';

	// store user
	//long ulen = 0;
	//if ( username ) ulen = gbstrlen(username);
	//if ( ulen >= MAX_USER_SIZE-1 ) {g_errno = EBUFOVERFLOW; return true;}
	//if ( username ) strcpy( m_username, username );

	// store password
	//long pwdLen = 0;
	//if ( pwd ) pwdLen = gbstrlen(pwd);
	//m_pwd [ 0 ] ='\0';
	//if ( pwdLen > 31 ) pwdLen = 31;
	//if ( pwdLen > 0 ) strncpy ( m_pwd , pwd , pwdLen );
	//m_pwd [ pwdLen ] = '\0';

	// store url
	if ( ! url ) { g_errno = 0; return true; }
	long urlLen = gbstrlen(url);
	if ( urlLen > MAX_URL_LEN ) {g_errno = EBADENGINEER; return true; }
	// skip injecting if no url given! just print the admin page.
	if ( urlLen <= 0 ) return true;
	//strcpy ( m_url , url );

	if ( g_repairMode ) { g_errno = EREPAIRING; return true; }

	// send template reply if no content supplied
	if ( ! content && ! recycleContent ) {
		log("inject: no content supplied to inject command and "
		    "recycleContent is false.");
		//return true;
	}

	// clean url?
	// normalize and add www. if it needs it
	Url uu;
	uu.set ( url , gbstrlen(url) , true );
	// remove >'s i guess and store in st1->m_url[] buffer
	char cleanUrl[MAX_URL_LEN+1];
	urlLen = cleanInput ( cleanUrl,
			      MAX_URL_LEN, 
			      uu.getUrl(),
			      uu.getUrlLen() );


	// this can go on the stack since set4() copies it
	SpiderRequest sreq;
	sreq.reset();
	strcpy(sreq.m_url, cleanUrl );
	// parentdocid of 0
	long firstIp = hash32n(cleanUrl);
	if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
	sreq.setKey( firstIp,0LL, false );
	sreq.m_isInjecting   = 1; 
	sreq.m_isPageInject  = 1;
	sreq.m_hopCount      = hopCount;
	sreq.m_hopCountValid = 1;
	sreq.m_fakeFirstIp   = 1;
	sreq.m_firstIp       = firstIp;

	// shortcut
	XmlDoc *xd = &m_xd;

	// log it now
	//log("inject: injecting doc %s",cleanUrl);

	static char s_dummy[3];
	// sometims the content is indeed NULL...
	if ( newOnly && ! content ) { 
		// don't let it be NULL because then xmldoc will
		// try to download the page!
		s_dummy[0] = '\0';
		content = s_dummy;
		//char *xx=NULL;*xx=0; }
	}


	// . use the enormous power of our new XmlDoc class
	// . this returns false with g_errno set on error
	if ( //m_needsSet &&
	     ! xd->set4 ( &sreq       ,
			  NULL        ,
			  m_coll  ,
			  NULL        , // pbuf
			  // give it a niceness of 1, we have to be
			  // careful since we are a niceness of 0!!!!
			  niceness, // 1 , 
			  // inject this content
			  content ,
			  deleteIt, // false, // deleteFromIndex ,
			  forcedIp ,
			  contentType ,
			  lastSpidered ,
			  hasMime )) {
		// g_errno should be set if that returned false
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		return true;
	}
	// do not re-call the set
	//m_needsSet = false;
	// make this our callback in case something blocks
	xd->setCallback ( state , callback );

	xd->m_doConsistencyTesting = doConsistencyTesting;

	// . set xd from the old title rec if recycle is true
	// . can also use XmlDoc::m_loadFromOldTitleRec flag
	if ( recycleContent ) xd->m_recycleContent = true;

	// othercrap
	if ( firstIndexed ) {
		xd->m_firstIndexedDate = firstIndexed;
		xd->m_firstIndexedDateValid = true;
	}

	if ( lastSpidered ) {
		xd->m_spideredTime      = lastSpidered;
		xd->m_spideredTimeValid = true;
	}

	if ( hopCount != -1 ) {
		xd->m_hopCount = hopCount;
		xd->m_hopCountValid = true;
	}

	if ( charset != -1 && charset != csUnknown ) {
		xd->m_charset = charset;
		xd->m_charsetValid = true;
	}

	// avoid looking up ip of each outlink to add "firstip" tag to tagdb
	// because that can be slow!!!!!!!
	xd->m_spiderLinks = spiderLinks;
	xd->m_spiderLinks2 = spiderLinks;
	xd->m_spiderLinksValid = true;

	// . newOnly is true --> do not inject if document is already indexed!
	// . maybe just set indexCode
	xd->m_newOnly = newOnly;

	// do not re-lookup the robots.txt
	xd->m_isAllowed      = true;
	xd->m_isAllowedValid = true;
	xd->m_crawlDelay     = -1; // unknown
	xd->m_crawlDelayValid = true;

	// set this now
	g_inPageInject = true;

	// log it now
	//log("inject: indexing injected doc %s",cleanUrl);

	// . now tell it to index
	// . this returns false if blocked
	bool status = xd->indexDoc ( );

	// log it. i guess only for errors when it does not block?
	// because xmldoc.cpp::indexDoc calls logIt()
	if ( status ) xd->logIt();

	// undo it
	g_inPageInject = false;

	// note that it blocked
	//if ( ! status ) log("inject: blocked for %s",cleanUrl);

	// return false if it blocked
	return status;
}