Ejemplo n.º 1
0
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . add url page for admin, users use sendPageAddUrl() in PageRoot.cpp
// . "sock" is the socket the reply is sent on, "hr" is the http request
//   carrying the "urls" and "c" cgi parms
// . on every early-error path below the error reply is sent and the
//   return value of sendErrorReply() is passed straight back to the caller
bool sendPageAddUrl2 ( TcpSocket *sock , HttpRequest *hr ) {

	// refuse to add anything while in read-only mode
	if ( g_conf.m_readOnlyMode ) {
		g_errno = EREADONLYMODE;
		char *msg = mstrerror(g_errno);
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	// . get fields from cgi field of the requested url
	// . "urls" is NULL if the parm was not supplied
	long  urlLen = 0;
	char *urls = hr->getString ( "urls" , &urlLen , NULL /*default*/);

	char format = hr->getReplyFormat();

	char *c = hr->getString("c");

	// xml/json (api) callers must supply both parms. other formats
	// fall through so that a blank page can be printed below.
	bool isApi = ( format == FORMAT_XML || format == FORMAT_JSON );

	if ( ! c && isApi ) {
		g_errno = EMISSINGINPUT;
		char *msg = "missing c parm. See /admin/api to see parms.";
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	if ( ! urls && isApi ) {
		g_errno = EMISSINGINPUT;
		char *msg = "missing urls parm. See /admin/api to see parms.";
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( hr );
	// complain if no collection rec found
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		char *msg = mstrerror(g_errno);
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	// make a new state
	GigablastRequest *gr;
	try { gr = new (GigablastRequest); }
	catch ( ... ) {
		g_errno = ENOMEM;
		// sizeof() yields a size_t, so cast it to match the %i
		log("PageAddUrl: new(%i): %s",
		    (int)sizeof(GigablastRequest),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(sock,500,
						   mstrerror(g_errno));
	}
	// register the allocation. the size given here must be the same
	// one handed to mdelete() when the state is released.
	mnew ( gr , sizeof(GigablastRequest) , "PageAddUrl" );

	// this will fill in GigablastRequest so all the parms we need are set
	// set this. also sets gr->m_hr
	g_parms.setGigablastRequest ( sock , hr , gr );

	// if no url given, just print a blank page
	if ( ! urls ) return sendReply (  gr , true );

	// . build the list of spider requests into gr->m_listBuf
	// . gr->m_harvestLinks says whether to spider the links too
	bool status = getSpiderRequestMetaList ( urls,
						 // a safebuf
						 &gr->m_listBuf ,
						 gr->m_harvestLinks,
						 NULL );

	// empty?
	long size = gr->m_listBuf.length();

	// error, or nothing to add?
	if ( ! status || ! size ) {
		// an empty list without a reported failure means the
		// caller gave us nothing usable. on a failure g_errno is
		// left exactly as getSpiderRequestMetaList() left it.
		if ( status ) g_errno = EMISSINGINPUT;
		// send the reply BEFORE freeing: sendErrorReply() is
		// handed "gr", so it must still point to a live object
		bool rc = g_httpServer.sendErrorReply(gr);
		// nuke it, using the same size that was given to mnew()
		mdelete ( gr , sizeof(GigablastRequest) , "PageAddUrl" );
		delete (gr);
		return rc;
	}

	// add to spiderdb
	if ( ! gr->m_msg4.addMetaList( gr->m_listBuf.getBufStart() ,
				       gr->m_listBuf.length(),
				       cr->m_coll,
				       gr ,
				       addedUrlsToSpiderdbWrapper,
				       0 // niceness
				       ) )
		// blocked! "gr" was passed along with
		// addedUrlsToSpiderdbWrapper, presumably so the reply can
		// be sent from there later -- TODO confirm
		return false;

	// did not block, print page!
	sendReply ( gr , true );
	return true;
}
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . add url page for admin, users use sendPageAddUrl() in PageRoot.cpp
// . "sock" is the socket the reply is sent on, "hr" is the http request
//   carrying the "urls" and "c" cgi parms
// . on every early-error path below the error reply is sent and the
//   return value of sendErrorReply() is passed straight back to the caller
bool sendPageAddUrl2 ( TcpSocket *sock , HttpRequest *hr ) {

	// refuse to add anything while in read-only mode
	if ( g_conf.m_readOnlyMode ) {
		g_errno = EREADONLYMODE;
		const char *msg = mstrerror(g_errno);
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	// . get fields from cgi field of the requested url
	// . "urls" is NULL if the parm was not supplied
	int32_t  urlLen = 0;
	const char *urls = hr->getString ( "urls" , &urlLen , NULL /*default*/);

	char format = hr->getReplyFormat();

	const char *c = hr->getString("c");

	// xml/json (api) callers must supply both parms. other formats
	// fall through so that a blank page can be printed below.
	bool isApi = ( format == FORMAT_XML || format == FORMAT_JSON );

	if ( ! c && isApi ) {
		g_errno = EMISSINGINPUT;
		const char *msg = "missing c parm. See /admin/api to see parms.";
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	if ( ! urls && isApi ) {
		g_errno = EMISSINGINPUT;
		const char *msg = "missing urls parm. See /admin/api to see parms.";
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( hr );
	// complain if no collection rec found
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		const char *msg = mstrerror(g_errno);
		return g_httpServer.sendErrorReply(sock,500,msg);
	}

	// make a new state
	GigablastRequest *gr;
	try { gr = new (GigablastRequest); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log( LOG_WARN, "PageAddUrl: new(%i): %s", (int)sizeof(GigablastRequest),mstrerror(g_errno) );
		return g_httpServer.sendErrorReply(sock, 500, mstrerror(g_errno));
	}
	// register the allocation. the size given here must be the same
	// one handed to mdelete() when the state is released.
	mnew ( gr , sizeof(GigablastRequest) , "PageAddUrl" );

	// this will fill in GigablastRequest so all the parms we need are set
	// set this. also sets gr->m_hr
	g_parms.setGigablastRequest ( sock , hr , gr );

	// if no url given, just print a blank page
	if ( ! urls ) return sendReply (  gr );

	// . build the list of spider requests into gr->m_listBuf
	// . gr->m_harvestLinks says whether to spider the links too
	bool status = getSpiderRequestMetaList ( (char*)urls, &gr->m_listBuf , gr->m_harvestLinks, NULL );
	int32_t size = gr->m_listBuf.length();

	// error / not list
	if ( ! status || !size ) {
		// NOTE(review): when the call above failed AND left the list
		// empty this overwrites whatever g_errno it may have set --
		// confirm that is intended
		if ( !size ) {
			g_errno = EMISSINGINPUT;
		}

		// send the reply while "gr" is still alive, then nuke it
		bool rc = g_httpServer.sendErrorReply(gr);
		// must be the same size that was given to mnew() above,
		// not the size of the pointer
		mdelete ( gr , sizeof(GigablastRequest) , "PageAddUrl" );
		delete gr;
		return rc;
	}

	// add to spiderdb
	if ( ! gr->m_msg4.addMetaList( &(gr->m_listBuf), cr->m_collnum, gr, addedUrlsToSpiderdbWrapper, 0 ) ) {
		// blocked! "gr" was passed along with
		// addedUrlsToSpiderdbWrapper, presumably so the reply can
		// be sent from there later -- TODO confirm
		return false;
	}

	// did not block, print page!
	sendReply ( gr );
	return true;
}