bool processLoop ( void *state ) {
	// cast it
	State8 *st = (State8 *)state;
	// get the xmldoc
	XmlDoc *xd = &st->m_xd;

	// error?
	if ( g_errno ) return sendErrorReply ( st , g_errno );

	// shortcut
	SafeBuf *xbuf = &st->m_xbuf;

	if ( st->m_u && st->m_u[0] ) {
		// . save the ips.txt file if we are the test coll
		// . saveTestBuf() is a function in Msge1.cpp
		CollectionRec *cr = xd->getCollRec();
		if ( xd && cr && cr->m_coll && !strcmp(cr->m_coll,"qatest123"))
			// use same dir that XmlDoc::getTestDir() would use
			//saveTestBuf ( "test-page-parser" );
			saveTestBuf("qa");
		// now get the meta list, in the process it will print out a 
		// bunch of junk into st->m_xbuf
		char *metalist = xd->getMetaList ( );
		if ( ! metalist ) return sendErrorReply ( st , g_errno );
		// return false if it blocked
		if ( metalist == (void *)-1 ) return false;
		// for debug...
		if ( ! xd->m_indexCode ) xd->doConsistencyTest ( false );
		// print it out
		xd->printDoc( xbuf );
	}

	// print reason we can't analyze it (or index it)
	//if ( st->m_indexCode != 0 ) {
	//	xbuf->safePrintf ("<br><br><b>indexCode: %s</b>\n<br>", 
	//			  mstrerror(st->m_indexCode));
	//}

	// we are done
	g_inPageParser = false;

	// print the final tail
	//p += g_httpServer.printTail ( p , pend - p );

	//log("parser: send sock=%li",st->m_s->m_sd);
	
	// now encapsulate it in html head/tail and send it off
	bool status = g_httpServer.sendDynamicPage( st->m_s , 
						    xbuf->getBufStart(), 
						    xbuf->length() ,
						    -1, //cachtime
						    false ,//postreply?
						    NULL, //ctype
						    -1 , //httpstatus
						    NULL,//cookie
						    "utf-8");
	// delete the state now
	if ( st->m_freeIt ) {
		mdelete ( st , sizeof(State8) , "PageParser" );
		delete (st);
	}
	// return the status
	return status;
}
Пример #2
0
// this should be called when all docs have finished spidering
void Test::stopIt ( ) {

	// sanity
	if ( m_isAdding ) { char *xx=NULL;*xx=0; }
	// flag that we are done
	m_isRunning = false;

	// print time
	log("test: took %lli ms to complete injections.",
	    gettimeofdayInMilliseconds() - m_testStartTime );

	// get this before setting testParserEnabled to false
	char *testDir = g_test.getTestDir();

	// turn this off now too
	g_conf.m_testParserEnabled = false;
	g_conf.m_testSpiderEnabled = false;



	// save all!
	bool disabled = g_threads.m_disabled;
	g_threads.disableThreads();
	// save it blocking style
	g_process.save();
	if ( ! disabled ) g_threads.enableThreads();

	// save ips.txt
	saveTestBuf ( testDir );

	log("test: test completed. making qa.html");

	//
	//
	// NOW MAKE THE qa.html FILE
	//
	//

	// only analyze up to last 7 runs
	long start = m_runId - 7;
	if ( start < 0 ) start = 0;

	SafeBuf sb;
	sb.safePrintf("<table border=1>\n");
	sb.safePrintf("<tr>"
		      "<td><b><nobr>run id</nobr></b></td>"
		      "<td><b><nobr>conf diff</nobr></b></td>"
		      "<td><b><nobr>coll diff</nobr></b></td>"
		      "<td><b><nobr>run info</nobr></b></td>"
		      "</tr>\n");

	// take diffs between this run and the last run for confparms
	for ( long i = m_runId ; i > start ; i-- ) {
		// shortcut
		char *dir = g_hostdb.m_dir;
		// make diff filename
		char diff1[200];
		sprintf(diff1,"%s/%s/run.%li.confparms.txt.diff",dir,
			testDir,i);
		File f1;
		f1.set(diff1);
		if ( ! f1.doesExist() ) {
			char df1[200];
			char df2[200];
			sprintf(df1,"%s/%s/run.%li.confparms.txt",dir,
				testDir,i);
			sprintf(df2,"%s/%s/run.%li.confparms.txt",dir,
				testDir,i-1);
			// do the diff
			char cmd[600];
			sprintf(cmd,"diff %s %s > %s",df1,df2,diff1);
			log("test: system(\"%s\")",cmd);
			system (cmd);
		}
		long fs1 = f1.getFileSize();
		sb.safePrintf("<tr><td>%li</td><td>%li</td>", i,fs1);

		// make diff filename
		char diff2[200];
		sprintf(diff2,"%s/%s/run.%li.collparms.txt.diff",dir,
			testDir,i);
		File f2;
		f2.set(diff2);
		if ( ! f2.doesExist() ) {
			char df1[200];
			char df2[200];
			sprintf(df1,"%s/%s/run.%li.collparms.txt",dir,
				testDir,i);
			sprintf(df2,"%s/%s/run.%li.collparms.txt",dir,
				testDir,i-1);
			// do the diff
			char cmd[600];
			sprintf(cmd,"diff %s %s > %s",df1,df2,diff2);
			log("test: system(\"%s\")",cmd);
			system (cmd);
		}
		long fs2 = f2.getFileSize();
		sb.safePrintf("<td>%li</td>", fs2);

		// the version
		char vf[200]; 
		sprintf(vf,"%s/%s/run.%li.version.txt",dir,testDir,i);
		File f3; 
		f3.set ( vf );
		long fs3 = f3.getFileSize();
		char vbuf[1000];
		vbuf[0] = 0;
		if ( fs3 > 0 ) {
			f3.open(O_RDONLY);
			long rs = f3.read(vbuf,fs3,0);
			vbuf[fs3] = '\0';
			if ( rs <= 0 ) continue;
			f3.close();
		}
		// show it
		sb.safePrintf("<td><pre>%s</pre></td></tr>\n", vbuf);
	}
	sb.safePrintf("</table>\n");
	sb.safePrintf("<br>\n");


	//
	// now diff each parser output file for each url in urls.txt
	//


	//
	// loop over url buf first so we can print one table per url
	//

	char *next = NULL;
	// reset the url buf ptr
	m_urlPtr = m_urlBuf;
	// count em
	long count = 0;

	// ptrs to each url table
	long  un = 0;
	long  uptr [5000]; // offsets now, not char ptr since buf gets reallocd
	char  udiff[5000];
	long  ulen [5000];
	long  uhits[5000]; // critical errors! validateOutput() choked!
	long  uunchecked[5000]; // events/addresses found but were not validatd
	long  umiss[5000];
	long  usort[5000];
	long  uevents[5000];
	SafeBuf tmp;

	long niceness = MAX_NICENESS;

	// advance to next url
	for ( ; m_urlPtr < m_urlEnd ; m_urlPtr = next ) {
		// breathe
		QUICKPOLL(niceness);
		// we converted all non-url chars into \0's so skip those!
		for ( ; m_urlPtr<m_urlEnd && !*m_urlPtr ; m_urlPtr++ );
		// breach check
		if ( m_urlPtr >= m_urlEnd ) break;
		// set this up
		next = m_urlPtr;
		// compute next url ptr
		for ( ; next < m_urlEnd && *next ; next++ );
		// point to this url
		char *u = m_urlPtr;
		// get hash
		long long h = hash64 ( u , gbstrlen(u) );
		// shortcut
		char *dir = g_hostdb.m_dir;


		// print into a secondary safe buf with a ptr to
		// it so we can sort that and transfer into the
		// primary safebuf later
		uptr[un] = tmp.length();
		// assume no diff
		udiff[un] = 0;

		// print number
		tmp.safePrintf("%li) ",count++);
		// . link to our stored http server reply
		// . TODO: link it to our [cached] copy in the test coll!!!
		char local[1200];
		sprintf(local,"/%s/doc.%llu.html",testDir,h);
		tmp.safePrintf("<a href=\"%s\"><b>%s</b></a> ",local,u);
		// link to live page
		tmp.safePrintf(" <a href=\"%s\">live</a> ",u);
		// link to page parser
		char ubuf[2000];
		urlEncode(ubuf,2000,u,gbstrlen(u),true);
		tmp.safePrintf(" <a href=\"/master/parser?c=test&"
			       "u=%s\">parser</a> ",ubuf);
		//tmp.safePrintf(" (%llu)",h);
		tmp.safePrintf("<br>\n");
		//tmp.safePrintf("<br>\n");
		tmp.safePrintf("<table border=1>\n");
		tmp.safePrintf("<tr>"
			      "<td><b><nobr>run id</nobr></b></td>"
			      "<td><b><nobr>crit hits</nobr></b></td>"
			      "<td><b><nobr>crit errors</nobr></b></td>"
			      "<td><b><nobr># e</nobr></b></td>"
			      "<td><b><nobr>unchecked</nobr></b></td>"
			      "<td><b><nobr>diff chars</nobr></b></td>"
			      "<td><b><nobr>diff file</nobr></b></td>"
			      "<td><b><nobr>full output</nobr></b></td>"
			      "</tr>\n");

		//SafeBuf sd;

		// loop over all the runs now, starting with latest run first
		for ( long ri = m_runId ; ri >= start ; ri-- ) {

			QUICKPOLL(niceness);

			// the diff filename
			char pdiff[200];
			sprintf(pdiff,"%s/%s/parse.%llu.%li.html.diff",dir,
				testDir,h,ri);
			File f;
			f.set(pdiff);
			long fs = f.getFileSize();
			if ( ! f.doesExist() && ri > 0 ) {
				// make the parse filename
				char pbuf1[200];
				char pbuf2[200];
				sprintf(pbuf1,"%s/%s/parse.%llu.%li.html",
					dir,testDir,h,ri);
				sprintf(pbuf2,"%s/%s/parse.%llu.%li.html",
					dir,testDir,h,ri-1);
				// sanity check
				//File tf; tf.set(pbuf1);
				//if ( ! tf.doesExist()) {char *xx=NULL;*xx=0;}
				// tmp file name
				char tmp1[200];
				char tmp2[200];
				sprintf(tmp1,"%s/%s/t1.html",dir,testDir);
				sprintf(tmp2,"%s/%s/t2.html",dir,testDir);
				// filter first
				char cmd[600];
				sprintf(cmd,
					"cat %s | "
					"grep -v \"<!--ignore-->\" "
					" > %s", pbuf1,tmp1);
				system(cmd);
				sprintf(cmd,
					"cat %s | "
					"grep -v \"<!--ignore-->\" "
					" > %s", pbuf2,tmp2);
				system(cmd);
				// make the system cmd to do the diff
				sprintf(cmd,
					"echo \"<pre>\" > %s ; "
					"diff -w --text %s %s "
					// ignore this table header row
					//" | grep -v \"R#4\""
					" >> %s",
					pdiff,
					tmp1,tmp2,pdiff);
				log("test: system(\"%s\")",cmd);
				system(cmd);
				// try again
				f.set(pdiff);
				fs = f.getFileSize();
			}

			QUICKPOLL(niceness);

			// this means 0 . it just has the <pre> tag in it!
			if ( fs < 0 || fs == 6 ) fs = 0;
			// . if no diff and NOT current run, do not print it
			// . print it if the run right before the current 
			//   now always too
			if ( ri != m_runId && ri != m_runId-1 && fs == 0 ) 
				continue;
			// relative filename
			char rel[200];
			sprintf(rel,"/%s/parse.%llu.%li.html.diff",
				testDir,h,ri);
			char full[200];
			sprintf(full,"/%s/parse.%llu.%li.html",
				testDir,h,ri);
			char validate[200];
			sprintf(validate,
				"/%s/parse-shortdisplay.%llu.%li.html",
				testDir,h,ri);
			// use red font for current run that has a diff!
			char *t1 = "";
			char *t2 = "";
			if ( ri == m_runId && fs != 0 ) {
				t1 = "<font color=pink><b>";
				t2 = "</b></font>";
				// a diff
				udiff[un] = 1;
			}

			// . get critical errors
			// . i.e. XmlDoc::validateOutput() could not validate
			//   a particular event or address that was in the
			//   url's "validated.uh64.txt" file since the admin
			//   clicked on the checkbox in the page parser output
			// . if we do not find such a tag in the parser output
			//   any more then Spider.cpp creates this file!
			if ( ri == m_runId ) {
				char cfile[256];
				sprintf(cfile,"%s/%s/critical.%llu.%li.txt",
					g_hostdb.m_dir,testDir,h,ri);
				SafeBuf ttt;
				ttt.fillFromFile(cfile);
				// first long is misses, then hits then events
				umiss[un] = 0;
				uhits[un] = 0;
				uevents[un] = 0;
				uunchecked[un] = 0;
				if ( ttt.length() >= 3 )
					sscanf(ttt.getBufStart(),
					       "%li %li %li %li",
					       &umiss[un],
					       &uhits[un],
					       &uevents[un],
					       &uunchecked[un]);
				usort[un] = umiss[un] + uunchecked[un];
				//File cf;
				//cf.set(cfile);
				//if ( cf.doesExist()) ucrit[un] = 1;
				//else                 ucrit[un] = 0;
			}

			// more critical?
			if ( ri == m_runId && umiss[un] != 0 ) {
				t1 = "<font color=red><b>";
				t2 = "</b></font>";
			}

			// . these are good to have
			// . if you don't have 1+ critical hits then you
			//   probably need to be validate by the qa guy
			char *uhb1 = "";
			char *uhb2 = "";
			if ( ri == m_runId && uhits[un] != 0 ) {
				uhb1 = "<font color=green><b>**";
				uhb2 = "**</b></font>";
			}

			QUICKPOLL(niceness);

			char *e1 = "<td>";
			char *e2 = "</td>";
			long ne = uevents[un];
			if ( ne ) { 
				e1="<td bgcolor=orange><b><font color=brown>"; 
				e2="</font></b></td>"; 
			}
			char *u1 = "<td>";
			char *u2 = "</td>";
			if ( uunchecked[un] ) {
				u1="<td bgcolor=purple><b><font color=white>"; 
				u2="</font></b></td>"; 
			}
				
			// print the row!
			tmp.safePrintf("<tr>"
				      "<td>%s%li%s</td>"
				       "<td>%s%li%s</td>" // critical hits
				       "<td>%s%li%s</td>" // critical misses
				       "%s%li%s" // # events
				       "%s%li%s" // unchecked
				       "<td>%s%li%s</td>" // filesize of diff
				      // diff filename
				      "<td><a href=\"%s\">%s%s%s</a></td>"
				      // full parser output
				      "<td>"
				       "<a href=\"%s\">full</a> | "
				       "<a href=\"%s\">validate</a> "
				       "</td>"
				      "</tr>\n",
				      t1,ri,t2,
				      uhb1,uhits[un],uhb2,
				      t1,umiss[un],t2,
				      e1,ne,e2,
				       u1,uunchecked[un],u2,
				      t1,fs,t2,
				      rel,t1,rel,t2,
				      full,
				       validate);


			// only fill "sd" for the most recent guy
			if ( ri != m_runId ) continue;

			// now concatenate the parse-shortdisplay file
			// to this little table so qa admin can check/uncheck
			// validation checkboxes for addresses and events
			//sprintf(cfile,
			//	"%s/test/parse-shortdisplay.%llu.%li.html",
			//	g_hostdb.m_dir,h,ri);
			//sd.fillFromFile ( cfile );
		}
		// end table
		tmp.safePrintf("</table>\n");

		// . and a separate little section for the checkboxes
		// . should already be in tables, etc.
		// . each checkbox should provide its own uh64 when it
		//   calls senddiv() when clicked now
		//tmp.cat ( sd );

		tmp.safePrintf("<br>\n");
		tmp.safePrintf("<br>\n");
		// set this
		ulen[un] = tmp.length() - uptr[un] ;
		// sanity check
		if ( ulen[un] > 10000000 ) { char *xx=NULL;*xx=0; }
		// inc it
		un++;
		// increase the 5000!!
		if ( un >= 5000 ) { char *xx=NULL; *xx=0; }
	}


	char flag ;
 bubble:
	flag = 0;
	// sort the url tables
	for ( long i = 0 ; i < un - 1 ; i++ ) {
		QUICKPOLL(niceness);
		if ( usort[i] >  usort[i+1] ) continue;
		if ( usort[i] == usort[i+1] ) 
			if ( udiff[i] >= udiff[i+1] ) continue;
		// swap em
		long  tp = uptr[i];
		long  td = udiff[i];
		long  um = umiss[i];
		long  us = usort[i];
		long  uh = uhits[i];
		long  tl = ulen [i];
		uptr[i] = uptr[i+1];
		umiss[i] = umiss[i+1];
		usort[i] = usort[i+1];
		uhits[i] = uhits[i+1];
		udiff[i] = udiff[i+1];
		ulen[i]  = ulen[i+1];
		uptr[i+1] = tp;
		umiss[i+1] = um;
		usort[i+1] = us;
		uhits[i+1] = uh;
		udiff[i+1] = td;
		ulen [i+1] = tl;
		flag = 1;
	}
	if ( flag ) goto bubble;

	// transfer into primary safe buf now
	for ( long i = 0 ; i < un ; i++ ) 
		sb.safeMemcpy(tmp.getBufStart() + uptr[i],ulen[i]);


	sb.safePrintf("</html>\n");

	char dfile[200];
	sprintf(dfile,"%s/%s/qa.html",g_hostdb.m_dir,testDir);
	sb.dumpToFile ( dfile );

	// free the buffer of urls
	reset();

	// turn off spiders
	g_conf.m_spideringEnabled = 0;

	// all done
	return;
}