bool processLoop ( void *state ) { // cast it State8 *st = (State8 *)state; // get the xmldoc XmlDoc *xd = &st->m_xd; // error? if ( g_errno ) return sendErrorReply ( st , g_errno ); // shortcut SafeBuf *xbuf = &st->m_xbuf; if ( st->m_u && st->m_u[0] ) { // . save the ips.txt file if we are the test coll // . saveTestBuf() is a function in Msge1.cpp CollectionRec *cr = xd->getCollRec(); if ( xd && cr && cr->m_coll && !strcmp(cr->m_coll,"qatest123")) // use same dir that XmlDoc::getTestDir() would use //saveTestBuf ( "test-page-parser" ); saveTestBuf("qa"); // now get the meta list, in the process it will print out a // bunch of junk into st->m_xbuf char *metalist = xd->getMetaList ( ); if ( ! metalist ) return sendErrorReply ( st , g_errno ); // return false if it blocked if ( metalist == (void *)-1 ) return false; // for debug... if ( ! xd->m_indexCode ) xd->doConsistencyTest ( false ); // print it out xd->printDoc( xbuf ); } // print reason we can't analyze it (or index it) //if ( st->m_indexCode != 0 ) { // xbuf->safePrintf ("<br><br><b>indexCode: %s</b>\n<br>", // mstrerror(st->m_indexCode)); //} // we are done g_inPageParser = false; // print the final tail //p += g_httpServer.printTail ( p , pend - p ); //log("parser: send sock=%li",st->m_s->m_sd); // now encapsulate it in html head/tail and send it off bool status = g_httpServer.sendDynamicPage( st->m_s , xbuf->getBufStart(), xbuf->length() , -1, //cachtime false ,//postreply? NULL, //ctype -1 , //httpstatus NULL,//cookie "utf-8"); // delete the state now if ( st->m_freeIt ) { mdelete ( st , sizeof(State8) , "PageParser" ); delete (st); } // return the status return status; }
// this should be called when all docs have finished spidering void Test::stopIt ( ) { // sanity if ( m_isAdding ) { char *xx=NULL;*xx=0; } // flag that we are done m_isRunning = false; // print time log("test: took %lli ms to complete injections.", gettimeofdayInMilliseconds() - m_testStartTime ); // get this before setting testParserEnabled to false char *testDir = g_test.getTestDir(); // turn this off now too g_conf.m_testParserEnabled = false; g_conf.m_testSpiderEnabled = false; // save all! bool disabled = g_threads.m_disabled; g_threads.disableThreads(); // save it blocking style g_process.save(); if ( ! disabled ) g_threads.enableThreads(); // save ips.txt saveTestBuf ( testDir ); log("test: test completed. making qa.html"); // // // NOW MAKE THE qa.html FILE // // // only analyze up to last 7 runs long start = m_runId - 7; if ( start < 0 ) start = 0; SafeBuf sb; sb.safePrintf("<table border=1>\n"); sb.safePrintf("<tr>" "<td><b><nobr>run id</nobr></b></td>" "<td><b><nobr>conf diff</nobr></b></td>" "<td><b><nobr>coll diff</nobr></b></td>" "<td><b><nobr>run info</nobr></b></td>" "</tr>\n"); // take diffs between this run and the last run for confparms for ( long i = m_runId ; i > start ; i-- ) { // shortcut char *dir = g_hostdb.m_dir; // make diff filename char diff1[200]; sprintf(diff1,"%s/%s/run.%li.confparms.txt.diff",dir, testDir,i); File f1; f1.set(diff1); if ( ! f1.doesExist() ) { char df1[200]; char df2[200]; sprintf(df1,"%s/%s/run.%li.confparms.txt",dir, testDir,i); sprintf(df2,"%s/%s/run.%li.confparms.txt",dir, testDir,i-1); // do the diff char cmd[600]; sprintf(cmd,"diff %s %s > %s",df1,df2,diff1); log("test: system(\"%s\")",cmd); system (cmd); } long fs1 = f1.getFileSize(); sb.safePrintf("<tr><td>%li</td><td>%li</td>", i,fs1); // make diff filename char diff2[200]; sprintf(diff2,"%s/%s/run.%li.collparms.txt.diff",dir, testDir,i); File f2; f2.set(diff2); if ( ! f2.doesExist() ) { char df1[200]; char df2[200]; sprintf(df1,"%s/%s/run.%li.collparms.txt",dir, testDir,i); sprintf(df2,"%s/%s/run.%li.collparms.txt",dir, testDir,i-1); // do the diff char cmd[600]; sprintf(cmd,"diff %s %s > %s",df1,df2,diff2); log("test: system(\"%s\")",cmd); system (cmd); } long fs2 = f2.getFileSize(); sb.safePrintf("<td>%li</td>", fs2); // the version char vf[200]; sprintf(vf,"%s/%s/run.%li.version.txt",dir,testDir,i); File f3; f3.set ( vf ); long fs3 = f3.getFileSize(); char vbuf[1000]; vbuf[0] = 0; if ( fs3 > 0 ) { f3.open(O_RDONLY); long rs = f3.read(vbuf,fs3,0); vbuf[fs3] = '\0'; if ( rs <= 0 ) continue; f3.close(); } // show it sb.safePrintf("<td><pre>%s</pre></td></tr>\n", vbuf); } sb.safePrintf("</table>\n"); sb.safePrintf("<br>\n"); // // now diff each parser output file for each url in urls.txt // // // loop over url buf first so we can print one table per url // char *next = NULL; // reset the url buf ptr m_urlPtr = m_urlBuf; // count em long count = 0; // ptrs to each url table long un = 0; long uptr [5000]; // offsets now, not char ptr since buf gets reallocd char udiff[5000]; long ulen [5000]; long uhits[5000]; // critical errors! validateOutput() choked! long uunchecked[5000]; // events/addresses found but were not validatd long umiss[5000]; long usort[5000]; long uevents[5000]; SafeBuf tmp; long niceness = MAX_NICENESS; // advance to next url for ( ; m_urlPtr < m_urlEnd ; m_urlPtr = next ) { // breathe QUICKPOLL(niceness); // we converted all non-url chars into \0's so skip those! for ( ; m_urlPtr<m_urlEnd && !*m_urlPtr ; m_urlPtr++ ); // breach check if ( m_urlPtr >= m_urlEnd ) break; // set this up next = m_urlPtr; // compute next url ptr for ( ; next < m_urlEnd && *next ; next++ ); // point to this url char *u = m_urlPtr; // get hash long long h = hash64 ( u , gbstrlen(u) ); // shortcut char *dir = g_hostdb.m_dir; // print into a secondary safe buf with a ptr to // it so we can sort that and transfer into the // primary safebuf later uptr[un] = tmp.length(); // assume no diff udiff[un] = 0; // print number tmp.safePrintf("%li) ",count++); // . link to our stored http server reply // . TODO: link it to our [cached] copy in the test coll!!! char local[1200]; sprintf(local,"/%s/doc.%llu.html",testDir,h); tmp.safePrintf("<a href=\"%s\"><b>%s</b></a> ",local,u); // link to live page tmp.safePrintf(" <a href=\"%s\">live</a> ",u); // link to page parser char ubuf[2000]; urlEncode(ubuf,2000,u,gbstrlen(u),true); tmp.safePrintf(" <a href=\"/master/parser?c=test&" "u=%s\">parser</a> ",ubuf); //tmp.safePrintf(" (%llu)",h); tmp.safePrintf("<br>\n"); //tmp.safePrintf("<br>\n"); tmp.safePrintf("<table border=1>\n"); tmp.safePrintf("<tr>" "<td><b><nobr>run id</nobr></b></td>" "<td><b><nobr>crit hits</nobr></b></td>" "<td><b><nobr>crit errors</nobr></b></td>" "<td><b><nobr># e</nobr></b></td>" "<td><b><nobr>unchecked</nobr></b></td>" "<td><b><nobr>diff chars</nobr></b></td>" "<td><b><nobr>diff file</nobr></b></td>" "<td><b><nobr>full output</nobr></b></td>" "</tr>\n"); //SafeBuf sd; // loop over all the runs now, starting with latest run first for ( long ri = m_runId ; ri >= start ; ri-- ) { QUICKPOLL(niceness); // the diff filename char pdiff[200]; sprintf(pdiff,"%s/%s/parse.%llu.%li.html.diff",dir, testDir,h,ri); File f; f.set(pdiff); long fs = f.getFileSize(); if ( ! f.doesExist() && ri > 0 ) { // make the parse filename char pbuf1[200]; char pbuf2[200]; sprintf(pbuf1,"%s/%s/parse.%llu.%li.html", dir,testDir,h,ri); sprintf(pbuf2,"%s/%s/parse.%llu.%li.html", dir,testDir,h,ri-1); // sanity check //File tf; tf.set(pbuf1); //if ( ! tf.doesExist()) {char *xx=NULL;*xx=0;} // tmp file name char tmp1[200]; char tmp2[200]; sprintf(tmp1,"%s/%s/t1.html",dir,testDir); sprintf(tmp2,"%s/%s/t2.html",dir,testDir); // filter first char cmd[600]; sprintf(cmd, "cat %s | " "grep -v \"<!--ignore-->\" " " > %s", pbuf1,tmp1); system(cmd); sprintf(cmd, "cat %s | " "grep -v \"<!--ignore-->\" " " > %s", pbuf2,tmp2); system(cmd); // make the system cmd to do the diff sprintf(cmd, "echo \"<pre>\" > %s ; " "diff -w --text %s %s " // ignore this table header row //" | grep -v \"R#4\"" " >> %s", pdiff, tmp1,tmp2,pdiff); log("test: system(\"%s\")",cmd); system(cmd); // try again f.set(pdiff); fs = f.getFileSize(); } QUICKPOLL(niceness); // this means 0 . it just has the <pre> tag in it! if ( fs < 0 || fs == 6 ) fs = 0; // . if no diff and NOT current run, do not print it // . print it if the run right before the current // now always too if ( ri != m_runId && ri != m_runId-1 && fs == 0 ) continue; // relative filename char rel[200]; sprintf(rel,"/%s/parse.%llu.%li.html.diff", testDir,h,ri); char full[200]; sprintf(full,"/%s/parse.%llu.%li.html", testDir,h,ri); char validate[200]; sprintf(validate, "/%s/parse-shortdisplay.%llu.%li.html", testDir,h,ri); // use red font for current run that has a diff! char *t1 = ""; char *t2 = ""; if ( ri == m_runId && fs != 0 ) { t1 = "<font color=pink><b>"; t2 = "</b></font>"; // a diff udiff[un] = 1; } // . get critical errors // . i.e. XmlDoc::validateOutput() could not validate // a particular event or address that was in the // url's "validated.uh64.txt" file since the admin // clicked on the checkbox in the page parser output // . if we do not find such a tag in the parser output // any more then Spider.cpp creates this file! if ( ri == m_runId ) { char cfile[256]; sprintf(cfile,"%s/%s/critical.%llu.%li.txt", g_hostdb.m_dir,testDir,h,ri); SafeBuf ttt; ttt.fillFromFile(cfile); // first long is misses, then hits then events umiss[un] = 0; uhits[un] = 0; uevents[un] = 0; uunchecked[un] = 0; if ( ttt.length() >= 3 ) sscanf(ttt.getBufStart(), "%li %li %li %li", &umiss[un], &uhits[un], &uevents[un], &uunchecked[un]); usort[un] = umiss[un] + uunchecked[un]; //File cf; //cf.set(cfile); //if ( cf.doesExist()) ucrit[un] = 1; //else ucrit[un] = 0; } // more critical? if ( ri == m_runId && umiss[un] != 0 ) { t1 = "<font color=red><b>"; t2 = "</b></font>"; } // . these are good to have // . if you don't have 1+ critical hits then you // probably need to be validate by the qa guy char *uhb1 = ""; char *uhb2 = ""; if ( ri == m_runId && uhits[un] != 0 ) { uhb1 = "<font color=green><b>**"; uhb2 = "**</b></font>"; } QUICKPOLL(niceness); char *e1 = "<td>"; char *e2 = "</td>"; long ne = uevents[un]; if ( ne ) { e1="<td bgcolor=orange><b><font color=brown>"; e2="</font></b></td>"; } char *u1 = "<td>"; char *u2 = "</td>"; if ( uunchecked[un] ) { u1="<td bgcolor=purple><b><font color=white>"; u2="</font></b></td>"; } // print the row! tmp.safePrintf("<tr>" "<td>%s%li%s</td>" "<td>%s%li%s</td>" // critical hits "<td>%s%li%s</td>" // critical misses "%s%li%s" // # events "%s%li%s" // unchecked "<td>%s%li%s</td>" // filesize of diff // diff filename "<td><a href=\"%s\">%s%s%s</a></td>" // full parser output "<td>" "<a href=\"%s\">full</a> | " "<a href=\"%s\">validate</a> " "</td>" "</tr>\n", t1,ri,t2, uhb1,uhits[un],uhb2, t1,umiss[un],t2, e1,ne,e2, u1,uunchecked[un],u2, t1,fs,t2, rel,t1,rel,t2, full, validate); // only fill "sd" for the most recent guy if ( ri != m_runId ) continue; // now concatenate the parse-shortdisplay file // to this little table so qa admin can check/uncheck // validation checkboxes for addresses and events //sprintf(cfile, // "%s/test/parse-shortdisplay.%llu.%li.html", // g_hostdb.m_dir,h,ri); //sd.fillFromFile ( cfile ); } // end table tmp.safePrintf("</table>\n"); // . and a separate little section for the checkboxes // . should already be in tables, etc. // . each checkbox should provide its own uh64 when it // calls senddiv() when clicked now //tmp.cat ( sd ); tmp.safePrintf("<br>\n"); tmp.safePrintf("<br>\n"); // set this ulen[un] = tmp.length() - uptr[un] ; // sanity check if ( ulen[un] > 10000000 ) { char *xx=NULL;*xx=0; } // inc it un++; // increase the 5000!! if ( un >= 5000 ) { char *xx=NULL; *xx=0; } } char flag ; bubble: flag = 0; // sort the url tables for ( long i = 0 ; i < un - 1 ; i++ ) { QUICKPOLL(niceness); if ( usort[i] > usort[i+1] ) continue; if ( usort[i] == usort[i+1] ) if ( udiff[i] >= udiff[i+1] ) continue; // swap em long tp = uptr[i]; long td = udiff[i]; long um = umiss[i]; long us = usort[i]; long uh = uhits[i]; long tl = ulen [i]; uptr[i] = uptr[i+1]; umiss[i] = umiss[i+1]; usort[i] = usort[i+1]; uhits[i] = uhits[i+1]; udiff[i] = udiff[i+1]; ulen[i] = ulen[i+1]; uptr[i+1] = tp; umiss[i+1] = um; usort[i+1] = us; uhits[i+1] = uh; udiff[i+1] = td; ulen [i+1] = tl; flag = 1; } if ( flag ) goto bubble; // transfer into primary safe buf now for ( long i = 0 ; i < un ; i++ ) sb.safeMemcpy(tmp.getBufStart() + uptr[i],ulen[i]); sb.safePrintf("</html>\n"); char dfile[200]; sprintf(dfile,"%s/%s/qa.html",g_hostdb.m_dir,testDir); sb.dumpToFile ( dfile ); // free the buffer of urls reset(); // turn off spiders g_conf.m_spideringEnabled = 0; // all done return; }