bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) { SafeBuf p; char getBuf[64]; // holds extra values for GET method char formBuf[256]; // holds extra values for forms snprintf(getBuf, 64, "c=%s", r->getString("c", 0, "")); snprintf(formBuf, 256, "<input type=hidden name=\"c\" value=\"%s\">", //"<input type=hidden name=\"pwd\" value=\"%s\">", r->getString("c", 0, "")); g_pages.printAdminTop( &p, s, r); if (r->getLong("cancel", 0) != 0) { g_thesaurus.cancelRebuild(); p.safePrintf("<br><br>\n"); p.safePrintf( "<center><b><font color=#ff0000>" "rebuild canceled" "</font></b></center>"); } if (r->getLong("rebuild", 0) != 0) { bool full = r->getLong("full", 0); p.safePrintf("<br><br>\n"); if (g_thesaurus.rebuild(0, full)) { p.safePrintf( "<center><b><font color=#ff0000>" "error starting rebuild, check log for details" "</font></b></center>"); } else { p.safePrintf( "<center><b><font color=#ff0000>" "rebuild started" "</font></b></center>"); } } if (r->getLong("rebuildaff", 0) != 0) { bool full = r->getLong("full", 0); p.safePrintf("<br><br>\n"); if (g_thesaurus.rebuildAffinity(0, full)) { p.safePrintf( "<center><b><font color=#ff0000>" "error starting rebuild, check log for details" "</font></b></center>"); } else { p.safePrintf( "<center><b><font color=#ff0000>" "rebuild started" "</font></b></center>"); } } if (r->getLong("distribute", 0) != 0) { char cmd[1024]; p.safePrintf("<br><br>\n"); if (g_thesaurus.m_affinityState) { p.safePrintf( "<center><b><font color=#ff0000>" "cannot distribute during rebuild" "</font></b></center>"); } else { for ( long i = 0; i < g_hostdb.getNumHosts() ; i++ ) { Host *h = g_hostdb.getHost(i); snprintf(cmd, 512, "rcp -r " "./dict/thesaurus.* " "%s:%s/dict/ &", iptoa(h->m_ip), h->m_dir); log(LOG_INFO, "admin: %s", cmd); system( cmd ); } p.safePrintf( "<center><b><font color=#ff0000>" "data distributed" "</font></b></center>"); } } if (r->getLong("reload", 0) != 0) { p.safePrintf("<br><br>\n"); if (r->getLong("cast", 0) != 0) { p.safePrintf( "<center><b><font color=#ff0000>" "reload command broadcast" "</font></b></center>"); } else if (g_thesaurus.init()) { p.safePrintf( "<center><b><font color=#ff0000>" "thesaurus data reloaded" "</font></b></center>"); } else { p.safePrintf( "<center><b><font color=#ff0000>" "error reloading thesaurus data" "</font></b></center>"); } } long manualAddLen = 0; char *manualAdd = NULL; SafeBuf manualAddBuf; if ((manualAdd = r->getString("manualadd", &manualAddLen))) { trimWhite(manualAdd); manualAddLen = gbstrlen(manualAdd); File manualFile; manualFile.set(g_hostdb.m_dir, "dict/thesaurus-manual.txt"); if (manualFile.open(O_WRONLY | O_CREAT | O_TRUNC) && (manualFile.write(manualAdd, manualAddLen, 0) == manualAddLen)) { char newl = '\n'; // for write() if (manualAdd[manualAddLen-1] != '\n') manualFile.write(&newl, 1, manualAddLen); p.safePrintf( "<center><b><font color=#ff0000>" "updated manual add file sucessfully" "</font></b></center>"); } else { p.safePrintf( "<center><b><font color=#ff0000>" "error writing manual add file" "</font></b></center>"); } } else { char ff[PATH_MAX]; snprintf(ff, PATH_MAX, "%sdict/thesaurus-manual.txt", g_hostdb.m_dir); if (manualAddBuf.fillFromFile(ff)) { if (*(manualAddBuf.getBuf()-1) != '\n') manualAddBuf.pushChar('\n'); manualAdd = manualAddBuf.getBufStart(); manualAddLen = manualAddBuf.length(); } } long affinityAddLen = 0; char *affinityAdd = NULL; SafeBuf affinityAddBuf; if ((affinityAdd = r->getString("affinityadd", &affinityAddLen))) { trimWhite(affinityAdd); affinityAddLen = gbstrlen(affinityAdd); File affinityFile; affinityFile.set(g_hostdb.m_dir, "dict/thesaurus-affinity.txt"); if (affinityFile.open(O_WRONLY | O_CREAT | O_TRUNC) && (affinityFile.write(affinityAdd, affinityAddLen, 0) == affinityAddLen)) { char newl = '\n'; // for write() if (affinityAdd[affinityAddLen-1] != '\n') affinityFile.write(&newl, 1, affinityAddLen); p.safePrintf( "<center><b><font color=#ff0000>" "updated affinity add file sucessfully" "</font></b></center>"); } else { p.safePrintf( "<center><b><font color=#ff0000>" "error writing affinity add file" "</font></b></center>"); } } else { char ff[PATH_MAX]; snprintf(ff, PATH_MAX, "%sdict/thesaurus-affinity.txt", g_hostdb.m_dir); if (affinityAddBuf.fillFromFile(ff)) { if (*(affinityAddBuf.getBuf()-1) != '\n') affinityAddBuf.pushChar('\n'); affinityAdd = affinityAddBuf.getBufStart(); affinityAddLen = affinityAddBuf.length(); } } char *syn = r->getString("synonym"); long len = 0; if (syn) len = gbstrlen(syn); if (len) { SynonymInfo info; bool r = g_thesaurus.getAllInfo(syn, &info, len, SYNBIT_ALL); p.safePrintf("<br><br>\n"); p.safePrintf ( "<table cellpadding=4 width=100%% bgcolor=#%s border=1>" "<tr>" "<td colspan=2 bgcolor=#%s>" "<center><b>Synonym List (%ld)</b></center>" "</td>" "</tr>\n", LIGHT_BLUE, DARK_BLUE, info.m_numSyns); if (r) { p.safePrintf("<tr>" "<td align=right><tt>%s</tt></td>" "<td align=left>" "<tt>1.000/%08lX (1.000/%08lX)</tt>" "</td>" "</tr>\n", syn, MAX_AFFINITY, MAX_AFFINITY); for (long i = 0; i < info.m_numSyns; i++) { // get the reverse affinity as well long aff = g_thesaurus.getAffinity( info.m_syn[i], syn, info.m_len[i], len); p.safePrintf( "<tr>" "<td width=40%% align=right>" "<tt>"); p.safeMemcpy(info.m_syn[i], info.m_len[i]); p.safePrintf("</tt>" "</td>" "<td width=60%% align=left>" "<tt>"); if (info.m_affinity[i] >= 0) { p.safePrintf("%0.3f/%08lX ", (float)info.m_affinity[i] / MAX_AFFINITY, info.m_affinity[i]); } else { p.safePrintf("u "); } if (aff >= 0) { p.safePrintf("(%0.3f/%08lX) ", (float)aff / MAX_AFFINITY, aff); } else { p.safePrintf("(u) "); } p.safePrintf("(%ld) (%ld) (%ld) (%ld) " "(%lld) (%lld)", (long)info.m_type[i], (long)info.m_sort[i], info.m_firstId[i], info.m_lastId[i], info.m_leftSynHash[i], info.m_rightSynHash[i]); for (int j = info.m_firstId[i]; j <= info.m_lastId[i]; j++) { p.safePrintf(" (%lld)", info.m_termId[j]); } p.safePrintf( "</tt>" "</td>" "</tr>\n"); } p.safePrintf("</table>"); } else { p.safePrintf("<tr>" "<td align=center><font color=#FF0000>" "synonym not found: %s" "</font></td>" "</tr>\n", syn); } } p.safePrintf ( "<br><br>\n" ); p.safePrintf ( "<table cellpadding=4 width=100%% bgcolor=#%s border=1>" "<tr>" "<td colspan=2 bgcolor=#%s>" "<center><b>Thesaurus Controls" "</b></center></td>" "</tr>\n", LIGHT_BLUE, DARK_BLUE); p.safePrintf ( "<tr>" "<td width=37%%><b>rebuild all data</b><br>" "<font size=1>" "rebuilds synonyms and then begins the rebuild process for " "affinity data; this should only be run on one host, as the " "data is copied when the process is finished; full rebuild " "does not use existing affinity data" "</font>" "</td>" "<td width=12%% bgcolor=#0000ff>" "<center><b><a href=\"/master/thesaurus?rebuild=1&%s\">" "rebuild all data</a> <a href=\"/master/thesaurus?" "rebuild=1&full=1&%s\">(full)</a></b></center>" "</td>" "</tr>\n", getBuf, getBuf); p.safePrintf ( "<tr>" "<td width=37%%><b>distribute data</b><br>" "<font size=1>" "distributes all thesaurus data to all hosts, this is " "normally done automatically but if there was a problem " "with the copy, this lets you do it manually" "</font>" "</td>" "<td width=12%% bgcolor=#0000ff>" "<center><b><a href=\"/master/thesaurus?distribute=1&%s\">" "distribute data</a></b></center>" "</td>" "</tr>\n", getBuf); p.safePrintf ( "<tr>" "<td width=37%%><b>reload data</b><br>" "<font size=1>" "reloads the synonyms and affinity table on this host only" "</font>" "</td>" "<td width=12%% bgcolor=#0000ff>" "<center><b>" "<a href=\"/master/thesaurus?reload=1&cast=0&%s\">" "reload data</a></b></center>" "</td>" "</tr>\n", getBuf); p.safePrintf ( "<tr>" "<td width=37%%><b>reload data (all hosts)</b><br>" "<font size=1>" "reloads the synonyms and affinity table on all hosts" "</font>" "</td>" "<td width=12%% bgcolor=#0000ff>" "<center><b>" "<a href=\"/master/thesaurus?reload=1&cast=1&%s\">" "reload data (all hosts)</a></b></center>" "</td>" "</tr>\n", getBuf); p.safePrintf ( "<tr>" "<td width=37%%><b>list synonyms</b><br>" "<font size=1>" "enter a word here to list all synonym entries and their " "affinities" "</font>" "</td>" "<td width=12%%>" "<form action=\"/master/thesaurus>\">" "<input type=text name=synonym size=20>" "<input type=submit value=Submit>" "%s" "</form></td>" "</tr>\n", formBuf); p.safePrintf ( "<tr>" "<td colspan=2 bgcolor=#%s>" "<center><b>Affinity Controls" "</b></center></td>" "</tr>\n", DARK_BLUE); p.safePrintf ( "<tr>" "<td width=37%%><b>cancel running rebuild</b><br>" "<font size=1>" "cancels the rebuild and throws all intermediate data away" "</font>" "</td>" "<td width=12%% bgcolor=#0000ff>" "<center><b><a href=\"/master/thesaurus?cancel=1&%s\">" "cancel running rebuild</a></b></center>" "</td>" "</tr>\n", getBuf); p.safePrintf ( "<tr>" "<td width=37%%><b>rebuild affinity only</b><br>" "<font size=1>" "begins the rebuild process for affinity data, has no " "effect if a rebuild is already in progress; full rebuild " "does not reuse existing affinity data" "</font>" "</td>" "<td width=12%% bgcolor=#0000ff>" "<center><b><a href=\"/master/thesaurus?rebuildaff=1&%s\">" "rebuild affinity</a> <a href=\"/master/thesaurus?" "rebuildaff=1&full=1&%s\">(full)</a></b></center>" "</td>" "</tr>\n", getBuf, getBuf); p.safePrintf ( "<tr>" "<td colspan=2 bgcolor=#%s>" "<center><b>Manual File Controls" "</b></td>" "</tr>\n", DARK_BLUE); p.safePrintf ( "<tr>" "<td align=center colspan=2>"); p.safePrintf( "<b>manually added pairs</b><br>\n" "<font size=1>place word pairs here that should be linked " "as synonyms, one pair per line, seperated by a pipe '|' " "character, optionally followed by another pipe and a type " "designation; any badly formatted lines will be silently " "ignored</font><br>\n" "<form action=\"/master/thesaurus\" method=post>" "<textarea name=\"manualadd\" rows=20 cols=80>"); if (manualAdd && manualAddLen) { p.htmlEncode(manualAdd, manualAddLen, true); } p.safePrintf ( "</textarea><br>" "<input type=submit value=Submit>" "<input type=reset value=Reset>" "%s" "</form></td>" "</tr>\n", formBuf); p.safePrintf ( "<tr>" "<td align=center colspan=2>" "<b>affinity value overrides</b><br>\n" "<font size=1>place word/phrase pairs here that should have " "there affinity values overridden, format is " "\"word1|word2|value\", where value is a floating point, " "integer (either decimal or hex), or the word \"max\"; " "any badly formatted lines will be silently ignored; note " "that these pairs will only work if the thesaurus otherwise " "has an entry for them, so add them to the manual add file " "above if need be</font><br>\n" "<form action=\"/master/thesaurus\" method=post>" "<textarea name=\"affinityadd\" rows=20 cols=80>"); if (affinityAdd && affinityAddLen) { p.htmlEncode(affinityAdd, affinityAddLen, true); } p.safePrintf ( "</textarea><br>" "<input type=submit value=Submit>" "<input type=reset value=Reset>" "%s" "</form></td>" "</tr>\n", formBuf); p.safePrintf ( "</table>\n" ); p.safePrintf ( "<br><br>\n" ); p.safePrintf ( "<table cellpadding=4 width=100%% bgcolor=#%s border=1>" "<tr>" "<td colspan=2 bgcolor=#%s>" "<center><b>Affinity Builder Status" "</b></td>" "</tr>\n", LIGHT_BLUE, DARK_BLUE); long long a, b, c, d, e, f, g, h, i, j, k; StateAffinity *aff = g_thesaurus.m_affinityState; if (!aff) { p.safePrintf ( "<tr><td colspan=2>" "<center><b>Not running</b></center>" "</td></tr>\n"); a = b = c = d = e = f = g = h = i = j = k = 0; } else { a = aff->m_oldTable->getNumSlotsUsed(); b = aff->m_oldTable->getNumSlotsUsed() - aff->m_n; c = aff->m_n; d = (gettimeofdayInMilliseconds() - aff->m_time) / 1000; if (!d || !(c / d)) { e = 0; } else { e = b / (c / d); } f = aff->m_sent; g = aff->m_recv; h = aff->m_errors; i = aff->m_old; j = aff->m_cache; k = aff->m_hitsTable.getNumSlotsUsed(); } p.safePrintf ( "<tr><td><b># of total pairs</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b># of pairs remaining</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b># of pairs processed</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b>elapsed time in seconds</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b>estimated remaining time in seconds</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b># of requests sent</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b># of requests received</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b># of request errors</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b># of old values reused</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b># of cache hits</b></td>" "<td>%lli</td></tr>\n" "<tr><td><b>cache size</b></td>" "<td>%lli</td></tr>\n", a, b, c, d, e, f, g, h, i, j, k); p.safePrintf ( "</table>\n" ); return g_httpServer.sendDynamicPage ( s, p.getBufStart(), p.length() ); }
// . sets m_fileOffset and m_bf // . returns false and sets g_errno on error // . returns false if nothing to read too... but does not set g_errno bool ImportState::setCurrentTitleFileAndOffset ( ) { // leave m_bf and m_fileOffset alone if there is more to read if ( m_fileOffset < m_bfFileSize ) return true; CollectionRec *cr = g_collectiondb.getRec ( m_collnum ); if ( ! cr ) return false; log("import: import finding next file"); // if ( m_offIsValid ) { // //*off = m_fileOffset; // return &m_bf; // } //m_offIsValid = true; // look for titledb0001.dat etc. files in the // workingDir/inject/ subdir SafeBuf ddd; ddd.safePrintf("%sinject",cr->m_importDir.getBufStart()); // now use the one provided. we should also provide the # of threads if ( cr->m_importDir.getBufStart() && cr->m_importDir.getBufStart()[0] ) { ddd.reset(); ddd.safeStrcpy ( cr->m_importDir.getBufStart() ); } // // assume we are the first filename // set s_fileId to the minimum // Dir dir; dir.set(ddd.getBufStart()); if ( ! dir.open() ) return false; // assume none long minFileId = -1; // getNextFilename() writes into this char pattern[64]; strcpy ( pattern , "titledb*" ); char *filename; while ( ( filename = dir.getNextFilename ( pattern ) ) ) { // filename must be a certain length long filenameLen = gbstrlen(filename); // we need at least "titledb0001.dat" if ( filenameLen < 15 ) continue; // ensure filename starts w/ our m_dbname if ( strncmp ( filename , "titledb", 7 ) != 0 ) continue; // skip if not .dat file if ( ! strstr ( filename , ".dat" ) ) continue; // then a 4 digit number should follow char *s = filename + 7; if ( ! isdigit(*(s+0)) ) continue; if ( ! isdigit(*(s+1)) ) continue; if ( ! isdigit(*(s+2)) ) continue; if ( ! isdigit(*(s+3)) ) continue; // convert digit to id long id = atol(s); // . do not accept files we've already processed // . -1 means we haven't processed any yet if ( m_bfFileId >= 0 && id <= m_bfFileId ) continue; // the min of those we haven't yet processed/injected if ( id < minFileId || minFileId < 0 ) minFileId = id; } // get where we left off if ( ! m_loadedPlaceHolder ) { // read where we left off from file if possible char fname[256]; sprintf(fname,"%slasttitledbinjectinfo.dat",g_hostdb.m_dir); SafeBuf ff; ff.fillFromFile(fname); if ( ff.length() > 1 ) { m_loadedPlaceHolder = true; // get the placeholder sscanf ( ff.getBufStart() , "%llu,%lu" , &m_fileOffset , &minFileId ); } } // if no files! return false to indicate we are done if ( minFileId == -1 ) return false; // set up s_bf then //if ( m_bfFileId != minFileId ) { SafeBuf tmp; tmp.safePrintf("titledb%04li-000.dat" //,dir.getDirname() ,minFileId); m_bf.set ( dir.getDirname() ,tmp.getBufStart() ); if ( ! m_bf.open( O_RDONLY ) ) { log("inject: import: could not open %s%s for reading", dir.getDirname(),tmp.getBufStart()); return false; } m_bfFileId = minFileId; // reset ptr into file //*off = 0; // and set this m_bfFileSize = m_bf.getFileSize(); m_fileOffset = 0; //} log("import: importing from file %s",m_bf.getFilename()); return true;//&m_bf; }
// this should be called when all docs have finished spidering void Test::stopIt ( ) { // sanity if ( m_isAdding ) { char *xx=NULL;*xx=0; } // flag that we are done m_isRunning = false; // print time log("test: took %lli ms to complete injections.", gettimeofdayInMilliseconds() - m_testStartTime ); // get this before setting testParserEnabled to false char *testDir = g_test.getTestDir(); // turn this off now too g_conf.m_testParserEnabled = false; g_conf.m_testSpiderEnabled = false; // save all! bool disabled = g_threads.m_disabled; g_threads.disableThreads(); // save it blocking style g_process.save(); if ( ! disabled ) g_threads.enableThreads(); // save ips.txt saveTestBuf ( testDir ); log("test: test completed. making qa.html"); // // // NOW MAKE THE qa.html FILE // // // only analyze up to last 7 runs long start = m_runId - 7; if ( start < 0 ) start = 0; SafeBuf sb; sb.safePrintf("<table border=1>\n"); sb.safePrintf("<tr>" "<td><b><nobr>run id</nobr></b></td>" "<td><b><nobr>conf diff</nobr></b></td>" "<td><b><nobr>coll diff</nobr></b></td>" "<td><b><nobr>run info</nobr></b></td>" "</tr>\n"); // take diffs between this run and the last run for confparms for ( long i = m_runId ; i > start ; i-- ) { // shortcut char *dir = g_hostdb.m_dir; // make diff filename char diff1[200]; sprintf(diff1,"%s/%s/run.%li.confparms.txt.diff",dir, testDir,i); File f1; f1.set(diff1); if ( ! f1.doesExist() ) { char df1[200]; char df2[200]; sprintf(df1,"%s/%s/run.%li.confparms.txt",dir, testDir,i); sprintf(df2,"%s/%s/run.%li.confparms.txt",dir, testDir,i-1); // do the diff char cmd[600]; sprintf(cmd,"diff %s %s > %s",df1,df2,diff1); log("test: system(\"%s\")",cmd); system (cmd); } long fs1 = f1.getFileSize(); sb.safePrintf("<tr><td>%li</td><td>%li</td>", i,fs1); // make diff filename char diff2[200]; sprintf(diff2,"%s/%s/run.%li.collparms.txt.diff",dir, testDir,i); File f2; f2.set(diff2); if ( ! f2.doesExist() ) { char df1[200]; char df2[200]; sprintf(df1,"%s/%s/run.%li.collparms.txt",dir, testDir,i); sprintf(df2,"%s/%s/run.%li.collparms.txt",dir, testDir,i-1); // do the diff char cmd[600]; sprintf(cmd,"diff %s %s > %s",df1,df2,diff2); log("test: system(\"%s\")",cmd); system (cmd); } long fs2 = f2.getFileSize(); sb.safePrintf("<td>%li</td>", fs2); // the version char vf[200]; sprintf(vf,"%s/%s/run.%li.version.txt",dir,testDir,i); File f3; f3.set ( vf ); long fs3 = f3.getFileSize(); char vbuf[1000]; vbuf[0] = 0; if ( fs3 > 0 ) { f3.open(O_RDONLY); long rs = f3.read(vbuf,fs3,0); vbuf[fs3] = '\0'; if ( rs <= 0 ) continue; f3.close(); } // show it sb.safePrintf("<td><pre>%s</pre></td></tr>\n", vbuf); } sb.safePrintf("</table>\n"); sb.safePrintf("<br>\n"); // // now diff each parser output file for each url in urls.txt // // // loop over url buf first so we can print one table per url // char *next = NULL; // reset the url buf ptr m_urlPtr = m_urlBuf; // count em long count = 0; // ptrs to each url table long un = 0; long uptr [5000]; // offsets now, not char ptr since buf gets reallocd char udiff[5000]; long ulen [5000]; long uhits[5000]; // critical errors! validateOutput() choked! long uunchecked[5000]; // events/addresses found but were not validatd long umiss[5000]; long usort[5000]; long uevents[5000]; SafeBuf tmp; long niceness = MAX_NICENESS; // advance to next url for ( ; m_urlPtr < m_urlEnd ; m_urlPtr = next ) { // breathe QUICKPOLL(niceness); // we converted all non-url chars into \0's so skip those! for ( ; m_urlPtr<m_urlEnd && !*m_urlPtr ; m_urlPtr++ ); // breach check if ( m_urlPtr >= m_urlEnd ) break; // set this up next = m_urlPtr; // compute next url ptr for ( ; next < m_urlEnd && *next ; next++ ); // point to this url char *u = m_urlPtr; // get hash long long h = hash64 ( u , gbstrlen(u) ); // shortcut char *dir = g_hostdb.m_dir; // print into a secondary safe buf with a ptr to // it so we can sort that and transfer into the // primary safebuf later uptr[un] = tmp.length(); // assume no diff udiff[un] = 0; // print number tmp.safePrintf("%li) ",count++); // . link to our stored http server reply // . TODO: link it to our [cached] copy in the test coll!!! char local[1200]; sprintf(local,"/%s/doc.%llu.html",testDir,h); tmp.safePrintf("<a href=\"%s\"><b>%s</b></a> ",local,u); // link to live page tmp.safePrintf(" <a href=\"%s\">live</a> ",u); // link to page parser char ubuf[2000]; urlEncode(ubuf,2000,u,gbstrlen(u),true); tmp.safePrintf(" <a href=\"/master/parser?c=test&" "u=%s\">parser</a> ",ubuf); //tmp.safePrintf(" (%llu)",h); tmp.safePrintf("<br>\n"); //tmp.safePrintf("<br>\n"); tmp.safePrintf("<table border=1>\n"); tmp.safePrintf("<tr>" "<td><b><nobr>run id</nobr></b></td>" "<td><b><nobr>crit hits</nobr></b></td>" "<td><b><nobr>crit errors</nobr></b></td>" "<td><b><nobr># e</nobr></b></td>" "<td><b><nobr>unchecked</nobr></b></td>" "<td><b><nobr>diff chars</nobr></b></td>" "<td><b><nobr>diff file</nobr></b></td>" "<td><b><nobr>full output</nobr></b></td>" "</tr>\n"); //SafeBuf sd; // loop over all the runs now, starting with latest run first for ( long ri = m_runId ; ri >= start ; ri-- ) { QUICKPOLL(niceness); // the diff filename char pdiff[200]; sprintf(pdiff,"%s/%s/parse.%llu.%li.html.diff",dir, testDir,h,ri); File f; f.set(pdiff); long fs = f.getFileSize(); if ( ! f.doesExist() && ri > 0 ) { // make the parse filename char pbuf1[200]; char pbuf2[200]; sprintf(pbuf1,"%s/%s/parse.%llu.%li.html", dir,testDir,h,ri); sprintf(pbuf2,"%s/%s/parse.%llu.%li.html", dir,testDir,h,ri-1); // sanity check //File tf; tf.set(pbuf1); //if ( ! tf.doesExist()) {char *xx=NULL;*xx=0;} // tmp file name char tmp1[200]; char tmp2[200]; sprintf(tmp1,"%s/%s/t1.html",dir,testDir); sprintf(tmp2,"%s/%s/t2.html",dir,testDir); // filter first char cmd[600]; sprintf(cmd, "cat %s | " "grep -v \"<!--ignore-->\" " " > %s", pbuf1,tmp1); system(cmd); sprintf(cmd, "cat %s | " "grep -v \"<!--ignore-->\" " " > %s", pbuf2,tmp2); system(cmd); // make the system cmd to do the diff sprintf(cmd, "echo \"<pre>\" > %s ; " "diff -w --text %s %s " // ignore this table header row //" | grep -v \"R#4\"" " >> %s", pdiff, tmp1,tmp2,pdiff); log("test: system(\"%s\")",cmd); system(cmd); // try again f.set(pdiff); fs = f.getFileSize(); } QUICKPOLL(niceness); // this means 0 . it just has the <pre> tag in it! if ( fs < 0 || fs == 6 ) fs = 0; // . if no diff and NOT current run, do not print it // . print it if the run right before the current // now always too if ( ri != m_runId && ri != m_runId-1 && fs == 0 ) continue; // relative filename char rel[200]; sprintf(rel,"/%s/parse.%llu.%li.html.diff", testDir,h,ri); char full[200]; sprintf(full,"/%s/parse.%llu.%li.html", testDir,h,ri); char validate[200]; sprintf(validate, "/%s/parse-shortdisplay.%llu.%li.html", testDir,h,ri); // use red font for current run that has a diff! char *t1 = ""; char *t2 = ""; if ( ri == m_runId && fs != 0 ) { t1 = "<font color=pink><b>"; t2 = "</b></font>"; // a diff udiff[un] = 1; } // . get critical errors // . i.e. XmlDoc::validateOutput() could not validate // a particular event or address that was in the // url's "validated.uh64.txt" file since the admin // clicked on the checkbox in the page parser output // . if we do not find such a tag in the parser output // any more then Spider.cpp creates this file! if ( ri == m_runId ) { char cfile[256]; sprintf(cfile,"%s/%s/critical.%llu.%li.txt", g_hostdb.m_dir,testDir,h,ri); SafeBuf ttt; ttt.fillFromFile(cfile); // first long is misses, then hits then events umiss[un] = 0; uhits[un] = 0; uevents[un] = 0; uunchecked[un] = 0; if ( ttt.length() >= 3 ) sscanf(ttt.getBufStart(), "%li %li %li %li", &umiss[un], &uhits[un], &uevents[un], &uunchecked[un]); usort[un] = umiss[un] + uunchecked[un]; //File cf; //cf.set(cfile); //if ( cf.doesExist()) ucrit[un] = 1; //else ucrit[un] = 0; } // more critical? if ( ri == m_runId && umiss[un] != 0 ) { t1 = "<font color=red><b>"; t2 = "</b></font>"; } // . these are good to have // . if you don't have 1+ critical hits then you // probably need to be validate by the qa guy char *uhb1 = ""; char *uhb2 = ""; if ( ri == m_runId && uhits[un] != 0 ) { uhb1 = "<font color=green><b>**"; uhb2 = "**</b></font>"; } QUICKPOLL(niceness); char *e1 = "<td>"; char *e2 = "</td>"; long ne = uevents[un]; if ( ne ) { e1="<td bgcolor=orange><b><font color=brown>"; e2="</font></b></td>"; } char *u1 = "<td>"; char *u2 = "</td>"; if ( uunchecked[un] ) { u1="<td bgcolor=purple><b><font color=white>"; u2="</font></b></td>"; } // print the row! tmp.safePrintf("<tr>" "<td>%s%li%s</td>" "<td>%s%li%s</td>" // critical hits "<td>%s%li%s</td>" // critical misses "%s%li%s" // # events "%s%li%s" // unchecked "<td>%s%li%s</td>" // filesize of diff // diff filename "<td><a href=\"%s\">%s%s%s</a></td>" // full parser output "<td>" "<a href=\"%s\">full</a> | " "<a href=\"%s\">validate</a> " "</td>" "</tr>\n", t1,ri,t2, uhb1,uhits[un],uhb2, t1,umiss[un],t2, e1,ne,e2, u1,uunchecked[un],u2, t1,fs,t2, rel,t1,rel,t2, full, validate); // only fill "sd" for the most recent guy if ( ri != m_runId ) continue; // now concatenate the parse-shortdisplay file // to this little table so qa admin can check/uncheck // validation checkboxes for addresses and events //sprintf(cfile, // "%s/test/parse-shortdisplay.%llu.%li.html", // g_hostdb.m_dir,h,ri); //sd.fillFromFile ( cfile ); } // end table tmp.safePrintf("</table>\n"); // . and a separate little section for the checkboxes // . should already be in tables, etc. // . each checkbox should provide its own uh64 when it // calls senddiv() when clicked now //tmp.cat ( sd ); tmp.safePrintf("<br>\n"); tmp.safePrintf("<br>\n"); // set this ulen[un] = tmp.length() - uptr[un] ; // sanity check if ( ulen[un] > 10000000 ) { char *xx=NULL;*xx=0; } // inc it un++; // increase the 5000!! if ( un >= 5000 ) { char *xx=NULL; *xx=0; } } char flag ; bubble: flag = 0; // sort the url tables for ( long i = 0 ; i < un - 1 ; i++ ) { QUICKPOLL(niceness); if ( usort[i] > usort[i+1] ) continue; if ( usort[i] == usort[i+1] ) if ( udiff[i] >= udiff[i+1] ) continue; // swap em long tp = uptr[i]; long td = udiff[i]; long um = umiss[i]; long us = usort[i]; long uh = uhits[i]; long tl = ulen [i]; uptr[i] = uptr[i+1]; umiss[i] = umiss[i+1]; usort[i] = usort[i+1]; uhits[i] = uhits[i+1]; udiff[i] = udiff[i+1]; ulen[i] = ulen[i+1]; uptr[i+1] = tp; umiss[i+1] = um; usort[i+1] = us; uhits[i+1] = uh; udiff[i+1] = td; ulen [i+1] = tl; flag = 1; } if ( flag ) goto bubble; // transfer into primary safe buf now for ( long i = 0 ; i < un ; i++ ) sb.safeMemcpy(tmp.getBufStart() + uptr[i],ulen[i]); sb.safePrintf("</html>\n"); char dfile[200]; sprintf(dfile,"%s/%s/qa.html",g_hostdb.m_dir,testDir); sb.dumpToFile ( dfile ); // free the buffer of urls reset(); // turn off spiders g_conf.m_spideringEnabled = 0; // all done return; }