bool deleteUrls ( ) { static long s_ii2 = 0; for ( ; s_ii2 < s_numUrls ; ) { // pre-inc it s_ii2++; // reject using html api SafeBuf sb; sb.safePrintf( "/admin/inject?c=qatest123&delete=1&u="); sb.urlEncode ( s_urlPtrs[s_ii2] ); return getUrl ( sb.getBufStart() , qatestWrapper ); } return true; }
// ensure search results are consistent bool searchTest2 () { long nq = sizeof(s_queries)/sizeof(char *); for ( ; s_qi2 < nq ; ) { // pre-inc it s_qi2++; // inject using html api SafeBuf sb; // qa=1 tell gb to exclude "variable" or "random" things // from the serps so we can checksum it consistently sb.safePrintf ( "/search?c=qatest123&qa=1&q=" ); sb.urlEncode ( s_queries[s_qi2] ); return getUrl ( sb.getBufStart() , doneSearching2 ); } return true; }
// . "uf" is printf url format to scrape with a %s for the query // . example: uf="http://www.google.com/search?num=50&q=%s&scoring=d&filter=0"; bool Msg7::scrapeQuery ( ) { // advance round now in case we return early m_round++; // error? if ( m_qbuf.length() > 500 ) { g_errno = EQUERYTOOBIG; return true; } // first encode the query SafeBuf ebuf; ebuf.urlEncode ( m_qbuf.getBufStart() ); // queryUNEncoded ); char *uf; if ( m_round == 1 ) // set to 1 for debugging uf="http://www.google.com/search?num=20&" "q=%s&scoring=d&filter=0"; //uf = "https://startpage.com/do/search?q=%s"; //uf = "http://www.google.com/" // "/cse?cx=013269018370076798483%3A8eec3papwpi&" // "ie=UTF-8&q=%s&" // "num=20"; else uf="http://www.bing.com/search?q=%s"; // skip bing for now //if ( m_round == 2 ) // return true; //if ( m_round == 1 ) // return true; // make the url we will download char ubuf[2048]; sprintf ( ubuf , uf , ebuf.getBufStart() ); // log it log("inject: SCRAPING %s",ubuf); SpiderRequest sreq; sreq.reset(); // set the SpiderRequest strcpy(sreq.m_url, ubuf); // . tell it to only add the hosts of each outlink for now! // . that will be passed on to when XmlDoc calls Links::set() i guess // . xd will not reschedule the scraped url into spiderdb either sreq.m_isScraping = 1; sreq.m_fakeFirstIp = 1; long firstIp = hash32n(ubuf); if ( firstIp == 0 || firstIp == -1 ) firstIp = 1; sreq.m_firstIp = firstIp; // parent docid is 0 sreq.setKey(firstIp,0LL,false); // forceDEl = false, niceness = 0 m_xd.set4 ( &sreq , NULL , m_coll , NULL , 0 ); //m_xd.m_isScraping = true; // download without throttling //m_xd.m_throttleDownload = false; // disregard this m_xd.m_useRobotsTxt = false; // this will tell it to index ahrefs first before indexing // the doc. but do NOT do this if we are from ahrefs.com // ourselves to avoid recursive explosion!! if ( m_useAhrefs ) m_xd.m_useAhrefs = true; m_xd.m_reallyInjectLinks = m_injectLinks; // // rather than just add the links of the page to spiderdb, // let's inject them! // m_xd.setCallback ( this , doneInjectingLinksWrapper ); // niceness is 0 m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab2"); // do we actually inject the links, or just scrape? if ( ! m_xd.injectLinks ( &m_linkDedupTable , NULL, this , doneInjectingLinksWrapper ) ) return false; // otherwise, just download the google/bing search results so we // can display them in xml //else if ( m_xd.getUtf8Content() == (char **)-1 ) // return false; // print reply.. //printReply(); return true; }
bool qaspider1 ( ) { // // delete the 'qatest123' collection // //static bool s_x1 = false; if ( ! s_flags[0] ) { s_flags[0] = true; if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) ) return false; } // // add the 'qatest123' collection // //static bool s_x2 = false; if ( ! s_flags[1] ) { s_flags[1] = true; if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" , // checksum of reply expected 238170006 ) ) return false; } // restrict hopcount to 0 or 1 in url filters so we do not spider // too deep //static bool s_z1 = false; if ( ! s_flags[2] ) { s_flags[2] = true; SafeBuf sb; sb.safePrintf("&c=qatest123&" // make it the custom filter "ufp=0&" "fe=%%21ismanualadd+%%26%%26+%%21insitelist&hspl=0&hspl=1&fsf=0.000000&mspr=0&mspi=1&xg=1000&fsp=-3&" // take out hopcount for now, just test quotas // "fe1=tag%%3Ashallow+%%26%%26+hopcount%%3C%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=3&" // just one spider out allowed for consistency "fe1=tag%%3Ashallow+%%26%%26+sitepages%%3C%%3D20&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&" "fe2=default&hspl2=0&hspl2=1&fsf2=1.000000&mspr2=0&mspi2=1&xg2=1000&fsp2=45&" ); if ( ! getUrl ( "/admin/filters",0,sb.getBufStart()) ) return false; } // set the site list to // a few sites //static bool s_z2 = false; if ( ! s_flags[3] ) { s_flags[3] = true; SafeBuf sb; sb.safePrintf("&c=qatest123&format=xml&sitelist="); sb.urlEncode("tag:shallow site:www.walmart.com\r\n" "tag:shallow site:http://www.ibm.com/\r\n"); sb.nullTerm(); if ( ! getUrl ("/admin/settings",0,sb.getBufStart() ) ) return false; } // // use the add url interface now // walmart.com above was not seeded because of the site: directive // so this will seed it. // //static bool s_y2 = false; if ( ! s_flags[4] ) { s_flags[4] = true; SafeBuf sb; // delim=+++URL: sb.safePrintf("&c=qatest123" "&format=json" "&strip=1" "&spiderlinks=1" "&urls=www.walmart.com+ibm.com" ); // . now a list of websites we want to spider // . the space is already encoded as + //sb.urlEncode(s_urls1); if ( ! getUrl ( "/admin/addurl",0,sb.getBufStart()) ) return false; } // // wait for spidering to stop // checkagain: // wait until spider finishes. check the spider status page // in json to see when completed //static bool s_k1 = false; if ( ! s_flags[5] ) { // wait 5 seconds, call sleep timer... then call qatest() //usleep(5000000); // 5 seconds wait(3.0); s_flags[5] = true; return false; } if ( ! s_flags[15] ) { s_flags[15] = true; if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) ) return false; } //static bool s_k2 = false; if ( ! s_flags[6] ) { // ensure spiders are done. // "Nothing currently available to spider" if ( s_content&&!strstr(s_content,"Nothing currently avail")){ s_flags[5] = false; s_flags[15] = false; goto checkagain; } s_flags[6] = true; } // wait for index msg4 to not be cached to ensure all results indexed if ( ! s_flags[22] ) { s_flags[22] = true; wait(1.5); } // verify no results for gbhopcount:2 query //static bool s_y4 = false; if ( ! s_flags[7] ) { s_flags[7] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&" "q=gbhopcount%3A2", -1672870556 ) ) return false; } // but some for gbhopcount:0 query //static bool s_t0 = false; if ( ! s_flags[8] ) { s_flags[8] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&" "q=gbhopcount%3A0", 908338607 ) ) return false; } // check facet sections query for walmart //static bool s_y5 = false; if ( ! s_flags[9] ) { s_flags[9] = true; if ( ! getUrl ( "/search?c=qatest123&format=json&stream=1&" "q=gbfacetstr%3Agbxpathsitehash2492664135", 55157060 ) ) return false; } //static bool s_y6 = false; if ( ! s_flags[10] ) { s_flags[10] = true; if ( ! getUrl ( "/get?page=4&q=gbfacetstr:gbxpathsitehash2492664135&qlang=xx&c=qatest123&d=9861563119&cnsp=0" , 999 ) ) return false; } // in xml //static bool s_y7 = false; if ( ! s_flags[11] ) { s_flags[11] = true; if ( ! getUrl ( "/get?xml=1&page=4&q=gbfacetstr:gbxpathsitehash2492664135&qlang=xx&c=qatest123&d=9861563119&cnsp=0" , 999 ) ) return false; } // and json //static bool s_y8 = false; if ( ! s_flags[12] ) { s_flags[12] = true; if ( ! getUrl ( "/get?json=1&page=4&q=gbfacetstr:gbxpathsitehash2492664135&qlang=xx&c=qatest123&d=9861563119&cnsp=0" , 999 ) ) return false; } // delete the collection //static bool s_fee = false; // if ( ! s_flags[13] ) { // s_flags[13] = true; // if ( ! getUrl ( "/admin/delcoll?delcoll=qatest123" ) ) // return false; // } if ( ! s_flags[17] ) { s_flags[17] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&" "q=site2%3Awww.walmart.com+" "gbsortby%3Agbspiderdate", 999 ) ) return false; } // xpath is like a title here i think. check the returned // facet table in the left column if ( ! s_flags[18] ) { s_flags[18] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=html&" "q=gbfacetstr%3Agbxpathsitehash3624590799" , 999 ) ) return false; } if ( ! s_flags[19] ) { s_flags[19] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&xml=1&" "q=gbfacetint%3Agbhopcount" , 999 ) ) return false; } if ( ! s_flags[20] ) { s_flags[20] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&json=1&" "q=gbfacetint%3Alog.score" , 999 ) ) return false; } if ( ! s_flags[21] ) { s_flags[21] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&xml=1&" "q=gbfacetfloat%3Atalks.rating" , 999 ) ) return false; } if ( ! s_flags[23] ) { s_flags[23] = true; // test facets mixed with gigabits in left hand column if ( ! getUrl ( "/search?c=qatest123&qa=1&html=1&" "q=gbfacetint%3Agbhopcount+walmart" , 999 ) ) return false; } //static bool s_fee2 = false; if ( ! s_flags[14] ) { s_flags[14] = true; log("qa: SUCCESSFULLY COMPLETED " "QA SPIDER1 TEST"); return true; } return true; }
bool qainject2 ( ) { //if ( ! s_callback ) s_callback = qainject2; // // delete the 'qatest123' collection // //static bool s_x1 = false; if ( ! s_flags[0] ) { s_flags[0] = true; if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) ) return false; } // // add the 'qatest123' collection // //static bool s_x2 = false; if ( ! s_flags[1] ) { s_flags[1] = true; if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" , // checksum of reply expected 238170006 ) ) return false; } // // try delimeter based injecting // //static bool s_y2 = false; if ( ! s_flags[7] ) { s_flags[7] = true; SafeBuf sb; // delim=+++URL: sb.safePrintf("&c=qatest123&deleteurl=0&" "delim=%%2B%%2B%%2BURL%%3A&format=xml&u=xyz.com&" "hasmime=1&content="); // use injectme3 file SafeBuf ubuf; ubuf.load("./injectme3"); sb.urlEncode(ubuf.getBufStart()); if ( ! getUrl ( "/admin/inject", // check reply, seems to have only a single // docid in it -1970198487, sb.getBufStart()) ) return false; } // now query check //static bool s_y4 = false; if ( ! s_flags[8] ) { wait(1.5); s_flags[8] = true; return false; } if ( ! s_flags[14] ) { s_flags[14] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe", -1804253505 ) ) return false; } //static bool s_y5 = false; if ( ! s_flags[9] ) { s_flags[9] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports" "+news&ns=1&tml=20&smxcpl=30&" "sw=10&showimages=1" ,-1874756636 ) ) return false; } //static bool s_y6 = false; if ( ! s_flags[10] ) { s_flags[10] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports" "+news&ns=1&tml=20&smxcpl=30&" "sw=10&showimages=0&hacr=1" ,1651330319 ) ) return false; } //static bool s_y7 = false; if ( ! s_flags[11] ) { s_flags[11] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports" "+news&ns=1&tml=20&smxcpl=30&" "sw=10&showimages=0&sc=1" ,-1405546537 ) ) return false; } // // delete the 'qatest123' collection // if ( ! s_flags[12] ) { s_flags[12] = true; if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) ) return false; } //static bool s_fee2 = false; if ( ! s_flags[13] ) { s_flags[13] = true; log("qa: SUCCESSFULLY COMPLETED " "QA INJECT TEST 2"); //if ( s_callback == qainject ) exit(0); return true; } return true; }
// // the injection qa test suite // bool qainject1 ( ) { //if ( ! s_callback ) s_callback = qainject1; // // delete the 'qatest123' collection // //static bool s_x1 = false; if ( ! s_flags[0] ) { s_flags[0] = true; if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) ) return false; } // // add the 'qatest123' collection // //static bool s_x2 = false; if ( ! s_flags[1] ) { s_flags[1] = true; if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" , // checksum of reply expected 238170006 ) ) return false; } // this only loads once loadUrls(); long max = s_ubuf2.length()/(long)sizeof(char *); //max = 1; // // inject urls, return false if not done yet // //static bool s_x4 = false; if ( ! s_flags[2] ) { // TODO: try delimeter based injection too //static long s_ii = 0; for ( ; s_flags[20] < max ; ) { // inject using html api SafeBuf sb; sb.safePrintf("&c=qatest123&deleteurl=0&" "format=xml&u="); sb.urlEncode ( s_urlPtrs[s_flags[20]] ); // the content sb.safePrintf("&hasmime=1"); // sanity //if ( strstr(s_urlPtrs[s_flags[20]],"wdc.htm") ) // log("hey"); sb.safePrintf("&content="); sb.urlEncode(s_contentPtrs[s_flags[20]] ); sb.nullTerm(); // pre-inc it in case getUrl() blocks s_flags[20]++;//ii++; if ( ! getUrl("/admin/inject", 0, // no idea what crc to expect sb.getBufStart()) ) return false; } s_flags[2] = true; } // +the //static bool s_x5 = false; if ( ! s_flags[3] ) { wait(1.5); s_flags[3] = true; return false; } if ( ! s_flags[16] ) { s_flags[16] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe", 702467314 ) ) return false; } // sports news //static bool s_x7 = false; if ( ! s_flags[4] ) { s_flags[4] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&" "q=sports+news",2009472889 ) ) return false; } // 'washer & dryer' does some algorithmic synonyms 'washer and dryer' if ( ! s_flags[15] ) { s_flags[15] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&" "debug=1&q=washer+%26+dryer",9999 ) ) return false; } // // mdw: query reindex test // // if ( ! s_flags[30] ) { // s_flags[30] = true; // if ( ! getUrl ( "/admin/reindex?c=qatest123&qa=1&format=xml&" // "debug=1&q=sports",9999 ) ) // return false; // } // // temp end it here // return true; // // eject/delete the urls // //static long s_ii2 = 0; for ( ; s_flags[5] < max ; ) { // reject using html api SafeBuf sb; sb.safePrintf( "/admin/inject?c=qatest123&deleteurl=1&" "format=xml&u="); sb.urlEncode ( s_urlPtrs[s_flags[5]] ); sb.nullTerm(); // pre-inc it in case getUrl() blocks //s_ii2++; s_flags[5]++; if ( ! getUrl ( sb.getBufStart() , 0 ) ) return false; } // // make sure no results left, +the // if ( ! s_flags[6] ) { wait(1.5); s_flags[6] = true; return false; } if ( ! s_flags[14] ) { s_flags[14] = true; if ( ! getUrl ( "/search?c=qatest123&qa=2&format=xml&q=%2Bthe", -1672870556 ) ) return false; } //static bool s_fee2 = false; if ( ! s_flags[13] ) { s_flags[13] = true; log("qa: SUCCESSFULLY COMPLETED " "QA INJECT TEST 1"); //if ( s_callback == qainject ) exit(0); return true; } return true; }
bool qajson ( ) { // // delete the 'qatest123' collection // //static bool s_x1 = false; if ( ! s_flags[0] ) { s_flags[0] = true; if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) ) return false; } // // add the 'qatest123' collection // //static bool s_x2 = false; if ( ! s_flags[1] ) { s_flags[1] = true; if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" , // checksum of reply expected 238170006 ) ) return false; } // add the 50 urls if ( ! s_flags[3] ) { s_flags[3] = true; SafeBuf sb; sb.safePrintf("&c=qatest123" "&format=json" "&strip=1" "&spiderlinks=0" "&urls="//www.walmart.com+ibm.com" ); sb.urlEncode ( s_ubuf4 ); // . now a list of websites we want to spider // . the space is already encoded as + if ( ! getUrl ( "/admin/addurl",0,sb.getBufStart()) ) return false; } // // wait for spidering to stop // checkagain: // wait until spider finishes. check the spider status page // in json to see when completed //static bool s_k1 = false; if ( ! s_flags[5] ) { // wait 5 seconds, call sleep timer... then call qatest() //usleep(5000000); // 5 seconds wait(3.0); s_flags[5] = true; return false; } if ( ! s_flags[15] ) { s_flags[15] = true; if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) ) return false; } //static bool s_k2 = false; if ( ! s_flags[6] ) { // ensure spiders are done. // "Nothing currently available to spider" if ( s_content&&!strstr(s_content,"Nothing currently avail")){ s_flags[5] = false; s_flags[15] = false; goto checkagain; } s_flags[6] = true; } if ( ! s_flags[7] ) { s_flags[7] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&" "q=type%3Ajson+meta.authors%3Appk", -1310551262 ) ) return false; } if ( ! s_flags[8] ) { s_flags[8] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&n=100&" "q=type%3Ajson", -1310551262 ) ) return false; } if ( ! s_flags[9] ) { s_flags[9] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfacetstr%3Ameta.authors", -1310551262 ) ) return false; } if ( ! s_flags[10] ) { s_flags[10] = true; // this has > 50 values for the facet field hash if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfacetstr%3Astrings.key", -1310551262 ) ) return false; } // other query tests... if ( ! s_flags[12] ) { s_flags[12] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=inurl2%3Aquirksmode.org%2Fm%2F", -1310551262 ) ) return false; } if ( ! s_flags[13] ) { s_flags[13] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=site%3Aquirksmode.org", -1310551262 ) ) return false; } // test gbfieldmatch:field:"quoted value" query to ensure it converts // the quoted value into the right int32 if ( ! s_flags[14] ) { s_flags[14] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfieldmatch%3Astrings.key" "%3Ainvestigate-tweet", -1310551262 ) ) return false; } if ( ! s_flags[15] ) { s_flags[15] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfieldmatch%3Astrings.key" "%3A\"Maemo+Browser\"", -1310551262 ) ) return false; } if ( ! s_flags[16] ) { s_flags[16] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfieldmatch%3Astrings.key" "%3A\"Google+Wireless+Transcoder\"", -1310551262 ) ) return false; } // this should have no results, not capitalized if ( ! s_flags[17] ) { s_flags[17] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfieldmatch%3Astrings.key%3A\"samsung\"", -1310551262 ) ) return false; } if ( ! s_flags[18] ) { s_flags[18] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfieldmatch%3Astrings.key%3ASamsung", -1310551262 ) ) return false; } if ( ! s_flags[18] ) { s_flags[18] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&" "q=gbfieldmatch%3Astrings.key%3A\"Samsung\"", -1310551262 ) ) return false; } //static bool s_fee2 = false; if ( ! s_flags[20] ) { s_flags[20] = true; log("qa: SUCCESSFULLY COMPLETED " "QA JSON TEST"); return true; } return true; }
bool qaspider2 ( ) { // // delete the 'qatest123' collection // //static bool s_x1 = false; if ( ! s_flags[0] ) { s_flags[0] = true; if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) ) return false; } // // add the 'qatest123' collection // //static bool s_x2 = false; if ( ! s_flags[1] ) { s_flags[1] = true; if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" , // checksum of reply expected 238170006 ) ) return false; } // restrict hopcount to 0 or 1 in url filters so we do not spider // too deep //static bool s_z1 = false; if ( ! s_flags[2] ) { s_flags[2] = true; SafeBuf sb; sb.safePrintf("&c=qatest123&" // make it the custom filter "ufp=0&" "fe=%%21ismanualadd+%%26%%26+%%21insitelist&hspl=0&hspl=1&fsf=0.000000&mspr=0&mspi=1&xg=1000&fsp=-3&" // take out hopcount for now, just test quotas // "fe1=tag%%3Ashallow+%%26%%26+hopcount%%3C%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=3&" // sitepages is a little fuzzy so take it // out for this test and use hopcount!!! //"fe1=tag%%3Ashallow+%%26%%26+sitepages%%3C%%3D20&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&" "fe1=tag%%3Ashallow+%%26%%26+hopcount<%%3D1&hspl1=0&hspl1=1&fsf1=1.000000&mspr1=1&mspi1=1&xg1=1000&fsp1=45&" "fe2=default&hspl2=0&hspl2=1&fsf2=1.000000&mspr2=0&mspi2=1&xg2=1000&fsp2=45&" ); if ( ! getUrl ( "/admin/filters",0,sb.getBufStart()) ) return false; } // set the site list to // a few sites // these should auto seed so no need to use addurl //static bool s_z2 = false; if ( ! s_flags[3] ) { s_flags[3] = true; SafeBuf sb; sb.safePrintf("&c=qatest123&format=xml&sitelist="); sb.urlEncode(//walmart has too many pages at depth 1, so remove it //"tag:shallow www.walmart.com\r\n" "tag:shallow http://www.ibm.com/\r\n"); sb.nullTerm(); if ( ! getUrl ("/admin/settings",0,sb.getBufStart() ) ) return false; } // // wait for spidering to stop // checkagain: // wait until spider finishes. check the spider status page // in json to see when completed //static bool s_k1 = false; if ( ! s_flags[4] ) { //usleep(5000000); // 5 seconds s_flags[4] = true; wait(3.0); return false; } if ( ! s_flags[14] ) { s_flags[14] = true; if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) ) return false; } //static bool s_k2 = false; if ( ! s_flags[5] ) { // ensure spiders are done. // "Nothing currently available to spider" if ( s_content&&!strstr(s_content,"Nothing currently avail")){ s_flags[4] = false; s_flags[14] = false; goto checkagain; } s_flags[5] = true; } // verify no results for gbhopcount:2 query //static bool s_y4 = false; if ( ! s_flags[6] ) { s_flags[6] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&" "q=gbhopcount%3A2", -1310551262 ) ) return false; } // but some for gbhopcount:0 query //static bool s_t0 = false; if ( ! s_flags[7] ) { s_flags[7] = true; if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&n=500&" "q=gbhopcount%3A0", 999 ) ) return false; } // check facet sections query for walmart //static bool s_y5 = false; if ( ! s_flags[8] ) { s_flags[8] = true; if ( ! getUrl ( "/search?c=qatest123&format=json&stream=0&" "q=gbfacetstr%3Agbxpathsitehash3311332088", 999 ) ) return false; } // wait for some reason if ( ! s_flags[15] ) { s_flags[15] = true; wait(1.5); return false; } //static bool s_y6 = false; if ( ! s_flags[9] ) { s_flags[9] = true; if ( ! getUrl ( "/get?page=4&q=gbfacetstr:gbxpathsitehash3311332088&qlang=xx&c=qatest123&d=9577169402&cnsp=0" , 999 ) ) return false; } // in xml //static bool s_y7 = false; if ( ! s_flags[10] ) { s_flags[10] = true; if ( ! getUrl ( "/get?xml=1&page=4&q=gbfacetstr:gbxpathsitehash2492664135&qlang=xx&c=qatest123&d=9577169402&cnsp=0" , 999 ) ) return false; } // and json //static bool s_y8 = false; if ( ! s_flags[11] ) { s_flags[11] = true; if ( ! getUrl ( "/get?json=1&page=4&q=gbfacetstr:gbxpathsitehash2492664135&qlang=xx&c=qatest123&d=9577169402&cnsp=0" , 999 ) ) return false; } // delete the collection //static bool s_fee = false; // if ( ! s_flags[12] ) { // s_flags[12] = true; // if ( ! getUrl ( "/admin/delcoll?delcoll=qatest123" ) ) // return false; // } //static bool s_fee2 = false; if ( ! s_flags[13] ) { s_flags[13] = true; log("qa: SUCCESSFULLY COMPLETED " "QA SPIDER2 TEST"); return true; } return true; }