// . just keep it simple and store gbunturked for each page that has // unturked events. // . have it store gbsemiturked if similar pages from its site have been turked // . have it store gbturked if it has been turked itself // . if page events change then we must re-turk it! // . just grabbing the pages based on docid should make us turk the // most popular sites first on avg... bool sendPageTurkEval ( State60 *st ) { // if we already have a docid, the display the editor page if ( st->m_docId ) return displayEditorPage ( st ); // . otherwise, do a search to get the best page int64_t termId = hash64n("gbunturked"); // event id range is forced to 1 to 1 for this one since special key128_t startKey = g_datedb.makeStartKey (termId); key128_t endKey = g_datedb.makeEndKey (termId); // get all the transactions associated with this username if ( ! st->m_msg0.getList ( -1 , // hostId -1 , // ip -1 , // port 0 , // maxAge , false , // addToCache? RDB_DATEDB , // rdbId st->m_coll , // coll &st->m_list , &startKey , &endKey , 200000 , // minRecSizes (get all!) st , // state gotDatedbListWrapper , 0 )) // niceness // return false if that blocked on us return false; // error? if ( g_errno ) { log("turk: error reading datedb for docs to turk: %s", mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerrno(g_errno)); } // got the list. this should send a page back to httpserver gotDatedbList ( st ); }
// . returns false if blocked and callback will be called, true otherwise // . sets g_errno on error bool Msg7::inject ( void *state , void (*callback)(void *state) //long spiderLinksDefault , //char *collOveride ) { ) { GigablastRequest *gr = &m_gr; char *coll2 = gr->m_coll; CollectionRec *cr = g_collectiondb.getRec ( coll2 ); if ( ! cr ) { g_errno = ENOCOLLREC; return true; } m_state = state; m_callback = callback; // shortcut XmlDoc *xd = &m_xd; if ( ! gr->m_url ) { log("inject: no url provied to inject"); g_errno = EBADURL; return true; } //char *coll = cr->m_coll; // test //diffbotReply = "{\"request\":{\"pageUrl\":\"http://www.washingtonpost.com/2011/03/10/ABe7RaQ_moreresults.html\",\"api\":\"article\",\"version\":3},\"objects\":[{\"icon\":\"http://www.washingtonpost.com/favicon.ico\",\"text\":\"In Case You Missed It\nWeb Hostess Live: The latest from the Web (vForum, May 15, 2014; 3:05 PM)\nGot Plans: Advice from the Going Out Guide (vForum, May 15, 2014; 2:05 PM)\nWhat to Watch: TV chat with Hank Stuever (vForum, May 15, 2014; 1:10 PM)\nColor of Money Live (vForum, May 15, 2014; 1:05 PM)\nWeb Hostess Live: The latest from the Web (vForum, May 15, 2014; 12:25 PM)\nMichael Devine outdoor entertaining and design | Home Front (vForum, May 15, 2014; 12:20 PM)\nThe Answer Sheet: Education chat with Valerie Strauss (vForum, May 14, 2014; 2:00 PM)\nThe Reliable Source Live (vForum, May 14, 2014; 1:05 PM)\nAsk Tom: Rants, raves and questions on the DC dining scene (vForum, May 14, 2014; 12:15 PM)\nOn Parenting with Meghan Leahy (vForum, May 14, 2014; 12:10 PM)\nAsk Aaron: The week in politics (vForum, May 13, 2014; 3:05 PM)\nEugene Robinson Live (vForum, May 13, 2014; 2:05 PM)\nTuesdays with Moron: Chatological Humor Update (vForum, May 13, 2014; 12:00 PM)\nComPost Live with Alexandra Petri (vForum, May 13, 2014; 11:05 AM)\nAsk Boswell: Redskins, Nationals and Washington sports (vForum, May 12, 2014; 1:50 PM)\nAdvice from Slate's 'Dear Prudence' (vForum, May 12, 2014; 1:40 PM)\nDr. Gridlock (vForum, May 12, 2014; 1:35 PM)\nSwitchback: Talking Tech (vForum, May 9, 2014; 12:05 PM)\nThe Fix Live (vForum, May 9, 2014; 12:00 PM)\nWhat to Watch: TV chat with Hank Stuever (vForum, May 8, 2014; 1:10 PM)\nMore News\",\"title\":\"The Washington Post\",\"diffbotUri\":\"article|3|828850106\",\"pageUrl\":\"http://www.washingtonpost.com/2011/03/10/ABe7RaQ_moreresults.html\",\"humanLanguage\":\"en\",\"html\":\"<p>In Case You Missed It<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/web-hostess-140515-new.html\\\">Web Hostess Live: The latest from the Web<\\/a> <\\/p>\n<p>(vForum, May 15, 2014; 3:05 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/got-plans-05-15-2014.html\\\">Got Plans: Advice from the Going Out Guide<\\/a> <\\/p>\n<p>(vForum, May 15, 2014; 2:05 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/tv-chat-140515.html\\\">What to Watch: TV chat with Hank Stuever<\\/a> <\\/p>\n<p>(vForum, May 15, 2014; 1:10 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/color-of-money-live-20140515.html\\\">Color of Money Live<\\/a> <\\/p>\n<p>(vForum, May 15, 2014; 1:05 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/web-hostess-140515-new.html\\\">Web Hostess Live: The latest from the Web<\\/a> <\\/p>\n<p>(vForum, May 15, 2014; 12:25 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/home-front-0515.html\\\">Michael Devine outdoor entertaining and design | Home Front<\\/a> <\\/p>\n<p>(vForum, May 15, 2014; 12:20 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/the-answer-sheet-20140514.html\\\">The Answer Sheet: Education chat with Valerie Strauss<\\/a> <\\/p>\n<p>(vForum, May 14, 2014; 2:00 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/the-reliable-source-140514-new.html\\\">The Reliable Source Live<\\/a> <\\/p>\n<p>(vForum, May 14, 2014; 1:05 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/ask-tom-5-14-14.html\\\">Ask Tom: Rants, raves and questions on the DC dining scene <\\/a> <\\/p>\n<p>(vForum, May 14, 2014; 12:15 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/parenting-0514.html\\\">On Parenting with Meghan Leahy<\\/a> <\\/p>\n<p>(vForum, May 14, 2014; 12:10 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/post-politics-ask-aaron-051313.html\\\">Ask Aaron: The week in politics<\\/a> <\\/p>\n<p>(vForum, May 13, 2014; 3:05 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/opinion-focus-with-eugene-robinson-20140513.html\\\">Eugene Robinson Live<\\/a> <\\/p>\n<p>(vForum, May 13, 2014; 2:05 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/gene-weingarten-140513.html\\\">Tuesdays with Moron: Chatological Humor Update<\\/a> <\\/p>\n<p>(vForum, May 13, 2014; 12:00 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/compost-live-140513.html\\\">ComPost Live with Alexandra Petri<\\/a> <\\/p>\n<p>(vForum, May 13, 2014; 11:05 AM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/ask-boswell-1400512.html\\\">Ask Boswell: Redskins, Nationals and Washington sports<\\/a> <\\/p>\n<p>(vForum, May 12, 2014; 1:50 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/dear-prudence-140512.html\\\">Advice from Slate's 'Dear Prudence'<\\/a> <\\/p>\n<p>(vForum, May 12, 2014; 1:40 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/gridlock-0512.html\\\">Dr. Gridlock <\\/a> <\\/p>\n<p>(vForum, May 12, 2014; 1:35 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/switchback-20140509.html\\\">Switchback: Talking Tech<\\/a> <\\/p>\n<p>(vForum, May 9, 2014; 12:05 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/live-fix-140509.html\\\">The Fix Live<\\/a> <\\/p>\n<p>(vForum, May 9, 2014; 12:00 PM)<\\/p>\n<p> <a href=\\\"http://live.washingtonpost.com/tv-chat-140508.html\\\">What to Watch: TV chat with Hank Stuever<\\/a> <\\/p>\n<p>(vForum, May 8, 2014; 1:10 PM)<\\/p>\n<p> <a href=\\\"http://www.washingtonpost.com/2011/03/10/ /2011/03/10/ABe7RaQ_moreresults.html ?startIndex=20&dwxLoid=\\\">More News <\\/a> <\\/p>\",\"date\":\"Tue, 13 May 2014 00:00:00 GMT\",\"type\":\"article\"}]}"; if ( g_repairMode ) { g_errno = EREPAIRING; return true; } // this will be NULL if the "content" was empty or not given char *content = gr->m_content; // . try the uploaded file if nothing in the text area // . this will be NULL if the "content" was empty or not given if ( ! content ) content = gr->m_contentFile; if ( m_firstTime ) { m_firstTime = false; m_start = content; } // save current start since we update it next char *start = m_start; // if this is empty we are done //if ( ! start ) // return true; char *delim = gr->m_contentDelim; if ( delim && ! delim[0] ) delim = NULL; if ( m_fixMe ) { // we had made the first delim char a \0 to index the // previous document, now put it back to what it was *m_start = *delim; // i guess unset this m_fixMe = false; } // if we had a delimeter... if ( delim ) { // we've saved m_start as "start" above, // so find the next delimeter after it and set that to m_start // add +1 to avoid infinite loop m_start = strstr(start+1,delim); // for injecting "start" set this to \0 if ( m_start ) { // null term it *m_start = '\0'; // put back the original char on next round...? m_fixMe = true; } } // this is the url of the injected content m_injectUrlBuf.safeStrcpy ( gr->m_url ); bool modifiedUrl = false; // if we had a delimeter we must make a fake url // if ( delim ) { // // if user had a <url> or <doc> or <docid> field use that // char *hint = strcasestr ( start , "<url>" ); // if ( hint ) { // modifiedUrl = true; // ... // } // } // if we had a delimeter thus denoting multiple items/documents to // be injected, we must create unique urls for each item. if ( delim && ! modifiedUrl ) { // use hash of the content long long ch64 = hash64n ( start , 0LL ); // normalize it Url u; u.set ( gr->m_url ); // reset it m_injectUrlBuf.reset(); // by default append a -<ch64> to the provided url m_injectUrlBuf.safePrintf("%s-%llu",u.getUrl(),ch64); } // count them m_injectCount++; m_inUse = true; if ( ! xd->injectDoc ( m_injectUrlBuf.getBufStart() , cr , start , // content , gr->m_diffbotReply, gr->m_hasMime, // content starts with http mime? gr->m_hopCount, gr->m_charset, gr->m_deleteUrl, gr->m_contentTypeStr, // text/html text/xml gr->m_spiderLinks , gr->m_newOnly, // index iff new this , doneInjectingWrapper9 ) ) // we blocked... return false; m_inUse = false; return true; }