// . returns false if blocked, true otherwise // . sets g_errno on error // . "termIds/termFreqs" should NOT be on the stack in case we block // . i based this on ../titled/Msg25.cpp since it sends out multiple msgs at // the same time, too bool Msg37::getTermFreqs ( collnum_t collnum,//char *coll , long maxAge , long long *termIds , long numTerms , long long *termFreqs , void *state , void (* callback)(void *state ) , long niceness , bool exactCount ) { // warning if ( collnum < 0 ) log(LOG_LOGIC,"net: bad collection. msg37."); // we haven't got any responses as of yet or sent any requests m_callback = callback; m_state = state; m_exactCount = exactCount; m_niceness = niceness; m_numReplies = 0; m_numRequests = 0; m_errno = 0; m_numTerms = numTerms; m_termFreqs = termFreqs; m_collnum = collnum; //m_coll = coll; m_maxAge = maxAge; m_termIds = termIds; // set all to 1 in case there's an error for ( long i = 0 ; i < m_numTerms ; i++ ) { //if ( ignore[i] ) m_termFreqs[i] = 0LL; //else m_termFreqs[i] = 1LL; m_termFreqs[i] = 1LL; } // reset m_i = 0; memset ( m_inUse , 0 , MAX_MSG36_OUT ); // launch the requests if ( ! launchRequests() ) return false; // set our array gotTermFreq ( NULL ); // we did not block, return true return true; }
// . returns false if blocked, returns true otherwise // . sets g_errno on error bool Images::getThumbnail ( char *pageSite , long siteLen , long long docId , XmlDoc *xd , collnum_t collnum,//char *coll , //char **statusPtr , long hopCount, void *state , void (*callback)(void *state) ) { // sanity check if ( ! m_setCalled ) { char *xx=NULL;*xx=0; } // we haven't had any error m_hadError = 0; // no reason to stop yet m_stopDownloading = false; // reset here now m_i = 0; m_j = 0; m_phase = 0; // sanity check if ( ! m_pageUrl ) { char *xx=NULL;*xx=0; } // sanity check if ( ! pageSite ) { char *xx=NULL;*xx=0; } // we need to be a permalink //if ( ! isPermalink ) return true; // save these //m_statusPtr = statusPtr; // save this m_collnum = collnum; m_docId = docId; m_callback = callback; m_state = state; // if this doc is a json diffbot reply it already has the primary // image selected so just use that m_xd = xd; if ( m_xd->m_isDiffbotJSONObject ) return downloadImages(); // if no candidates, we are done, no error if ( m_numImages == 0 ) return true; //Vector *v = xd->getTagVector(); // this will at least have one component, the 0/NULL component uint32_t *tph = xd->getTagPairHash32(); // must not block or error on us if ( tph == (void *)-1 ) { char *xx=NULL;*xx=0; } // must not error on use? if ( ! tph ) { char *xx=NULL;*xx=0; } // . see DupDetector.cpp, very similar to this // . see how many pages we have from our same site with our same // html template (and that are permalinks) char buf[2000]; char c = pageSite[siteLen]; pageSite[siteLen]=0; // site MUST NOT start with "http://" if ( strncmp ( pageSite , "http://", 7)==0){char*xx=NULL;*xx=0;} // this must match what we hash in XmlDoc::hashNoSplit() sprintf ( buf , "gbsitetemplate:%lu%s", (unsigned long)*tph,pageSite ); pageSite[siteLen]=c; // TODO: make sure this is a no-split termid storage thingy // in Msg14.cpp Query q; if ( ! q.set2 ( buf , langUnknown , false ) ) // return true with g_errno set on error return true; // store the termid long long termId = q.getTermId(0); key144_t startKey ; key144_t endKey ; g_posdb.makeStartKey(&startKey,termId); g_posdb.makeEndKey (&endKey ,termId); // get shard of that (this termlist is sharded by termid - // see XmlDoc.cpp::hashNoSplit() where it hashes gbsitetemplate: term) long shardNum = g_hostdb.getShardNumByTermId ( &startKey ); // if ( ! m_msg36.getTermFreq ( m_collnum , // 0 , // maxAge // termId , // this , // gotTermFreqWrapper , // MAX_NICENESS , // true , // exact count? // false , // inc count? // false , // dec count? // false )) // is split? // return false; // just use msg0 and limit to like 1k or something if ( ! m_msg0.getList ( -1 , // hostid -1 , // ip -1 , // port 0 , // maxAge false , // addToCache? RDB_POSDB , m_collnum , &m_list , // RdbList ptr (char *)&startKey , (char *)&endKey , 1024 , // minRecSize this , gotTermListWrapper , MAX_NICENESS , false , // err correction? true , // inc tree? true , // domergeobsolete -1 , // firstHostId 0 , // start filenum -1 , // numFiles 30 , // timeout -1 , // syncpoint -1 , // preferlocalreads NULL , // msg5 NULL , // msg5b false , // isRealMerge? true , // allow pg cache false , // focelocalindexdb false , // doIndexdbSplit? shardNum ))// force paritysplit return false; // did not block return gotTermFreq(); }
// . returns false if blocked, true otherwise // . sets g_errno on error // . make a web page displaying the config of this host // . call g_httpServer.sendDynamicPage() to send it bool sendPageIndexdb ( TcpSocket *s , HttpRequest *r ) { // . get fields from cgi field of the requested url // . get the search query long queryLen = 0; char *query = r->getString ( "q" , &queryLen , NULL /*default*/); // ensure query not too big if ( queryLen >= MAX_QUERY_LEN ) { g_errno = EQUERYTOOBIG; return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); } // get the collection long collLen = 0; char *coll = r->getString("c",&collLen); if ( ! coll || ! coll[0] ) { //coll = g_conf.m_defaultColl; coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() ); collLen = gbstrlen(coll); } // ensure collection not too big if ( collLen >= MAX_COLL_LEN ) { g_errno = ECOLLTOOBIG; return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); } // make a state State10 *st ; try { st = new (State10); } catch ( ... ) { g_errno = ENOMEM; log("PageIndexdb: new(%i): %s", sizeof(State10),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));} mnew ( st , sizeof(State10) , "PageIndexdb" ); // password, too long pwdLen = 0 ; char *pwd = r->getString ( "pwd" , &pwdLen ); if ( pwdLen > 31 ) pwdLen = 31; if ( pwdLen > 0 ) strncpy ( st->m_pwd , pwd , pwdLen ); st->m_pwd[pwdLen]='\0'; // get # of records to retreive from IndexList st->m_numRecs = r->getLong ( "numRecs" , 100 ); // use disk, tree, or cache? st->m_useDisk = r->getLong ("ud" , 0 ); st->m_useTree = r->getLong ("ut" , 0 ); st->m_useCache = r->getLong ("uc" , 0 ); st->m_useDatedb= r->getLong ("ub" , 0 ); st->m_add = r->getLong ("add", 0 ); st->m_del = r->getLong ("del", 0 ); // get the termId, if any, from the cgi vars st->m_termId = r->getLongLong ("t", 0LL ) ; // get docid and score st->m_docId = r->getLongLong ("d", 0LL ); st->m_score = r->getLong ("score", 0 ); // copy query/collection memcpy ( st->m_query , query , queryLen ); st->m_queryLen = queryLen; st->m_query [ queryLen ] ='\0'; //memcpy ( st->m_coll , coll , collLen ); //st->m_collLen = collLen; //st->m_coll [ collLen ] ='\0'; st->m_coll = coll; // save the TcpSocket st->m_socket = s; // and if the request is local/internal or not st->m_isAdmin = g_collectiondb.isAdmin ( r , s ); st->m_isLocal = r->isLocal(); st->m_r.copy ( r ); // . check for add/delete request if ( st->m_add || st->m_del ) { key_t startKey = g_indexdb.makeStartKey ( st->m_termId ); key_t endKey = g_indexdb.makeEndKey ( st->m_termId ); // construct the key to add/delete st->m_key = g_indexdb.makeKey ( st->m_termId, st->m_score , st->m_docId , st->m_del ); // make an RdbList out of the key st->m_keyList.set ( (char*)&st->m_key, sizeof(key_t), (char*)&st->m_key, sizeof(key_t), startKey, endKey, 0, false, true ); log ( LOG_INFO, "build: adding indexdb key to indexdb: " "%lx %llx", st->m_key.n1, st->m_key.n0 ); // call msg1 to add/delete key if ( ! st->m_msg1.addList ( &st->m_keyList, RDB_INDEXDB, st->m_coll, st, addedKeyWrapper, false, MAX_NICENESS ) ) return false; // continue to page if no block return gotIndexList ( st ); } if ( ! st->m_query[0] ) return gotIndexList(st); // . set query class // . a boolFlag of 0 means query is not boolean Query q; q.set2 ( query , langUnknown , true ); // 0 = boolFlag, not boolean! // reset st->m_msg36.m_termFreq = 0LL; // if query was provided, use that, otherwise use termId if ( q.getNumTerms() > 0 ) st->m_termId = q.getTermId(0); // skip if nothing else return gotTermFreq ( st ); // get the termfreq of this term! if ( ! st->m_msg36.getTermFreq ( coll , 0 , st->m_termId, st , gotTermFreqWrapper ) ) return false; // otherwise, we didn't block return gotTermFreq ( st ); }
void gotTermFreqWrapper ( void *state ) { gotTermFreq( (State10 *) state ); }