/*
 * Main loop of a single ant process.
 *
 * Creates the ant at the anthill coordinates found in `grid`, sends a start
 * command over `ipc`, then repeatedly receives commands, dispatches them
 * through the handler array and sends back the reply command (if the handler
 * produced one). The loop ends once the ant's state is ANT_STATE_FINAL, after
 * which ipc->stop is set to 1.
 *
 * Returns 0.
 */
int antLoop(ipc_t ipc, grid_t grid)
{
    handler_f *handlers;
    ant_t ant;
    cmd_t cmd;
    message_t message;

    srand(getpid());

    /* The ant starts on the anthill and remembers where the anthill is. */
    ant = antNew();
    ant->ahr = ant->r = grid->anthillRow;
    ant->ahc = ant->c = grid->anthillCol;

    LOGPID("Starting ant %d logic loop.\n", ipc->id);

    handlers = buildHandlerArray();
    antFillHandlerArray(handlers);

    /* Send the start command. */
    cmd = newStart();
    message = mnew(ipc->id, 1, sizeof(struct cmd_start_t), (char *) cmd);
    sendMessage(ipc, message);
    mdel(message);
    free(cmd);

    do {
        message = recvMessage(ipc);
        if (message != NULL) {
            LOGPID("Ant %d received cmd type %d.\n", ipc->id,
                   ((cmd_t) mdata(message))->type);

            cmd = dispatchCmd((void *) ant, (cmd_t) mdata(message), handlers);

            /*
             * Release the received message whether or not the handler
             * produced a reply; previously it was only released on the
             * reply path and leaked otherwise.
             */
            mdel(message);

            if (cmd != NULL) {
                message = mnew(ipc->id, 1, cmdsize(cmd), (char *) cmd);
                sendMessage(ipc, message);
                LOGPID("Ant %d sent cmd type %d.\n", ipc->id,
                       ((cmd_t) mdata(message))->type);
                mdel(message);
                free(cmd);
            }
        }
    } while (ant->state != ANT_STATE_FINAL);

    /* NOTE(review): `ant` and `handlers` are never released here - confirm
     * whether that is intentional (process exit) or a leak. */
    ipc->stop = 1;
    return 0;
}
/*
 * Serialize the game into a memfile and write it to the log file.
 *
 * The `emergency` argument is accepted but not consulted in this function.
 * Always returns TRUE.
 */
int
dosave0(boolean emergency)
{
    int out_fd;
    struct memfile save_buf;
    boolean prev_disable_log = iflags.disable_log;

    mnew(&save_buf, NULL);
    out_fd = logfile;

    /*
     * Logging is disabled when we get here through nh_exit. Turn it back on
     * just long enough for log_finish to update the log header, then restore
     * the previous setting.
     */
    iflags.disable_log = FALSE;
    log_finish(LS_SAVED);
    iflags.disable_log = prev_disable_log;

    /* Shut vision down so an impossible() call cannot cause trouble. */
    vision_recalc(2);

    savegame(&save_buf);
    store_mf(out_fd, &save_buf);    /* store_mf also frees save_buf */

    freedynamicdata();

    return TRUE;
}
int main(int argc, char * argv[]){ int cclients = 10; int sentMsg = 0; printf("Server iniciado para %d clientes.\n", cclients); message_t incomingMsg; message_t msgsent; char * word = "Mensaje del Server"; ipc_t server = (ipc_t) fifoServe(cclients); server->stop = 0; while(sentMsg != 1000){ if((incomingMsg = qget(server->inbox)) != NULL){ printf("%d. Server recibio: %s\n", sentMsg, incomingMsg->data); printf("Manda mensaje a:%d.\n", incomingMsg->header.from); qput(server->outbox, (msgsent = mnew(0,incomingMsg->header.from, strlen(word) + 1, word))); mdel(msgsent); mdel(incomingMsg); sentMsg++; } } stopServer(server); }
/*
 * Open the muxer matching the output url configured under "mux/url".
 *
 * The muxer is chosen by comparing the url's last '.'-suffix against
 * suffix_map; its "new" symbol is then looked up under "mux/<muxer>" and
 * called with the arguments given here.
 *
 * Returns NULL when the url is not configured, no suffix matches, or the
 * symbol lookup yields nothing.
 */
extern tcvp_pipe_t *
s_open_mux(stream_t *s, tcconf_section_t *cs, tcvp_timer_t *t,
           muxed_stream_t *ms)
{
    mux_new_t create = NULL;
    char *url;
    char *suffix;
    char *muxer = NULL;
    int idx;

    if (tcconf_getvalue(cs, "mux/url", "%s", &url) <= 0)
        return NULL;

    suffix = strrchr(url, '.');
    if (suffix != NULL) {
        for (idx = 0; idx < suffix_map_size; idx++) {
            if (strcmp(suffix, suffix_map[idx].suffix) == 0) {
                muxer = suffix_map[idx].muxer;
                break;
            }
        }
    }

    if (muxer != NULL) {
        /* "mux/" (4 chars) + muxer name + terminating NUL */
        char symbol[strlen(muxer) + 5];
        sprintf(symbol, "mux/%s", muxer);
        create = tc2_get_symbol(symbol, "new");
    }

    free(url);

    if (create == NULL)
        return NULL;
    return create(s, cs, t, ms);
}
// . call this when gb startsup // . scan collections to see if any imports were active // . returns false and sets g_errno on failure bool resumeImports ( ) { if ( s_tried ) return true; s_tried = true; if ( g_hostdb.m_hostId != 0 ) return true; for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) { CollectionRec *cr = g_collectiondb.m_recs[i]; if ( ! cr ) continue; if ( ! cr->m_importEnabled ) continue; // each import has its own state // it contains a sequence of msg7s to do simulataneous // injections ImportState *is; try { is = new (ImportState); } catch ( ... ) { g_errno = ENOMEM; log("PageInject: new(%li): %s", (long)sizeof(ImportState),mstrerror(g_errno)); return false; } mnew ( is, sizeof(ImportState) , "isstate"); // assign to cr as well cr->m_importState = is; // and collnum is->m_collnum = cr->m_collnum; // resume the import is->importLoop ( ); } return true; }
// . handle an import request: the read buffer holds a collection number
//   followed (4 bytes in) by a title rec to index
// . always answers through sendReply(slot), either right away or, when
//   indexDoc() blocks, later through doneInjectingWrapper10
// . the request is validated BEFORE the XmlDoc is allocated so that a bad
//   request can no longer leak the XmlDoc
void handleRequest7 ( UdpSlot *slot , long netnice ) {

	char *titleRec     = slot->m_readBuf;
	long  titleRecSize = slot->m_readBufSize;

	// must be big enough to hold the collnum we are about to read
	if ( ! titleRec || titleRecSize < (long)sizeof(long) ) {
		sendReply(slot);
		return;
	}

	// NOTE(review): this reads a "long" but only skips 4 bytes. that only
	// agrees when long is 32 bits - confirm the wire format.
	long collnum = *(long *)titleRec;
	titleRec     += 4;
	titleRecSize -= 4;

	// collnum comes off the network, so range check it before indexing
	if ( collnum < 0 || collnum >= g_collectiondb.m_numRecs ) {
		sendReply(slot);
		return;
	}

	CollectionRec *cr = g_collectiondb.m_recs[collnum];
	if ( ! cr ) {
		sendReply(slot);
		return;
	}

	XmlDoc *xd;
	try { xd = new (XmlDoc); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log("PageInject: import failed: new(%i): %s",
		    (int)sizeof(XmlDoc),mstrerror(g_errno));
		sendReply(slot);
		return;
	}
	mnew ( xd, sizeof(XmlDoc) , "PageInject" );

	// we are injecting a titlerec from an import operation so use set2()
	xd->set2 ( titleRec ,
		   titleRecSize ,
		   cr->m_coll ,
		   NULL , // pbuf
		   MAX_NICENESS ,
		   NULL ); // sreq

	// log it i guess
	log("inject: importing %s",xd->m_firstUrl.getUrl());

	// call this when done indexing
	xd->m_state            = xd;
	xd->m_callback1        = doneInjectingWrapper10;
	xd->m_isImporting      = true;
	xd->m_isImportingValid = true;

	// hack this so the reply can be sent from the callback
	xd->m_slot = slot;

	// then index it. return if it would block.
	if ( ! xd->indexDoc() ) return;

	// NOTE(review): xd is not released on this non-blocking path -
	// confirm who owns it once indexDoc() has completed.
	sendReply ( slot );
}
// . entry point for the turk page
// . needs a collection ("c") and a username (taken from the "username"
//   cookie, or the "username", "user" or "code" cgi parms, in that order)
// . sends the eval page when "edit" is non-zero, otherwise the stats page
// . all request validation happens before the State60 is allocated so an
//   invalid request can not leak it
bool sendPageTurk ( TcpSocket *s , HttpRequest *r ) {

	char *coll = r->getString("c");
	if ( ! coll )
		return g_httpServer.sendErrorReply( s, 500, "No collection");
	// NOTE(review): assumes State60::m_coll holds MAX_COLL_LEN chars plus
	// the terminator, like the other page states - confirm
	if ( gbstrlen(coll) > MAX_COLL_LEN )
		return g_httpServer.sendErrorReply( s, 500, "Bad collection");

	// get username
	char *username = r->getStringFromCookie("username", NULL);
	if ( ! username ) username = r->getString("username",NULL);
	if ( ! username ) username = r->getString("user",NULL);
	if ( ! username ) username = r->getString("code",NULL);
	if ( ! username )
		return g_httpServer.sendErrorReply(s,500,"No username");
	int32_t ulen = gbstrlen(username);
	if ( ulen >= MAX_USER_SIZE )
		return g_httpServer.sendErrorReply(s,500,"Bad username");

	// make a state for callback
	State60 *st ;
	try { st = new ( State60 ); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log( "pgrank: new(%i): %s",
		     (int)sizeof(State60), mstrerror(g_errno) );
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}
	mnew ( st , sizeof(State60) , "PageTurk" );

	// save the socket
	st->m_s = s;
	// save username, including its terminator. ulen was checked above.
	memcpy ( st->m_username , username , ulen + 1 );
	// copy coll, length was checked above
	strcpy ( st->m_coll , coll );
	// this is 1 to imply to edit a page
	st->m_editMode = r->getLong("edit",0);
	st->m_docId    = r->getLongLong("docid",0LL);

	// if not editing then present their stats page
	if ( ! st->m_editMode ) return sendPageTurkStats (st);

	// otherwise, send them the eval page
	return sendPageTurkEval (st);
}
/*
 * Subtract the smaller magnitude from the larger one.
 *
 * ucmp(a, b) decides the order: equal magnitudes yield mint(0); otherwise the
 * operands are arranged so that |a| > |b| and the result carries MSIGN(a)
 * (negated when the operands had to be swapped). The result is a new number
 * with its length trimmed of leading zero words.
 */
static unsigned int *
subf(unsigned int *a, unsigned int *b)
{
	int k;
	int sign = 0;
	unsigned int borrow = 0;
	unsigned int *diff;
	unsigned int *tmp;
	int order = ucmp(a, b);

	if (order == 0)
		return mint(0);

	if (order == 1) {
		/* |a| > |b| */
		sign = MSIGN(a);
	} else if (order == -1) {
		/* |a| < |b|: swap so that a is the larger magnitude */
		sign = -MSIGN(a);
		tmp = a;
		a = b;
		b = tmp;
	}

	diff = mnew(MLENGTH(a));

	/* words present in both operands */
	for (k = 0; k < MLENGTH(b); k++) {
		diff[k] = a[k] - b[k] - borrow;
		/* with an incoming borrow, an unchanged word also wrapped */
		borrow = borrow ? (a[k] <= diff[k]) : (a[k] < diff[k]);
	}

	/* remaining words of a: only the borrow is left to propagate */
	for (k = MLENGTH(b); k < MLENGTH(a); k++) {
		diff[k] = a[k] - borrow;
		borrow = (a[k] < diff[k]);
	}

	/* drop leading zero words, keeping at least one word */
	k = MLENGTH(a) - 1;
	while (k > 0 && !diff[k])
		k--;

	MLENGTH(diff) = k + 1;
	MSIGN(diff) = sign;

	return diff;
}
void Blaster:: processLogFile(void *state){ // No need to print how many docs processed in log // because this is called at epochs given in the log char *urlStart=(char*)state; if (!urlStart){ log(LOG_WARN,"blaster: got NULL urlStart"); return; } // log(LOG_WARN,"blaster:: Line is %s",urlStart); char tmp[1024]; if (urlStart[0]=='P'){ //POST // advance by "POST /search HTTP/1.1 " = 22 chars urlStart+=22; sprintf(tmp,"http://www.gigablast.com/search?%s",urlStart); } else if (urlStart[0]=='G'){ //GET // advance by "GET "= 4 chars urlStart+=4; char *end=strstr(urlStart," HTTP/1."); if (end) end[0]='\0'; sprintf(tmp,"http://www.gigablast.com%s",urlStart); } // log(LOG_WARN,"blaster: URL=%s",tmp); StateBD *st; try { st = new (StateBD); } catch ( ... ) { g_errno = ENOMEM; log("blaster: Failed. " "Could not allocate %li bytes for query. " "Returning HTTP status of 500.", (long)sizeof(StateBD)); return; } mnew ( st , sizeof(StateBD) , "BlasterDiff3" ); //st->m_u1.set(tmp,gbstrlen(tmp)); st->m_buf1=NULL; // get it bool status = g_httpServer.getDoc ( tmp, // &(st->m_u1) , // url 0 , // ip (none) 0 , // offset -1 , // size 0 , // ifModifiedSince st, // state gotDocWrapper1, // callback 20*1000, // timeout 0, // proxy ip 0, // proxy port 30*1024*1024, //maxLen 30*1024*1024);//maxOtherLen // continue if it blocked if ( status ) // else there was error log("blaster: got doc %s: %s", urlStart,mstrerror(g_errno) ); return; }
// Build and return the transpose of this n-by-n matrix: element [row][col]
// of the result is element [row] of this matrix's vector va[col].
MAT MAT::tpose()
{
	MAT transposed(n);

	for (int row = 0; row < n; row++) {
		for (int col = 0; col < n; col++) {
			transposed[row][col] = (*va[col])[row];
		}
	}

	return transposed;
}
unsigned int * mroot(unsigned int *n, unsigned int index) { int i, j, k; unsigned int m, *x, *y; if (index == 0) stop("root index is zero"); // count number of bits k = 32 * (MLENGTH(n) - 1); m = n[MLENGTH(n) - 1]; while (m) { m >>= 1; k++; } if (k == 0) return mint(0); // initial guess k = (k - 1) / index; j = k / 32 + 1; x = mnew(j); MSIGN(x) = 1; MLENGTH(x) = j; for (i = 0; i < j; i++) x[i] = 0; while (k >= 0) { mp_set_bit(x, k); y = mpow(x, index); switch (mcmp(y, n)) { case -1: break; case 0: mfree(y); return x; case 1: mp_clr_bit(x, k); break; } mfree(y); k--; } mfree(x); return 0; }
// . returns false if blocked, true otherwise // . sets g_errno on error bool sendPageInject ( TcpSocket *s , HttpRequest *r ) { // get the collection long collLen = 0; char *coll = r->getString ( "c" , &collLen , NULL /*default*/); // get collection rec CollectionRec *cr = g_collectiondb.getRec ( coll ); // bitch if no collection rec found if ( ! cr ) { g_errno = ENOCOLLREC; log("build: Injection from %s failed. " "Collection \"%s\" does not exist.", iptoa(s->m_ip),coll); return g_httpServer.sendErrorReply(s,500, "collection does not exist"); } // make a new state Msg7 *msg7; try { msg7= new (Msg7); } catch ( ... ) { g_errno = ENOMEM; log("PageInject: new(%i): %s", sizeof(Msg7),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));} mnew ( msg7, sizeof(Msg7) , "PageInject" ); msg7->m_socket = s; msg7->m_isScrape = false; // a scrape request? char *qts = r->getString("qts",NULL); if ( qts && ! qts[0] ) qts = NULL; if ( qts ) { // qts is html encoded? NO! fix that below then... //char *uf="http://www.google.com/search?num=50&" // "q=%s&scoring=d&filter=0"; strncpy(msg7->m_coll,coll,MAX_COLL_LEN); msg7->m_isScrape = true; msg7->m_qbuf.safeStrcpy(qts); msg7->m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab"); msg7->m_useAhrefs = r->getLong("useahrefs",0); // default to yes, injectlinks.. no default to no msg7->m_injectLinks = r->getLong("injectlinks",0); if ( ! msg7->scrapeQuery ( ) ) return false; return sendReply ( msg7 ); } if ( ! msg7->inject ( s , r , msg7 , sendReplyWrapper ) ) return false; // it did not block, i gues we are done return sendReply ( msg7 ); }
/*
 * Create a one-word number holding the value of n.
 *
 * The sign is stored separately (-1 for negative n, 1 otherwise) and the
 * single word holds the magnitude. The magnitude is computed in unsigned
 * arithmetic because abs(INT_MIN) is undefined behaviour.
 */
unsigned int *
mint(int n)
{
	unsigned int *p = mnew(1);

	MSIGN(p) = (n < 0) ? -1 : 1;
	MLENGTH(p) = 1;
	p[0] = (n < 0) ? 0u - (unsigned int) n : (unsigned int) n;

	return p;
}
/*******************************************************************
 Part of Sub-level Kurtosis

 Calculate: sum(Zjk*ones(1,p).*(data_proj))./sum(Zjk)

 The element-wise product of mZjk and data goes into a temporary
 matrix, msum() reduces it into meanZjk (mode 'c'), and every entry
 of meanZjk is then divided by sumZjk.
*******************************************************************/
void kurtmodel(matrix *mZjk, double sumZjk, matrix *data, vector *meanZjk)
{
	matrix product;
	int k;

	mnew(&product, data->m, data->n);

	mmDotMul(mZjk, data, &product);
	msum(&product, 'c', meanZjk);

	for (k = 0; k < meanZjk->l; k++)
		meanZjk->pr[k] /= sumZjk;

	mdelete(&product);
}
/*
 * Add two magnitudes word by word.
 *
 * The result takes MSIGN(a) as read before any swap, has room for one extra
 * carry word, and its length is trimmed of leading zero words.
 */
static unsigned int *
addf(unsigned int *a, unsigned int *b)
{
	int k;
	int sign = MSIGN(a);
	unsigned int carry = 0;
	unsigned int *sum;
	unsigned int *tmp;

	/* make a the longer operand */
	if (MLENGTH(a) < MLENGTH(b)) {
		tmp = a;
		a = b;
		b = tmp;
	}

	sum = mnew(MLENGTH(a) + 1);

	/* words present in both operands */
	for (k = 0; k < MLENGTH(b); k++) {
		sum[k] = a[k] + b[k] + carry;
		/* with an incoming carry, an unchanged word also wrapped */
		carry = carry ? (a[k] >= sum[k]) : (a[k] > sum[k]);
	}

	/* remaining words of a: only the carry is left to propagate */
	for (k = MLENGTH(b); k < MLENGTH(a); k++) {
		sum[k] = a[k] + carry;
		carry = (a[k] > sum[k]);
	}

	/* final carry goes into the extra word */
	sum[MLENGTH(a)] = carry;

	/* drop leading zero words, keeping at least one word */
	k = MLENGTH(a);
	while (k > 0 && !sum[k])
		k--;

	MLENGTH(sum) = k + 1;
	MSIGN(sum) = sign;

	return sum;
}
/*
 * Exercise the map: insert n shuffled indices, reshuffle, then delete a
 * random number of them (fewer than n/2).
 *
 * For n where n/2 is 0 the deletion phase is skipped, since
 * rand() % (n/2) would divide by zero.
 */
static void
test(struct map *m, int n, int start)
{
	int i;

	init(n, start);
	shuffle(n);
	for (i = 0; i < n; i++) {
		mnew(m, INDEX[i]);
	}

	shuffle(n);
	if (n / 2 == 0)
		return;

	n = rand() % (n / 2);
	for (i = 0; i < n; i++) {
		mdelete(m, INDEX[i]);
	}
}
// . returns false if blocked, true otherwise // . sets g_errno on error // . make a web page displaying the titleRec of "docId" given via cgi // . call g_httpServer.sendDynamicPage() to send it bool sendPageTitledb ( TcpSocket *s , HttpRequest *r ) { // get the docId from the cgi vars long long docId = r->getLongLong ("d", 0LL ); // set up a msg22 to get the next titleRec State4 *st ; try { st = new (State4); } catch ( ... ) { g_errno = ENOMEM; log("PageTitledb: new(%i): %s", (int)sizeof(State4),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));} mnew ( st , sizeof(State4) , "PageTitledb"); // save the socket st->m_socket = s; // copy it st->m_r.copy ( r ); // remember if http request is internal/local or not st->m_isRootAdmin = g_conf.isCollAdmin ( s , r ); st->m_isLocal = r->isLocal(); st->m_docId = docId; // password, too st->m_pwd = r->getString ( "pwd" ); // get the collection long collLen = 0; char *coll = st->m_r.getString("c",&collLen); if ( ! coll || ! coll[0] ) { //coll = g_conf.m_defaultColl; coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() ); collLen = gbstrlen(coll); } st->m_coll = coll; st->m_collLen = collLen; // just print page if no docid provided if ( ! docId ) return gotTitleRec ( st ); // get the handy XmlDoc XmlDoc *xd = &st->m_xd; // use 0 for niceness xd->set3 ( docId , coll , 0 ); // callback xd->setCallback ( st , gotTitleRec ); // . and tell it to load from old title rec // . this sets all the member vars from it and also sets // m_titleRecBuf to contain the actual compressed title rec if ( ! xd->loadFromOldTitleRec ( ) ) return false; // we got it without blocking. cached? return gotTitleRec ( st ); }
/*
 * Complex-valued counterpart of kurtmodel.
 *
 * Multiplies mZjk (with a freshly created imaginary part) element-wise with
 * the complex data through cmmDotMul(), reduces the real and imaginary
 * products with msum() (mode 'c') into meanZjk_re / meanZjk_im, and divides
 * every entry of both vectors by sumZjk. The loop bound is the length of
 * meanZjk_re.
 */
void ckurtmodel(matrix *mZjk, double sumZjk, matrix *data_re, matrix *data_im,
		vector *meanZjk_re, vector *meanZjk_im)
{
	matrix prod_re;
	matrix prod_im;
	matrix weights_im;
	int k;

	mnew(&prod_re, data_re->m, data_re->n);
	mnew(&prod_im, data_im->m, data_im->n);
	/* NOTE(review): presumably mnew() zero-fills this imaginary part -
	 * confirm, it is passed to cmmDotMul() without being written here */
	mnew(&weights_im, mZjk->m, mZjk->n);

	cmmDotMul(mZjk, &weights_im, data_re, data_im, &prod_re, &prod_im);

	msum(&prod_re, 'c', meanZjk_re);
	msum(&prod_im, 'c', meanZjk_im);

	for (k = 0; k < meanZjk_re->l; k++) {
		meanZjk_re->pr[k] /= sumZjk;
		meanZjk_im->pr[k] /= sumZjk;
	}

	mdelete(&prod_re);
	mdelete(&prod_im);
	mdelete(&weights_im);
}
/*
 * Return a newly allocated copy of a: same sign, same length, same words.
 */
unsigned int *
mcopy(unsigned int *a)
{
	unsigned int *dup = mnew(MLENGTH(a));
	int k = 0;

	MSIGN(dup) = MSIGN(a);
	MLENGTH(dup) = MLENGTH(a);

	while (k < MLENGTH(a)) {
		dup[k] = a[k];
		k++;
	}

	return dup;
}
// . handle a request to get a the search results, list of docids only // . returns false if slot should be nuked and no reply sent // . sometimes sets g_errno on error void handleRequest39 ( UdpSlot *slot , long netnice ) { // use Msg39 to get the lists and intersect them Msg39 *THIS ; try { THIS = new ( Msg39 ); } catch ( ... ) { g_errno = ENOMEM; log("msg39: new(%i): %s", sizeof(Msg39),mstrerror(g_errno)); sendReply ( slot , NULL , NULL , 0 , 0 ,true); return; } mnew ( THIS , sizeof(Msg39) , "Msg39" ); // clear it g_errno = 0; // . get the resulting docIds, usually blocks // . sets g_errno on error THIS->getDocIds ( slot ) ; }
/*
 * Write the first mf->pos bytes of the memfile buffer to fd, then free the
 * memfile and re-initialize it.
 *
 * Short writes are continued from where they stopped. A write error (-1)
 * ends the writing early; the memfile is freed and re-initialized either way
 * and the error is not reported to the caller.
 */
void
store_mf(int fd, struct memfile *mf)
{
    int total = mf->pos;
    int done = 0;
    int n;

    while (done != total) {
        n = write(fd, &mf->buf[done], total - done);
        if (n == -1)    /* error */
            break;
        done += n;
    }

    mfree(mf);
    mnew(mf, NULL);
}
unsigned int * msqrt(unsigned int *n) { int i, k, kk; unsigned int m, *x, *y; if (MLENGTH(n) == 1 && n[0] == 0) { x = mint(0); return x; } // count number of bits k = 32 * (MLENGTH(n) - 1); m = n[MLENGTH(n) - 1]; while (m) { m >>= 1; k++; } k = (k - 1) / 2; // initial guess kk = k / 32 + 1; x = mnew(kk); MSIGN(x) = 1; MLENGTH(x) = kk; for (i = 0; i < kk; i++) x[i] = 0; mp_set_bit(x, k); while (--k >= 0) { mp_set_bit(x, k); y = mmul(x, x); if (mcmp(y, n) == 1) mp_clr_bit(x, k); mfree(y); } return x; }
// Re-indexing a document whose body changed from "old document" to
// "new document": the meta list must carry positive keys for the current
// terms, negative (delete) keys for the terms that vanished, and no
// "delete doc" key at all.
TEST_F(XmlDocTest, PosdbGetMetaListChangedDoc) {
	const char *url = "http://www.example.test/index.html";
	char oldHtml[] = "<html><head><title>my title</title></head><body>old document</body></html>";
	char newHtml[] = "<html><head><title>my title</title></head><body>new document</body></html>";

	// the previous version lives on the heap and is registered with mnew
	XmlDoc *previousDoc = new XmlDoc();
	mnew(previousDoc, sizeof(*previousDoc), "XmlDoc");
	initializeDocForPosdb(previousDoc, url, oldHtml);

	// the current version gets the previous one attached as its old doc
	XmlDoc currentDoc;
	initializeDocForPosdb(&currentDoc, url, newHtml);
	currentDoc.m_oldDocValid = true;
	currentDoc.m_oldDoc = previousDoc;

	currentDoc.getMetaList(false);
	auto keys = parseMetaList(currentDoc.m_metaList, currentDoc.m_metaListSize);

	// make sure no special key is inserted (positive or negative)
	EXPECT_FALSE(posdbFindRecord(keys, currentDoc.m_docId, POSDB_DELETEDOC_TERMID, false));
	EXPECT_FALSE(posdbFindRecord(keys, currentDoc.m_docId, POSDB_DELETEDOC_TERMID, true));

	// make sure title & body text is indexed
	// (with difference between old & new document deleted)

	// title
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("title", "my"), false));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("title", "title"), false));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("title", "mytitle"), false));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("my"), false));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("title"), false));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("mytitle"), false));

	// body
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("new"), false));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("document"), false));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("newdocument"), false));

	// deleted terms
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("old"), true));
	EXPECT_TRUE(posdbFindRecord(keys, currentDoc.m_docId, hashWord("olddocument"), true));

	/// @todo ALC add other terms
}
// . a new interface so Msg3b can call this with "s" set to NULL // . returns false if blocked, true otherwise // . sets g_errno on error bool sendPageParser2 ( TcpSocket *s , HttpRequest *r , State8 *st , long long docId , Query *q , // in query term space, not imap space long long *termFreqs , // in imap space float *termFreqWeights , // in imap space float *affWeights , void *state , void (* callback)(void *state) ) { //log("parser: read sock=%li",s->m_sd); // might a simple request to addsomething to validated.*.txt file // from XmlDoc::print() or XmlDoc::validateOutput() char *add = r->getString("add",NULL); //long long uh64 = r->getLongLong("uh64",0LL); char *uh64str = r->getString("uh64",NULL); //char *divTag = r->getString("div",NULL); if ( uh64str ) { // convert add to number long addNum = 0; if ( to_lower_a(add[0])=='t' ) // "true" or "false"? addNum = 1; // convert it. skip beginning "str" inserted to prevent // javascript from messing with the long long since it // was rounding it! //long long uh64 = atoll(uh64str);//+3); // urldecode that //long divTagLen = gbstrlen(divTag); //long newLen = urlDecode ( divTag , divTag , divTagLen ); // null term? //divTag[newLen] = '\0'; // do it. this is defined in XmlDoc.cpp //addCheckboxSpan ( uh64 , divTag , addNum ); // make basic reply char *reply; reply = "HTTP/1.0 200 OK\r\n" "Connection: Close\r\n"; // that is it! send a basic reply ok bool status = g_httpServer.sendDynamicPage( s , reply, gbstrlen(reply), -1, //cachtime false ,//postreply? NULL, //ctype -1 , //httpstatus NULL,//cookie "utf-8"); return status; } // make a state if ( st ) st->m_freeIt = false; if ( ! st ) { try { st = new (State8); } catch ( ... 
) { g_errno = ENOMEM; log("PageParser: new(%i): %s", (int)sizeof(State8),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500, mstrerror(g_errno));} mnew ( st , sizeof(State8) , "PageParser" ); st->m_freeIt = true; } // msg3b uses this to get a score from the query st->m_state = state; st->m_callback = callback; st->m_q = q; st->m_termFreqs = termFreqs; st->m_termFreqWeights = termFreqWeights; st->m_affWeights = affWeights; //st->m_total = (score_t)-1; st->m_indexCode = 0; st->m_blocked = false; st->m_didRootDom = false; st->m_didRootWWW = false; st->m_wasRootDom = false; st->m_u = NULL; st->m_recompute = false; //st->m_url.reset(); // do not allow more than one to be launched at a time if in // a quickpoll. will cause quickpoll in quickpoll. g_inPageParser = true; // password, too long pwdLen = 0; char *pwd = r->getString ( "pwd" , &pwdLen ); if ( pwdLen > 31 ) pwdLen = 31; if ( pwdLen > 0 ) strncpy ( st->m_pwd , pwd , pwdLen ); st->m_pwd[pwdLen]='\0'; // save socket ptr st->m_s = s; st->m_r.copy ( r ); // get the collection char *coll = r->getString ( "c" , &st->m_collLen ,NULL /*default*/); if ( st->m_collLen > MAX_COLL_LEN ) return sendErrorReply ( st , ENOBUFS ); if ( ! coll ) return sendErrorReply ( st , ENOCOLLREC ); strcpy ( st->m_coll , coll ); // version to use, if -1 use latest st->m_titleRecVersion = r->getLong("version",-1); if ( st->m_titleRecVersion == -1 ) st->m_titleRecVersion = TITLEREC_CURRENT_VERSION; // default to 0 if not provided st->m_hopCount = r->getLong("hc",0); //long ulen = 0; //char *u = r->getString ( "u" , &ulen , NULL /*default*/); long old = r->getLong ( "old", 0 ); // set query long qlen; char *qs = r->getString("q",&qlen,NULL); if ( qs ) st->m_tq.set2 ( qs , langUnknown , true ); // url will override docid if given if ( ! st->m_u || ! 
st->m_u[0] ) st->m_docId = r->getLongLong ("docid",-1); else st->m_docId = -1; // set url in state class (may have length 0) //if ( u ) st->m_url.set ( u , ulen ); //st->m_urlLen = ulen; st->m_u = st->m_r.getString("u",&st->m_ulen,NULL); // should we recycle link info? st->m_recycle = r->getLong("recycle",0); st->m_recycle2 = r->getLong("recycleimp",0); st->m_render = r->getLong("render" ,0); // for quality computation... takes way longer cuz we have to // lookup the IP address of every outlink, so we can get its root // quality using Msg25 which needs to filter out voters from that IP // range. st->m_oips = r->getLong("oips" ,0); long linkInfoLen = 0; // default is NULL char *linkInfoColl = r->getString ( "oli" , &linkInfoLen, NULL ); if ( linkInfoColl ) strcpy ( st->m_linkInfoColl , linkInfoColl ); else st->m_linkInfoColl[0] = '\0'; // set the flag in our SafeBuf class so that Words.cpp knows to show // html or html source depending on this value st->m_xbuf.m_renderHtml = st->m_render; // should we use the old title rec? st->m_old = old; // are we coming from a local machine? 
st->m_isLocal = r->isLocal(); //no more setting the default root quality to 30, instead if we do not // know it setting it to -1 st->m_rootQuality=-1; // header SafeBuf *xbuf = &st->m_xbuf; xbuf->safePrintf("<meta http-equiv=\"Content-Type\" " "content=\"text/html; charset=utf-8\">\n"); // print standard header g_pages.printAdminTop ( xbuf , st->m_s , &st->m_r ); // print the standard header for admin pages char *dd = ""; char *rr = ""; char *rr2 = ""; char *render = ""; char *oips = ""; char *us = ""; if ( st->m_u && st->m_u[0] ) us = st->m_u; //if ( st->m_sfn != -1 ) sprintf ( rtu , "%li",st->m_sfn ); if ( st->m_old ) dd = " checked"; if ( st->m_recycle ) rr = " checked"; if ( st->m_recycle2 ) rr2 = " checked"; if ( st->m_render ) render = " checked"; if ( st->m_oips ) oips = " checked"; xbuf->safePrintf( "<style>" ".poo { background-color:#%s;}\n" "</style>\n" , LIGHT_BLUE ); long clen; char *contentParm = r->getString("content",&clen,""); // print the input form xbuf->safePrintf ( "<style>\n" "h2{font-size: 12px; color: #666666;}\n" ".gbtag { border: 1px solid gray;" "background: #ffffef;display:inline;}\n" ".gbcomment { border: 1px solid gray;" "color: #888888; font-style:italic; " "background: #ffffef;display:inline;}\n" ".token { border: 1px solid gray;" "background: #f0ffff;display:inline;}\n" ".spam { border: 1px solid gray;" "background: #af0000;" "color: #ffffa0;}" ".hs {color: #009900;}" "</style>\n" "<center>" "<table %s>" "<tr><td colspan=5><center><b>" "Parser" "</b></center></td></tr>\n" "<tr class=poo>" "<td>" "<b>url</b>" "<br><font size=-2>" "Type in <b>FULL</b> url to parse." 
"</font>" "</td>" "</td>" "<td>" "<input type=text name=u value=\"%s\" size=\"40\">\n" "</td>" "</tr>" /* "<tr class=poo>" "<td>" "Parser version to use: " "</td>" "<td>" "<input type=text name=\"version\" size=\"4\" value=\"-1\"> " "</td>" "<td>" "(-1 means to use latest title rec version)<br>" "</td>" "</tr>" */ /* "<tr class=poo>" "<td>" "Hop count to use: " "</td>" "<td>" "<input type=text name=\"hc\" size=\"4\" value=\"%li\"> " "</td>" "<td>" "(-1 is unknown. For root urls hopcount is always 0)<br>" "</td>" "</tr>" */ "<tr class=poo>" "<td>" "<b>use cached</b>" "<br><font size=-2>" "Load page from cache (titledb)?" "</font>" "</td>" "<td>" "<input type=checkbox name=old value=1%s> " "</td>" "</tr>" /* "<tr class=poo>" "<td>" "Reparse root:" "</td>" "<td>" "<input type=checkbox name=artr value=1%s> " "</td>" "<td>" "Apply selected ruleset to root to update quality" "</td>" "</tr>" */ "<tr class=poo>" "<td>" "<b>recycle link info</b>" "<br><font size=-2>" "Recycle the link info from the title rec" "Load page from cache (titledb)?" "</font>" "</td>" "<td>" "<input type=checkbox name=recycle value=1%s> " "</td>" "</tr>" /* "<tr class=poo>" "<td>" "Recycle Link Info Imported:" "</td>" "<td>" "<input type=checkbox name=recycleimp value=1%s> " "</td>" "<td>" "Recycle the link info imported from other coll" "</td>" "</tr>" */ "<tr class=poo>" "<td>" "<b>render html</b>" "<br><font size=-2>" "Render document content as HTML" "</font>" "</td>" "<td>" "<input type=checkbox name=render value=1%s> " "</td>" "</tr>" /* "<tr class=poo>" "<td>" "Lookup outlinks' ruleset, ips, quality:" "</td>" "<td>" "<input type=checkbox name=oips value=1%s> " "</td>" "<td>" "To compute quality lookup IP addresses of roots " "of outlinks." "</td>" "</tr>" "<tr class=poo>" "<td>" "LinkInfo Coll:" "</td>" "<td>" "<input type=text name=\"oli\" size=\"10\" value=\"\"> " "</td>" "<td>" "Leave empty usually. Uses this coll to lookup link info." 
"</td>" "</tr>" */ "<tr class=poo>" "<td>" "<b>optional query</b>" "<br><font size=-2>" "Leave empty usually. For title generation only." "</font>" "</td>" "<td>" "<input type=text name=\"q\" size=\"20\" value=\"\"> " "</td>" "</tr>", TABLE_STYLE, us , dd, rr, render ); xbuf->safePrintf( "<tr class=poo>" "<td>" "<b>content type below is</b>" "<br><font size=-2>" "Is the content below HTML? XML? JSON?" "</font>" "</td>" "<td>" //"<input type=checkbox name=xml value=1> " "<select name=ctype>\n" "<option value=%li selected>HTML</option>\n" "<option value=%li selected>XML</option>\n" "<option value=%li selected>JSON</option>\n" "</select>\n" "</td>" "</tr>", (long)CT_HTML, (long)CT_XML, (long)CT_JSON ); xbuf->safePrintf( "<tr class=poo>" "<td><b>content</b>" "<br><font size=-2>" "Use this content for the provided <i>url</i> " "rather than downloading it from the web." "</td>" "<td>" "<textarea rows=10 cols=80 name=content>" "%s" "</textarea>" "</td>" "</tr>" "</table>" "</center>" "</form>" "<br>", //oips , contentParm ); xbuf->safePrintf( "<center>" "<input type=submit value=Submit>" "</center>" ); // just print the page if no url given if ( ! st->m_u || ! st->m_u[0] ) return processLoop ( st ); XmlDoc *xd = &st->m_xd; // set this up SpiderRequest sreq; sreq.reset(); strcpy(sreq.m_url,st->m_u); long firstIp = hash32n(st->m_u); if ( firstIp == -1 || firstIp == 0 ) firstIp = 1; // parentdocid of 0 sreq.setKey( firstIp, 0LL, false ); sreq.m_isPageParser = 1; sreq.m_hopCount = st->m_hopCount; sreq.m_hopCountValid = 1; sreq.m_fakeFirstIp = 1; sreq.m_firstIp = firstIp; Url nu; nu.set(sreq.m_url); sreq.m_domHash32 = nu.getDomainHash32(); sreq.m_siteHash32 = nu.getHostHash32(); // . get provided content if any // . will be NULL if none provided // . "content" may contain a MIME long contentLen = 0; char *content = r->getString ( "content" , &contentLen , NULL ); // is the "content" url-encoded? default is true. 
bool contentIsEncoded = true; // mark doesn't like to url-encode his content if ( ! content ) { content = r->getUnencodedContent (); contentLen = r->getUnencodedContentLen (); contentIsEncoded = false; } // ensure null if ( contentLen == 0 ) content = NULL; uint8_t contentType = CT_HTML; if ( r->getBool("xml",0) ) contentType = CT_XML; contentType = r->getLong("ctype",contentType);//CT_HTML); // if facebook, load xml content from title rec... bool isFacebook = (bool)strstr(st->m_u,"http://www.facebook.com/"); if ( isFacebook && ! content ) { long long docId = g_titledb.getProbableDocId(st->m_u); sprintf(sreq.m_url ,"%llu", docId ); sreq.m_isPageReindex = true; } // hack if ( content ) { st->m_dbuf.purge(); st->m_dbuf.safeStrcpy(content); //char *data = strstr(content,"\r\n\r\n"); //long dataPos = 0; //if ( data ) dataPos = (data + 4) - content; //st->m_dbuf.convertJSONtoXML(0,dataPos); //st->m_dbuf.decodeJSON(0); content = st->m_dbuf.getBufStart(); } // . use the enormous power of our new XmlDoc class // . this returns false if blocked if ( ! xd->set4 ( &sreq , NULL , st->m_coll , &st->m_wbuf , 0 ,//PP_NICENESS )) content , false, // deletefromindex 0, // forced ip contentType )) // return error reply if g_errno is set return sendErrorReply ( st , g_errno ); // make this our callback in case something blocks xd->setCallback ( st , processLoop ); // . set xd from the old title rec if recycle is true // . can also use XmlDoc::m_loadFromOldTitleRec flag if ( st->m_recycle ) xd->m_recycleContent = true; return processLoop ( st ); }
// for procog bool sendPageAnalyze ( TcpSocket *s , HttpRequest *r ) { // make a state State8 *st; try { st = new (State8); } catch ( ... ) { g_errno = ENOMEM; log("PageParser: new(%i): %s", (int)sizeof(State8),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500, mstrerror(g_errno));} mnew ( st , sizeof(State8) , "PageParser" ); st->m_freeIt = true; st->m_state = NULL; //st->m_callback = callback; //st->m_q = q; //st->m_termFreqs = termFreqs; //st->m_termFreqWeights = termFreqWeights; //st->m_affWeights = affWeights; //st->m_total = (score_t)-1; st->m_indexCode = 0; st->m_blocked = false; st->m_didRootDom = false; st->m_didRootWWW = false; st->m_wasRootDom = false; st->m_u = NULL; // password, too long pwdLen = 0; char *pwd = r->getString ( "pwd" , &pwdLen ); if ( pwdLen > 31 ) pwdLen = 31; if ( pwdLen > 0 ) strncpy ( st->m_pwd , pwd , pwdLen ); st->m_pwd[pwdLen]='\0'; // save socket ptr st->m_s = s; st->m_r.copy ( r ); // get the collection char *coll = r->getString ( "c" , &st->m_collLen ,NULL /*default*/); if ( ! coll ) coll = g_conf.m_defaultColl; if ( ! coll ) coll = "main"; long collLen = gbstrlen(coll); if ( collLen > MAX_COLL_LEN ) return sendErrorReply ( st , ENOBUFS ); strcpy ( st->m_coll , coll ); // version to use, if -1 use latest st->m_titleRecVersion = r->getLong("version",-1); if ( st->m_titleRecVersion == -1 ) st->m_titleRecVersion = TITLEREC_CURRENT_VERSION; // default to 0 if not provided st->m_hopCount = r->getLong("hc",0); long old = r->getLong ( "old", 0 ); // set query long qlen; char *qs = r->getString("q",&qlen,NULL); if ( qs ) st->m_tq.set2 ( qs , langUnknown , true ); // url will override docid if given st->m_docId = r->getLongLong ("d",-1); st->m_docId = r->getLongLong ("docid",st->m_docId); long ulen; char *u = st->m_r.getString("u",&ulen,NULL); if ( ! u ) u = st->m_r.getString("url",&ulen,NULL); if ( ! 
u && st->m_docId == -1LL ) return sendErrorReply ( st , EBADREQUEST ); // set url in state class (may have length 0) //if ( u ) st->m_url.set ( u , ulen ); //st->m_urlLen = ulen; st->m_u = u; st->m_ulen = 0; if ( u ) st->m_ulen = gbstrlen(u); // should we recycle link info? st->m_recycle = r->getLong("recycle",1); st->m_recycle2 = r->getLong("recycleimp",0); st->m_render = r->getLong("render" ,0); st->m_recompute = r->getLong("recompute" ,0); // for quality computation... takes way longer cuz we have to // lookup the IP address of every outlink, so we can get its root // quality using Msg25 which needs to filter out voters from that IP // range. st->m_oips = r->getLong("oips" ,0); //st->m_page = r->getLong("page",1); long linkInfoLen = 0; // default is NULL char *linkInfoColl = r->getString ( "oli" , &linkInfoLen, NULL ); if ( linkInfoColl ) strcpy ( st->m_linkInfoColl , linkInfoColl ); else st->m_linkInfoColl[0] = '\0'; // set the flag in our SafeBuf class so that Words.cpp knows to show // html or html source depending on this value //st->m_xbuf.m_renderHtml = st->m_render; // should we use the old title rec? st->m_old = old; // are we coming from a local machine? st->m_isLocal = r->isLocal(); //no more setting the default root quality to 30, instead if we do not // know it setting it to -1 st->m_rootQuality=-1; // header //xbuf->safePrintf("<meta http-equiv=\"Content-Type\" " // "content=\"text/html; charset=utf-8\">\n"); XmlDoc *xd = &st->m_xd; long isXml = r->getLong("xml",0); // if got docid, use that if ( st->m_docId != -1 ) { if ( ! xd->set3 ( st->m_docId, st->m_coll, 0 ) ) // niceness // return error reply if g_errno is set return sendErrorReply ( st , g_errno ); // make this our callback in case something blocks xd->setCallback ( st , gotXmlDoc ); xd->m_pbuf = &st->m_wbuf; // reset this flag st->m_donePrinting = false; // . set xd from the old title rec if recycle is true // . 
can also use XmlDoc::m_loadFromOldTitleRec flag //if ( st->m_recycle ) xd->m_recycleContent = true; xd->m_recycleContent = true; // force this on //xd->m_useSiteLinkBuf = true; //xd->m_usePageLinkBuf = true; if ( isXml ) xd->m_printInXml = true; // now tell it to fetch the old title rec if ( ! xd->loadFromOldTitleRec () ) // return false if this blocks return false; return gotXmlDoc ( st ); } // set this up SpiderRequest sreq; sreq.reset(); if ( st->m_u ) strcpy(sreq.m_url,st->m_u); long firstIp = hash32n(st->m_u); if ( firstIp == -1 || firstIp == 0 ) firstIp = 1; // parentdocid of 0 sreq.setKey( firstIp, 0LL, false ); sreq.m_isPageParser = 1; sreq.m_hopCount = st->m_hopCount; sreq.m_hopCountValid = 1; sreq.m_fakeFirstIp = 1; sreq.m_firstIp = firstIp; Url nu; nu.set(sreq.m_url); sreq.m_domHash32 = nu.getDomainHash32(); sreq.m_siteHash32 = nu.getHostHash32(); // . get provided content if any // . will be NULL if none provided // . "content" may contain a MIME long contentLen = 0; char *content = r->getString ( "content" , &contentLen , NULL ); // is the "content" url-encoded? default is true. bool contentIsEncoded = true; // mark doesn't like to url-encode his content if ( ! content ) { content = r->getUnencodedContent (); contentLen = r->getUnencodedContentLen (); contentIsEncoded = false; } // ensure null if ( contentLen == 0 ) content = NULL; //uint8_t contentType = CT_HTML; //if ( isXml ) contentType = CT_XML; long ctype = r->getLong("ctype",CT_HTML); // . use the enormous power of our new XmlDoc class // . this returns false if blocked if ( ! xd->set4 ( &sreq , NULL , st->m_coll , // we need this so the term table is set! &st->m_wbuf , // XmlDoc::m_pbuf 0, // try 0 now! 
1 ,//PP_NICENESS )) content , false, // deletefromindex 0, // forced ip ctype )) // return error reply if g_errno is set return sendErrorReply ( st , g_errno ); // make this our callback in case something blocks xd->setCallback ( st , gotXmlDoc ); // reset this flag st->m_donePrinting = false; // prevent a core here in the event we download the page content xd->m_crawlDelayValid = true; xd->m_crawlDelay = 0; // . set xd from the old title rec if recycle is true // . can also use XmlDoc::m_loadFromOldTitleRec flag //if ( st->m_recycle ) xd->m_recycleContent = true; // only recycle if docid is given!! if ( st->m_recycle ) xd->m_recycleContent = true; // force this on //xd->m_useSiteLinkBuf = true; //xd->m_usePageLinkBuf = true; if ( isXml ) xd->m_printInXml = true; return gotXmlDoc ( st ); }
// . returns false if blocked, true otherwise // . sets g_errno on error bool sendPageGet ( TcpSocket *s , HttpRequest *r ) { // get the collection long collLen = 0; char *coll = r->getString("c",&collLen); if ( ! coll || ! coll[0] ) { //coll = g_conf.m_defaultColl; coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() ); collLen = gbstrlen(coll); } // ensure collection not too big if ( collLen >= MAX_COLL_LEN ) { g_errno = ECOLLTOOBIG; return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); } // get the collection rec CollectionRec *cr = g_collectiondb.getRec ( coll ); if ( ! cr ) { g_errno = ENOCOLLREC; log("query: Archived copy retrieval failed. " "No collection record found for " "collection \"%s\".",coll); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); } // does this collection ban this IP? if ( ! cr->hasSearchPermission ( s ) ) { g_errno = ENOPERM; //log("PageGet::sendDynamicReply0: permission denied for %s", // iptoa(s->m_ip) ); g_msg = " (error: permission denied)"; return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); } // . get fields from cgi field of the requested url // . get the search query long qlen = 0; char *q = r->getString ( "q" , &qlen , NULL /*default*/); // ensure query not too big if ( qlen >= MAX_QUERY_LEN-1 ) { g_errno=EQUERYTOOBIG; return g_httpServer.sendErrorReply (s,500 ,mstrerror(g_errno)); } // the docId long long docId = r->getLongLong ( "d" , 0LL /*default*/ ); // get url char *url = r->getString ( "u",NULL); if ( docId == 0 && ! url ) { g_errno = EMISSINGINPUT; return g_httpServer.sendErrorReply (s,500 ,mstrerror(g_errno)); } // . should we do a sequential lookup? // . we need to match summary here so we need to know this //bool seq = r->getLong ( "seq" , false ); // restrict to root file? bool rtq = r->getLong ( "rtq" , false ); // . get the titleRec // . TODO: redirect client to a better http server to save bandwidth State2 *st ; try { st = new (State2); } catch (... 
) { g_errno = ENOMEM; log("PageGet: new(%i): %s", (int)sizeof(State2),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));} mnew ( st , sizeof(State2) , "PageGet1" ); // save the socket and if Host: is local in the Http request Mime st->m_socket = s; st->m_isAdmin = g_conf.isCollAdmin ( s , r ); st->m_isLocal = r->isLocal(); st->m_docId = docId; st->m_printed = false; // include header ... "this page cached by Gigablast on..." st->m_includeHeader = r->getLong ("ih" , true ); st->m_includeBaseHref = r->getLong ("ibh" , false ); st->m_queryHighlighting = r->getLong ("qh" , true ); st->m_strip = r->getLong ("strip" , 0 ); st->m_clickAndScroll = r->getLong ("cas" , true ); st->m_cnsPage = r->getLong ("cnsp" , true ); char *langAbbr = r->getString("qlang",NULL); st->m_langId = langUnknown; if ( langAbbr ) { uint8_t langId = getLangIdFromAbbr ( langAbbr ); st->m_langId = langId; } strncpy ( st->m_coll , coll , MAX_COLL_LEN+1 ); // store query for query highlighting st->m_netTestResults = r->getLong ("rnettest", false ); //if( st->m_netTestResults ) { // mdelete ( st , sizeof(State2) , "PageGet1" ); // delete ( st ); // return sendPageNetResult( s ); //} if ( q && qlen > 0 ) strcpy ( st->m_q , q ); else st->m_q[0] = '\0'; st->m_qlen = qlen; //st->m_seq = seq; st->m_rtq = rtq; st->m_boolFlag = r->getLong ("bq", 2 /*default is 2*/ ); st->m_isBanned = false; st->m_noArchive = false; st->m_socket = s; st->m_format = r->getReplyFormat(); // default to 0 niceness st->m_niceness = 0; st->m_r.copy ( r ); //st->m_cr = cr; st->m_printDisclaimer = true; if ( st->m_cnsPage ) st->m_printDisclaimer = false; if ( st->m_strip ) // ! st->m_evbits.isEmpty() ) st->m_printDisclaimer = false; // should we cache it? 
char useCache = r->getLong ( "usecache" , 1 ); char rcache = r->getLong ( "rcache" , 1 ); char wcache = r->getLong ( "wcache" , 1 ); long cacheAge = r->getLong ( "cacheAge" , 60*60 ); // default one hour if ( useCache == 0 ) { cacheAge = 0; wcache = 0; } if ( rcache == 0 ) cacheAge = 0; // . fetch the TitleRec // . a max cache age of 0 means not to read from the cache XmlDoc *xd = &st->m_xd; // url based? if ( url ) { SpiderRequest sreq; sreq.reset(); strcpy(sreq.m_url, url ); sreq.setDataSize(); // this returns false if "coll" is invalid if ( ! xd->set4 ( &sreq , NULL , coll , NULL , st->m_niceness ) ) goto hadSetError; } // . when getTitleRec() is called it will load the old one // since XmlDoc::m_setFromTitleRec will be true // . niceness is 0 // . use st->m_coll since XmlDoc just points to it! // . this returns false if "coll" is invalid else if ( ! xd->set3 ( docId , st->m_coll , 0 ) ) { hadSetError: mdelete ( st , sizeof(State2) , "PageGet1" ); delete ( st ); g_errno = ENOMEM; log("PageGet: set3: %s", mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); } // if it blocks while it loads title rec, it will re-call this routine xd->setCallback ( st , processLoopWrapper ); // good to go! return processLoop ( st ); }
/*
 * Greatest common divisor of two multi-precision integers, computed by
 * the shift-and-subtract (binary) method.
 *
 * u, v : operands; both are copied before anything is modified, so the
 *        caller's numbers are left untouched.
 *
 * Returns a freshly allocated number.  When either operand is zero the
 * result is a copy of the other one with its sign forced to +1.
 * Otherwise it is the odd part of the gcd multiplied back by 2^shift,
 * where shift is the number of factors of two common to both operands.
 * NOTE(review): presumably the caller releases the result with mfree().
 */
unsigned int *
mgcd(unsigned int *u, unsigned int *v)
{
	unsigned int *a, *b, *diff, *pow2, *result;
	int shift, words, idx;

	/* gcd(0, x) and gcd(x, 0): the magnitude of the other operand */
	if (MZERO(u)) {
		result = mcopy(v);
		MSIGN(result) = 1;
		return result;
	}
	if (MZERO(v)) {
		result = mcopy(u);
		MSIGN(result) = 1;
		return result;
	}

	/* work on positive private copies */
	a = mcopy(u);
	b = mcopy(v);
	MSIGN(a) = 1;
	MSIGN(b) = 1;

	/* strip the factors of two shared by both operands */
	shift = 0;
	for (;;) {
		if ((a[0] & 1) != 0 || (b[0] & 1) != 0)
			break;
		mshiftright(a);
		mshiftright(b);
		shift++;
	}

	/* seed the difference: a itself if a is even, otherwise -b */
	if ((a[0] & 1) == 0) {
		diff = mcopy(a);
	} else {
		diff = mcopy(b);
		MSIGN(diff) *= -1;
	}

	do {
		/* drop remaining factors of two from the difference */
		while ((diff[0] & 1) == 0)
			mshiftright(diff);

		/* a positive difference replaces a, a negative one b */
		if (MSIGN(diff) == 1) {
			mfree(a);
			a = mcopy(diff);
		} else {
			mfree(b);
			b = mcopy(diff);
			MSIGN(b) *= -1;
		}

		mfree(diff);
		diff = msub(a, b);
	} while (!MZERO(diff));

	/* a == b now; a holds the odd part of the gcd */
	mfree(diff);
	mfree(b);

	/* build 2^shift in just enough 32-bit words */
	words = (shift / 32) + 1;
	pow2 = mnew(words);
	MSIGN(pow2) = 1;
	MLENGTH(pow2) = words;
	for (idx = words - 1; idx >= 0; idx--)
		pow2[idx] = 0;
	mp_set_bit(pow2, shift);

	/* restore the shared power of two */
	result = mmul(a, pow2);
	mfree(a);
	mfree(pow2);
	return result;
}
// . returns false if blocked, true otherwise // . sets g_errno on error // . query re-index interface // . call g_httpServer.sendDynamicPage() to send it bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) { // make a state State13 *st ; try { st = new (State13); } catch ( ... ) { g_errno = ENOMEM; log("PageTagdb: new(%i): %s", (int)sizeof(State13),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));} mnew ( st , sizeof(State13) , "PageReindex" ); // set this. also sets gr->m_hr GigablastRequest *gr = &st->m_gr; // this will fill in GigablastRequest so all the parms we need are set g_parms.setGigablastRequest ( s , r , gr ); TcpSocket *sock = gr->m_socket; // get collection rec CollectionRec *cr = g_collectiondb.getRec ( gr->m_coll ); // bitch if no collection rec found if ( ! cr ) { g_errno = ENOCOLLREC; // g_errno should be set so it will return an error response g_httpServer.sendErrorReply(sock,500,mstrerror(g_errno)); mdelete ( st , sizeof(State13) , "PageTagdb" ); delete (st); return true; } collnum_t collnum = cr->m_collnum; // if no query send back the page blanked out i guess if ( ! gr->m_query || ! gr->m_query[0] ) { doneReindexing ( st ); return true; } // no permmission? bool isMasterAdmin = g_conf.isMasterAdmin ( s , r ); bool isCollAdmin = g_conf.isCollAdmin ( s , r ); if ( ! isMasterAdmin && ! isCollAdmin ) { g_errno = ENOPERM; doneReindexing ( st ); return true; } int32_t langId = getLangIdFromAbbr ( gr->m_qlang ); // let msg1d do all the work now if ( ! st->m_msg1c.reindexQuery ( gr->m_query , collnum, gr->m_srn , // startNum , gr->m_ern , // endNum , (bool)gr->m_forceDel, langId, st , doneReindexing ) ) return false; // no waiting doneReindexing ( st ); return true; }
// . reply to a request for an RdbList // . MUST call g_udpServer::sendReply or sendErrorReply() so slot can // be destroyed void handleRequest0 ( UdpSlot *slot , int32_t netnice ) { logTrace( g_conf.m_logTraceMsg0, "BEGIN. Got request for an RdbList" ); // if niceness is 0, use the higher priority udpServer UdpServer *us = &g_udpServer; //if ( netnice == 0 ) us = &g_udpServer2; // get the request char *request = slot->m_readBuf; int32_t requestSize = slot->m_readBufSize; // collection is now stored in the request, so i commented this out //if ( requestSize != MSG0_REQ_SIZE ) { // log("net: Received bad data request size of %" PRId32" bytes. " // "Should be %" PRId32".", requestSize ,(int32_t)MSG0_REQ_SIZE); // us->sendErrorReply ( slot , EBADREQUESTSIZE ); // return; //} // parse the request char *p = request; int64_t syncPoint = *(int64_t *)p ; p += 8; //key_t startKey = *(key_t *)p ; p += sizeof(key_t); //key_t endKey = *(key_t *)p ; p += sizeof(key_t); int32_t minRecSizes = *(int32_t *)p ; p += 4; int32_t startFileNum = *(int32_t *)p ; p += 4; int32_t numFiles = *(int32_t *)p ; p += 4; int32_t maxCacheAge = *(int32_t *)p ; p += 4; char rdbId = *p++; char addToCache = *p++; char doErrorCorrection = *p++; char includeTree = *p++; // this was messing up our niceness conversion logic int32_t niceness = slot->m_niceness;//(int32_t)(*p++); // still need to skip it though! p++; bool allowPageCache = (bool)(*p++); char ks = getKeySizeFromRdbId ( rdbId ); char *startKey = p; p+=ks; char *endKey = p; p+=ks; collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t); CollectionRec *xcr = g_collectiondb.getRec ( collnum ); if ( ! xcr ) g_errno = ENOCOLLREC; if( g_conf.m_logTraceMsg0 ) { logTrace( g_conf.m_logTraceMsg0, "rdbId....... %d", (int)rdbId ); logTrace( g_conf.m_logTraceMsg0, "key size.... %d", (int)ks ); logTrace( g_conf.m_logTraceMsg0, "startFileNum %" PRId32, startFileNum ); logTrace( g_conf.m_logTraceMsg0, "numFiles.... 
%" PRId32, numFiles ); } // error set from XmlDoc::cacheTermLists()? if ( g_errno ) { logTrace( g_conf.m_logTraceMsg0, "END. Invalid collection" ); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__); us->sendErrorReply ( slot , EBADRDBID ); return; } // . get the rdb we need to get the RdbList from // . returns NULL and sets g_errno on error //Msg0 msg0; //Rdb *rdb = msg0.getRdb ( rdbId ); Rdb *rdb = getRdbFromId ( rdbId ); if ( ! rdb ) { logTrace( g_conf.m_logTraceMsg0, "END. Invalid rdbId" ); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__); us->sendErrorReply ( slot , EBADRDBID ); return; } // keep track of stats rdb->readRequestGet ( requestSize ); // . do a local get // . create a msg5 to get the list State00 *st0 ; try { st0 = new (State00); } catch ( ... ) { g_errno = ENOMEM; log("Msg0: new(%" PRId32"): %s", (int32_t)sizeof(State00),mstrerror(g_errno)); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__); us->sendErrorReply ( slot , g_errno ); return; } mnew ( st0 , sizeof(State00) , "State00" ); // timing debug if ( g_conf.m_logTimingNet ) st0->m_startTime = gettimeofdayInMilliseconds(); // save slot in state st0->m_slot = slot; // save udp server to send back reply on st0->m_us = us; // init this one st0->m_niceness = niceness; st0->m_rdbId = rdbId; QUICKPOLL(niceness); // debug msg if ( maxCacheAge != 0 && ! addToCache ) { log( LOG_LOGIC, "net: msg0: check but don't add... rdbid=%" PRId32".", ( int32_t ) rdbId ); } // . if this request came over on the high priority udp server // make sure the priority gets passed along // . return if this blocks // . we'll call sendReply later if ( ! st0->m_msg5.getList ( rdbId , collnum , &st0->m_list , startKey , endKey , minRecSizes , includeTree , // include tree? addToCache , // addToCache? 
maxCacheAge , startFileNum , numFiles , st0 , gotListWrapper , niceness , doErrorCorrection , NULL , // cacheKeyPtr 0 , // retryNum 2 , // maxRetries true , // compensateForMerge syncPoint , false, allowPageCache ) ) { logTrace( g_conf.m_logTraceMsg0, "END. m_msg5.getList returned false" ); return; } // call wrapper ouselves logTrace( g_conf.m_logTraceMsg0, "Calling gotListWrapper" ); gotListWrapper ( st0 , NULL , NULL ); logTrace( g_conf.m_logTraceMsg0, "END" ); }
// . THIS Msg0 class must be alloc'd, i.e. not on the stack, etc.
// . if list is stored locally this tries to get it locally
// . otherwise tries to get the list from the network
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . NOTE: i was having problems with queries being cached too long, you
//   see the cache here is a NETWORK cache, so when the machines that owns
//   the list updates it on disk it can't flush our cache... so use a small
//   maxCacheAge of like , 30 seconds or so...
//
// . three ways the list can be fetched, tried in this order:
//   1. locally through Msg5::getList() when no hostId is given and the
//      key's shard is our own shard (or there is only one host)
//   2. from one specific host through UdpServer::sendRequest() when
//      hostId is not -1
//   3. from some host of the key's shard through Multicast::send()
// . the request built for 2. and 3. is parsed field by field, in the
//   same order, by handleRequest0(); keep the two in sync
//
// . parameters as this body uses them:
//   hostId            host to ask; forced to -1 when g_hostdb says the
//                     host is dead
//   ip, port          not referenced below; the Host record's m_ip and
//                     m_port are used instead (a local "port" shadows
//                     the parameter in the single-host branch)
//   maxCacheAge,
//   addToCache        handed to Msg5 / written into the request
//   list              reset on entry and remembered in m_list
//   startKey, endKey  copied into m_startKey / m_endKey; startKey >
//                     endKey deliberately cores
//   minRecSizes       a negative value deliberately cores
//   state, callback   remembered in m_state / m_callback
//   doMerge           not referenced below
//   firstHostId       handed to Multicast::send()
//   timeout           handed unchanged to sendRequest() but multiplied
//                     by 1000 for Multicast::send()
//                     NOTE(review): confirm the units of both callees
//   syncPoint         first 8 bytes of the request; also handed to Msg5
//   preferLocalReads  effectively ignored, it is overwritten with true
//   msg5              optional Msg5 to use for the local read; when NULL
//                     one is allocated and m_deleteMsg5 is set
//   forceLocalIndexdb forces m_shardNum to our own shard
//   noSplit           for RDB_POSDB, pick the shard by term id
//   forceParitySplit  when >= 0, used directly as the shard number
bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
		     int32_t      ip          , // info on hostId
		     int16_t     port        ,
		     int32_t      maxCacheAge , // max cached age in seconds
		     bool      addToCache  , // add net recv'd list to cache?
		     char      rdbId       , // specifies the rdb
		     collnum_t collnum ,
		     RdbList  *list        ,
		     const char     *startKey    ,
		     const char     *endKey      ,
		     int32_t      minRecSizes ,  // use -1 for no max
		     void     *state       ,
		     void    (* callback)(void *state ),//, RdbList *list ) ,
		     int32_t      niceness    ,
		     bool      doErrorCorrection ,
		     bool      includeTree ,
		     bool      doMerge     ,
		     int32_t      firstHostId   ,
		     int32_t      startFileNum  ,
		     int32_t      numFiles      ,
		     int64_t      timeout       ,
		     int64_t syncPoint     ,
		     int32_t      preferLocalReads ,
		     Msg5     *msg5             ,
		     bool      isRealMerge      ,
		     bool      allowPageCache    ,
		     bool      forceLocalIndexdb ,
		     bool      noSplit ,
		     int32_t      forceParitySplit  ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN. hostId: %" PRId64", rdbId: %d", hostId, (int)rdbId );

	// warning
	if ( collnum < 0 ) log(LOG_LOGIC,"net: NULL collection. msg0.");

	// reset the list they passed us
	list->reset();
	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( rdbId );

//	if( g_conf.m_logTraceMsg0 )
//	{
//		log("%s:%s:%d: rdbId. [%d]", __FILE__,__func__,__LINE__, (int)rdbId);
//		log("%s:%s:%d: m_ks.. [%d]", __FILE__,__func__,__LINE__, (int)m_ks);
//		log("%s:%s:%d: hostId [%" PRId64"]", __FILE__,__func__,__LINE__, hostId);
//	}

	// if startKey > endKey, don't read anything
	//if ( startKey > endKey ) return true;
	// (this used to return true; now it cores on purpose by writing
	// through a NULL pointer)
	if ( KEYCMP(startKey,endKey,m_ks)>0 ) { char *xx=NULL;*xx=0; }//rettrue
	// . reset hostid if it is dead
	// . this is causing UOR queries to take forever when we have a dead
	if ( hostId >= 0 && g_hostdb.isDead ( hostId ) ) hostId = -1;
	// no longer accept negative minrecsize (cores on purpose)
	if ( minRecSizes < 0 ) {
		g_errno = EBADENGINEER;
		logTrace( g_conf.m_logTraceMsg0, "END" );

		log(LOG_LOGIC, "net: msg0: Negative minRecSizes no longer supported.");
		char *xx=NULL;*xx=0;
	}

	// remember these
	m_state         = state;
	m_callback      = callback;
	m_list          = list;
	m_hostId        = hostId;
	m_niceness      = niceness;
	m_addToCache    = addToCache;
	// . these define our request 100%
	KEYSET(m_startKey,startKey,m_ks);
	KEYSET(m_endKey,endKey,m_ks);
	m_minRecSizes   = minRecSizes;
	m_rdbId         = rdbId;
	m_collnum = collnum;//          = coll;
	m_isRealMerge   = isRealMerge;
	m_allowPageCache = allowPageCache;

	// . group to ask is based on the first key
	// . we only do 1 group per call right now
	// . groupMask must turn on higher bits first (count downwards kinda)
	// . titledb and spiderdb use special masks to get groupId

	// if diffbot.cpp is reading spiderdb from each shard we have to
	// get groupid from hostid here lest we core in getGroupId() below.
	// it does that for dumping spiderdb to the client browser. they
	// can download the whole enchilada.
	if ( hostId >= 0 && m_rdbId == RDB_SPIDERDB )
		m_shardNum = 0;
	// did they force it? core until i figure out what this is
	else if ( forceParitySplit >= 0 )
		//m_groupId =  g_hostdb.getGroupId ( forceParitySplit );
		m_shardNum = forceParitySplit;
	else
		//m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
		m_shardNum = getShardNum ( m_rdbId , startKey );

	// if we are looking up a termlist in posdb that is split by termid and
	// not the usual docid then we have to set this posdb key bit that tells
	// us that ...
	// (this overrides whichever shard was chosen just above)
	if ( noSplit && m_rdbId == RDB_POSDB )
		m_shardNum = g_hostdb.getShardNumByTermId ( startKey );

	// how is this used?
	// (takes precedence over every shard choice above)
	if ( forceLocalIndexdb ) m_shardNum = getMyShardNum();

//	if( g_conf.m_logTraceMsg0 ) log("%s:%s:%d: shardNum [%" PRId32"]", __FILE__,__func__, __LINE__, m_shardNum);

	// . store these parameters
	// . get a handle to the rdb in case we can satisfy locally
	// . returns NULL and sets g_errno on error
	QUICKPOLL((m_niceness));
	Rdb *rdb = getRdbFromId ( m_rdbId );
	// unknown rdb: report "did not block"; g_errno is whatever
	// getRdbFromId() left behind
	if ( ! rdb ) return true;
	// we need the fixedDataSize
	m_fixedDataSize = rdb->getFixedDataSize();
	m_useHalfKeys   = rdb->useHalfKeys();
	// . debug msg
	// . Msg2 does this when checking for a cached compound list.
	//   compound lists do not actually exist, they are merges of smaller
	//   UOR'd lists.
	if ( maxCacheAge != 0 && ! addToCache && (numFiles > 0 || includeTree)) {
		log( LOG_LOGIC, "net: msg0: Weird. check but don't add... rdbid=%" PRId32".", ( int32_t ) m_rdbId );
	}

	// set this here since we may not call msg5 if list not local
	//m_list->setFixedDataSize ( m_fixedDataSize );

	// . now that we do load balancing we don't want to do a disk lookup
	//   even if local if we are merging or dumping
	// . UNLESS g_conf.m_preferLocalReads is true
	if ( preferLocalReads == -1 )
		preferLocalReads = g_conf.m_preferLocalReads;

	// . always prefer local for full split clusterdb
	// . and keep the tfndb/titledb lookups in the same stripe
	// . so basically we can't do biased caches if fully split
	//if ( g_conf.m_fullSplit ) preferLocalReads = true;
	// unconditional: this makes the lookup just above and the
	// "! preferLocalReads" test just below have no effect
	preferLocalReads = true;

	// it it stored locally?
	bool isLocal = ( m_hostId == -1 && //g_hostdb.m_groupId == m_groupId );
			 m_shardNum == getMyShardNum() );
	// only do local lookups if this is true
	if ( ! preferLocalReads ) isLocal = false;

	/*
	int64_t singleDocIdQuery = 0LL;
	if ( rdbId == RDB_POSDB ) {
		int64_t d1 = g_posdb.getDocId(m_startKey);
		int64_t d2 = g_posdb.getDocId(m_endKey);
		if ( d1+1 == d2 ) singleDocIdQuery = d1;
	}

	// . try the LOCAL termlist cache
	// . so when msg2 is evaluating a gbdocid:| query and it has to
	//   use msg0 to go across the network to get the same damn termlist
	//   over and over again for the same docid, this will help alot.
	// . ideally it'd be nice if the seo pipe in xmldoc.cpp can try to
	//   send the same gbdocid:xxxx docids to the same hosts. maybe hash
	//   based on docid into the list of hosts and if that host is busy
	//   just chain until we find someone not busy.
	if ( singleDocIdQuery &&
	     getListFromTermListCache ( coll,
					m_startKey,
					m_endKey,
					maxCacheAge,
					list ) )
		// found!
		return true;
	*/

	// but always local if only one host
	if ( g_hostdb.getNumHosts() == 1 ) isLocal = true;

	// . if the group is local then do it locally
	// . Msg5::getList() returns false if blocked, true otherwise
	// . Msg5::getList() sets g_errno on error
	// . don't do this if m_hostId was specified
	if ( isLocal ) {
		logTrace( g_conf.m_logTraceMsg0, "isLocal" );

		// use the caller's Msg5 if one was supplied, otherwise
		// allocate our own and remember that we own it
		if ( msg5 ) {
			m_msg5 = msg5;
			m_deleteMsg5 = false;
		}
		else {
			try { m_msg5 = new ( Msg5 ); }
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request.",
				    getDbnameFromId(m_rdbId));
				// fall back to the network path below.
				// NOTE(review): g_errno is still ENOMEM
				// when we get there -- confirm that is
				// harmless for callers
				goto skip;
			}
			mnew ( m_msg5 , sizeof(Msg5) , "Msg0::Msg5" );
			m_deleteMsg5 = true;
		}

		QUICKPOLL(m_niceness);
		if ( ! m_msg5->getList ( rdbId,
					 m_collnum ,
					 m_list ,
					 m_startKey ,
					 m_endKey   ,
					 m_minRecSizes ,
					 includeTree   , // include Tree?
					 addToCache    , // addToCache?
					 maxCacheAge   ,
					 startFileNum  ,
					 numFiles      ,
					 this ,
					 gotListWrapper2   ,
					 niceness ,
					 doErrorCorrection ,
					 NULL , // cacheKeyPtr
					 0    , // retryNum
					 -1   , // maxRetries
					 true , // compensateForMerge
					 syncPoint ,
					 m_isRealMerge ,
					 m_allowPageCache ) ) {
			// blocked; gotListWrapper2 will be called later
			logTrace( g_conf.m_logTraceMsg0, "END, return false" );
			return false;
		}

		// nuke it
		reset();
		logTrace( g_conf.m_logTraceMsg0, "END, return true" );
		return true;
	}

skip:
	// debug msg
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,"net: msg0: Sending request for data to "
		    "shard=%" PRIu32" "
		    "listPtr=%" PTRFMT" minRecSizes=%" PRId32" termId=%" PRIu64" "
		    //"startKey.n1=%" PRIx32",n0=%" PRIx64" (niceness=%" PRId32")",
		    "startKey.n1=%" PRIx64",n0=%" PRIx64" (niceness=%" PRId32")",
		    //g_hostdb.makeHostId ( m_groupId ) ,
		    m_shardNum,
		    (PTRTYPE)m_list,
		    m_minRecSizes, g_posdb.getTermId(m_startKey) ,
		    //m_startKey.n1,m_startKey.n0 , (int32_t)m_niceness);
		    KEY1(m_startKey,m_ks),KEY0(m_startKey),
		    (int32_t)m_niceness);

	// no reply buffer is supplied to either send call below
	char *replyBuf = NULL;
	int32_t  replyBufMaxSize = 0;
	bool  freeReply = true;

	// . make a request with the info above (note: not in network order)
	// . IMPORTANT!!!!! if you change this change
	//   Multicast.cpp::sleepWrapper1 too!!!!!!!!!!!!
	//   no, not anymore, we commented out that request peeking code
	// . layout: syncPoint(8) minRecSizes(4) startFileNum(4) numFiles(4)
	//   maxCacheAge(4) rdbId(1) addToCache(1) doErrorCorrection(1)
	//   includeTree(1) niceness(1) allowPageCache(1) startKey(m_ks)
	//   endKey(m_ks) collnum(sizeof(collnum_t))
	char *p = m_request;
	*(int64_t *) p = syncPoint        ; p += 8;
	//*(key_t     *) p = m_startKey       ; p += sizeof(key_t);
	//*(key_t     *) p = m_endKey         ; p += sizeof(key_t);
	*(int32_t      *) p = m_minRecSizes    ; p += 4;
	*(int32_t      *) p = startFileNum     ; p += 4;
	*(int32_t      *) p = numFiles         ; p += 4;
	*(int32_t      *) p = maxCacheAge      ; p += 4;
	// sanity check: the rdbId byte must land at RDBIDOFFSET (cores
	// on purpose if the layout above ever changes)
	if ( p - m_request != RDBIDOFFSET ) { char *xx=NULL;*xx=0; }
	*p               = m_rdbId          ; p++;
	*p               = addToCache       ; p++;
	*p               = doErrorCorrection; p++;
	*p               = includeTree      ; p++;
	// handleRequest0() skips this byte and uses the slot's niceness
	*p               = (char)niceness   ; p++;
	*p               = (char)m_allowPageCache; p++;
	KEYSET(p,m_startKey,m_ks); ; p+=m_ks;
	KEYSET(p,m_endKey,m_ks); ; p+=m_ks;
	// NULL terminated collection name
	//strcpy ( p , coll ); p += gbstrlen ( coll ); *p++ = '\0';
	// the collection is sent as its number now
	*(collnum_t *)p = m_collnum; p += sizeof(collnum_t);
	m_requestSize    = p - m_request;
	// ask an individual host for this list if hostId is NOT -1
	if ( m_hostId != -1 ) {
		// get Host
		Host *h = g_hostdb.getHost ( m_hostId );
		if ( ! h ) {
			g_errno = EBADHOSTID;
			log(LOG_LOGIC,"net: msg0: Bad hostId of %" PRId64".", m_hostId);
			logTrace( g_conf.m_logTraceMsg0, "END, return true. Bad hostId" );
			return true;
		}

		// if niceness is 0, use the higher priority udpServer
		// (only one udp server is used here now; this local
		// "port" shadows the function parameter of the same name)
		UdpServer *us ;
		uint16_t port;
		QUICKPOLL(m_niceness);

		us = &g_udpServer ; port = h->m_port ;
		// . returns false on error and sets g_errno, true otherwise
		// . calls callback when reply is received (or error)
		// . we return true if it returns false
		if ( ! us->sendRequest ( m_request     ,
					 m_requestSize ,
					 0x00          , // msgType
					 h->m_ip       ,
					 port          ,
					 m_hostId      ,
					 NULL          , // the slotPtr
					 this          ,
					 gotSingleReplyWrapper ,
					 timeout       ,
					 -1            , // backoff
					 -1            , // maxwait
					 replyBuf      ,
					 replyBufMaxSize ,
					 m_niceness     ) ) { // cback niceness
			// NOTE(review): this is the failure branch, yet the
			// trace text says "Request sent"
			logTrace( g_conf.m_logTraceMsg0, "END, return true. Request sent" );
			return true;
		}

		// return false cuz it blocked
		logTrace( g_conf.m_logTraceMsg0, "END, return false. sendRequest blocked" );
		return false;
	}
	// timing debug
	if ( g_conf.m_logTimingNet ) m_startTime = gettimeofdayInMilliseconds();
	else                         m_startTime = 0;

	// . get the top int32_t of the key
	// . i guess this will work for 128 bit keys... hmmmmm
	int32_t keyTop = hash32 ( (char *)startKey , m_ks );

	// . otherwise, multicast to a host in group "groupId"
	// . returns false and sets g_errno on error
	// . calls callback on completion
	// . select first host to send to in group based on upper 32 bits
	//   of termId (m_startKey.n1)
	// . need to send out to all the indexdb split hosts
	m_numRequests = 0;
	m_numReplies  = 0;
	//for ( int32_t i = 0; i < m_numSplit; i++ ) {

	QUICKPOLL(m_niceness);
	//int32_t gr;
	char *buf;
	buf = replyBuf;

	// get the multicast
	Multicast *m = &m_mcast;

	if ( ! m->send ( m_request    ,
			 m_requestSize,
			 0x00         , // msgType 0x00
			 false        , // does multicast own request?
			 m_shardNum ,
			 false        , // send to whole group?
			 //m_startKey.n1, // key is passed on startKey
			 keyTop       , // key is passed on startKey
			 this         , // state data
			 NULL         , // state data
			 gotMulticastReplyWrapper0 ,
			 timeout*1000 , // timeout
			 niceness     ,
			 firstHostId ,
			 buf             ,
			 replyBufMaxSize ,
			 freeReply       , // free reply buf?
			 true            , // do disk load balancing?
			 maxCacheAge     ,
			 //(key_t *)cacheKey        ,
			 // multicast uses it for determining the best
			 // host to send the request to when doing
			 // disk load balancing. if the host has our
			 // data cached, then it will probably get to
			 // handle the request. for now let's just assume
			 // this is a 96-bit key. TODO: fix...
			 0 , // *(key_t *)cacheKey        ,
			 rdbId           ,
			 minRecSizes     ) )
	{
		log(LOG_ERROR, "net: Failed to send request for data from %s in shard "
		    "#%" PRIu32" over network: %s.",
		    getDbnameFromId(m_rdbId),m_shardNum, mstrerror(g_errno));
		// but speed it up
		m_errno = g_errno;
		m->reset();
		// m_numRequests was set to 0 above and is only bumped
		// after a successful send, so this test cannot be true here
		if ( m_numRequests > 0 ) {
			logTrace( g_conf.m_logTraceMsg0, "END - returning false" );
			return false;
		}

		logTrace( g_conf.m_logTraceMsg0, "END - returning true" );
		return true;
	}
	m_numRequests++;

	// we blocked
	logTrace( g_conf.m_logTraceMsg0, "END - returning false, blocked" );
	return false;
}