bool Syncdb::init ( ) { // reset loopReset(); m_doRcp = false; m_rcpStarted = false; // setup quick tree if ( ! m_qt.set ( 0 , // fixedDataSize 300000 , // 300k nodes true , // balance? -1 , // maxmem, no max false , // ownData? "tresyncdb" , false , // dataInPtrs? "quicktree" , // dbname 16 , // keySize false ))// useProtection? return false; BigFile f; f.set ( g_hostdb.m_dir , "quicktree.dat" ); // only load if it exists bool exists = f.doesExist(); // load it if ( exists && ! m_qt.fastLoad( &f , &m_stack ) ) return log("sync: quicktree.dat load failed: %s", mstrerror(g_errno)); // done f.close(); // assume permanently out of sync long val = 2; // load the insync.dat file f.set ( g_hostdb.m_dir , "insync.dat" ); // fail on open failure if ( ! f.open ( O_RDONLY ) ) return false; // if not there, permanently out of sync if ( ! f.doesExist() ) log("sync: insync.dat does not exist. Assuming host is " "unrecoverable."); else { // get the value char buf[20]; long n = f.read ( &buf , 10 , 0 ) ; if ( n <= 0 ) return log("sync: read insync.dat: %s", mstrerror(g_errno)); // must be digit if ( ! is_digit ( buf[0] ) ) return log("sync: insync.dat has no number in it."); // unlink it if ( ! f.unlink() ) return log("sync: failed to unlink insync.dat: %s", mstrerror(g_errno)); // get the value val = atol ( buf ); } // bad val? if ( val < 0 || val > 2 ) return log("sync: insync.dat had bad value of %li",val); // report if in sync or not if ( val == 0 ) log("sync: insync.dat says out of sync"); if ( val == 1 ) log("sync: insync.dat says in sync"); if ( val == 2 ) log("sync: insync.dat says PERMANENTLY out of sync"); // set it Host *h = g_hostdb.m_myHost; if ( val == 1 ) h->m_inSync = 1; if ( val == 2 ) h->m_isPermanentOutOfSync = 1; // call this once per second if ( ! g_loop.registerSleepCallback ( 1000 , NULL , sleepWrapper ) ) return false; // 10 MB long maxTreeMem = 10000000; // . what's max # of tree nodes? // . key+4+left+right+parents+dataPtr = 12+4 +4+4+4+4 = 32 // . 28 bytes per record when in the tree long maxTreeNodes = maxTreeMem / ( 16 + 1000 ); // . initialize our own internal rdb // . records are actual msg4 requests received from Msg4 // . the key is formed calling Syncdb::makeKey() which is based on // the tid, sid and zid of the msg4 request, where tid is the // twin hostid we are chatting with in our group, sid is the // ORIGINAL sending hostid of the msg4 request, and zid is the // kinda transaction #, and is unique. if ( ! m_rdb.init ( g_hostdb.m_dir , "syncdb" , true , // dedup -1 , // dataSize is variable 50 , // min files to merge maxTreeMem , maxTreeNodes , // maxTreeNodes , true , // balance tree? 50000 , // maxCacheMem , 100 , // maxCacheNodes , false , // half keys? false , // save cache? NULL , // page cache false , // is titledb false , // preload disk page cache 16 , // key size false , // bias disk page cache? true ))// is collectionless? return false; // add the coll //if ( ! g_syncdb.m_rdb.addColl ( "dummy" ) ) return true; // reset quick tree? if ( ! h->m_isPermanentOutOfSync ) return true; // clear it all! m_qt.clear(); // add the base since it is a collectionless rdb return m_rdb.addRdbBase1 ( NULL ); }
// // . ENTRY POINT FOR IMPORTING TITLEDB RECS FROM ANOTHER CLUSTER // . when user clicks 'begin' in import page we come here.. // . so when that parm changes in Parms.cpp we sense that and call // beginImport(CollectionRec *cr) // . or on startup we call resumeImports to check each coll for // an import in progress. // . search for files named titledb*.dat // . if none found just return // . when msg7 inject competes it calls this // . call this from sleep wrapper in Process.cpp // . returns false if would block (outstanding injects), true otherwise // . sets g_errno on error bool ImportState::importLoop ( ) { CollectionRec *cr = g_collectiondb.getRec ( m_collnum ); if ( ! cr || g_hostdb.m_hostId != 0 ) { // if coll was deleted! log("import: collnum %li deleted while importing into", (long)m_collnum); //if ( m_numOut > m_numIn ) return true; // delete the entire import state i guess // what happens if we have a msg7 reply come back in? // it should see the collrec is NULL and just fail. mdelete ( this, sizeof(ImportState) , "impstate"); delete (this); return true; } INJECTLOOP: // stop if waiting on outstanding injects long long out = m_numOut - m_numIn; if ( out >= cr->m_numImportInjects ) { g_errno = 0; return false; } if ( ! cr->m_importEnabled ) { // wait for all to return if ( out > 0 ) return false; // then delete it log("import: collnum %li import loop disabled", (long)m_collnum); mdelete ( this, sizeof(ImportState) , "impstate"); delete (this); return true; } // scan each titledb file scanning titledb0001.dat first, // titledb0003.dat second etc. //long long offset = -1; // . when offset is too big for current m_bigFile file then // we go to the next and set offset to 0. // . sets m_bf and m_fileOffset if ( ! setCurrentTitleFileAndOffset ( ) ) {//cr , -1 ); log("import: import: no files to read"); //goto INJECTLOOP; return true; } // this is -1 if none remain! if ( m_fileOffset == -1 ) { log("import: import fileoffset is -1. done."); return true; } long long saved = m_fileOffset; //Msg7 *msg7; //GigablastRequest *gr; //SafeBuf *sbuf = NULL; long need = 12; long dataSize = -1; //XmlDoc xd; key_t tkey; bool status; SafeBuf tmp; SafeBuf *sbuf = &tmp; long long docId; long shardNum; long key; Multicast *mcast; char *req; long reqSize; if ( m_fileOffset >= m_bfFileSize ) { log("inject: import: done processing file %li %s", m_bfFileId,m_bf.getFilename()); goto nextFile; } // read in title rec key and data size status = m_bf.read ( &tkey, sizeof(key_t) , m_fileOffset ); //if ( n != 12 ) goto nextFile; if ( g_errno ) { log("inject: import: reading file error: %s. advancing " "to next file",mstrerror(g_errno)); goto nextFile; } m_fileOffset += 12; // if negative key, skip if ( (tkey.n0 & 0x01) == 0 ) { goto INJECTLOOP; } // if non-negative then read in size status = m_bf.read ( &dataSize , 4 , m_fileOffset ); if ( g_errno ) { log("main: failed to read in title rec " "file. %s. Skipping file %s", mstrerror(g_errno),m_bf.getFilename()); goto nextFile; } m_fileOffset += 4; need += 4; need += dataSize; need += 4; // collnum, first 4 bytes if ( dataSize < 0 || dataSize > 500000000 ) { log("main: could not scan in titledb rec of " "corrupt dataSize of %li. BAILING ENTIRE " "SCAN of file %s",dataSize,m_bf.getFilename()); goto nextFile; } //gr = &msg7->m_gr; //XmlDoc *xd = getAvailXmlDoc(); //msg7 = getAvailMsg7(); mcast = getAvailMulticast(); // if none, must have to wait for some to come back to us if ( ! mcast ) { // restore file offset //m_fileOffset = saved; // no, must have been a oom or something log("import: import no mcast available"); return true;//false; } // this is for holding a compressed titlerec //sbuf = &mcast->m_sbuf;//&gr->m_sbuf; // point to start of buf sbuf->reset(); // ensure we have enough room sbuf->reserve ( need ); // collnum first 4 bytes sbuf->pushLong( (long)m_collnum ); // store title key sbuf->safeMemcpy ( &tkey , sizeof(key_t) ); // then datasize if any. neg rec will have -1 datasize if ( dataSize >= 0 ) sbuf->pushLong ( dataSize ); // then read data rec itself into it, compressed titlerec part if ( dataSize > 0 ) { // read in the titlerec after the key/datasize status = m_bf.read ( sbuf->getBuf() , dataSize , m_fileOffset ); if ( g_errno ) { // n != dataSize ) { log("main: failed to read in title rec " "file. %s. Skipping file %s", mstrerror(g_errno),m_bf.getFilename()); // essentially free up this msg7 now //msg7->m_inUse = false; //msg7->reset(); goto nextFile; } // advance m_fileOffset += dataSize; // it's good, count it sbuf->m_length += dataSize; } // set xmldoc from the title rec //xd->set ( sbuf.getBufStart() ); //xd->m_masterState = NULL; //xd->m_masterCallback ( titledbInjectLoop ); // we use this so we know where the doc we are injecting // was in the foregien titledb file. so we can update our bookmark // code. mcast->m_hackFileOff = saved;//m_fileOffset; mcast->m_hackFileId = m_bfFileId; // // inject a title rec buf this time, we are doing an import // FROM A TITLEDB FILE!!! // //gr->m_titleRecBuf = &sbuf; // break it down into gw // xd.set2 ( sbuf.getBufStart() , // sbuf.length() , // max size // cr->m_coll, // use our coll // NULL , // pbuf for page parser // 1 , // niceness // NULL ); //sreq ); // // note it // log("import: importing %s",xd.m_firstUrl.getUrl()); // now we can set gr for the injection // TODO: inject the whole "sbuf" so we get sitenuminlinks etc // all exactly the same... // gr->m_url = xd.getFirstUrl()->getUrl(); // gr->m_queryToScrape = NULL; // gr->m_contentDelim = 0; // gr->m_contentTypeStr = g_contentTypeStrings [xd.m_contentType]; // gr->m_contentFile = NULL; // gr->m_content = xd.ptr_utf8Content; // gr->m_diffbotReply = NULL; // gr->m_injectLinks = false; // gr->m_spiderLinks = true; // gr->m_shortReply = false; // gr->m_newOnly = false; // gr->m_deleteUrl = false; // gr->m_recycle = true; // recycle content? or sitelinks? // gr->m_dedup = false; // gr->m_hasMime = false; // gr->m_doConsistencyTesting = false; // gr->m_getSections = false; // gr->m_gotSections = false; // gr->m_charset = xd.m_charset; // gr->m_hopCount = xd.m_hopCount; // // point to next doc in the titledb file // //m_fileOffset += need; // get docid from key docId = g_titledb.getDocIdFromKey ( &tkey ); // get shard that holds the titlerec for it shardNum = g_hostdb.getShardNumFromDocId ( docId ); // for selecting which host in the shard receives it key = (long)docId; m_numOut++; // then index it. master callback will be called //if ( ! xd->index() ) return false; // TODO: make this forward the request to an appropriate host!! // . gr->m_sbuf is set to the titlerec so this should handle that // and use XmlDoc::set4() or whatever // if ( msg7->injectTitleRec ( msg7 , // state // gotMsg7ReplyWrapper , // callback // cr )) { // // it didn't block somehow... // msg7->m_inUse = false; // msg7->gotMsg7Reply(); // } req = sbuf->getBufStart(); reqSize = sbuf->length(); if ( reqSize != need ) { char *xx=NULL;*xx=0 ; } // do not free it, let multicast free it after sending it sbuf->detachBuf(); if ( ! mcast->send ( req , reqSize , 0x07 , true , // ownmsg? shardNum, false, // send to whole shard? key , // for selecting host in shard mcast , // state NULL , // state2 gotMulticastReplyWrapper , 999999 ) ) { // total timeout in seconds log("import: import mcast had error: %s",mstrerror(g_errno)); m_numIn++; } goto INJECTLOOP; nextFile: // invalidate this flag //m_offIsValid = false; // . and call this function. we add one to m_bfFileId so we // do not re-get the file we just injected. // . sets m_bf and m_fileOffset // . returns false if nothing to read if ( ! setCurrentTitleFileAndOffset ( ) ) { //cr , m_bfFileId+1 ); log("import: import: no files left to read"); //goto INJECTLOOP; return true; } // if it returns NULL we are done! log("main: titledb injection loop completed. waiting for " "outstanding injects to return."); if ( m_numOut > m_numIn ) return false; log("main: all injects have returned. DONE."); // dummy return return true; }