int main(int argc, char **argv) {
	if (argc < 2) {
		print_usage(argv[0]);
		return 1;
	}

	if (strcmp(argv[1], "--h") == 0 || strcmp(argv[1], "--help") == 0) {
		print_usage(argv[0]);
		return 1;
	}

	// dirname()/basename() may modify their argument, so work on a copy
	// of argv[1] for each call
	char filepath[PATH_MAX];
	char dir[PATH_MAX];
	strcpy(filepath, argv[1]);
	strcpy(dir, dirname(filepath));

	char filename[PATH_MAX];
	strcpy(filepath, argv[1]);
	strcpy(filename, basename(filepath));

	// initialize library
	g_mem.init();
	hashinit();
	g_conf.init(NULL);

	BigFile bigFile;
	bigFile.set(dir, filename);

	RdbBuckets buckets;
	if (starts_with(filename, "posdb")) {
		buckets.set(Posdb::getFixedDataSize(), g_conf.m_posdbMaxTreeMem,
		            "buckets-posdb", RDB_POSDB, "posdb",
		            Posdb::getKeySize());
		if (!buckets.fastLoad(&bigFile, "posdb")) {
			fprintf(stdout, "Unable to load bucket\n");
			return 1;
		}
		buckets.printBuckets(printRecord);
	} else {
		fprintf(stdout, "Unsupported rdb type\n");
		return 1;
	}

	return 0;
}
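// NOTE: the dump tool above relies on two helpers that are not part of this
// listing: starts_with(), which tests a filename prefix, and printRecord(),
// the per-record callback handed to RdbBuckets::printBuckets(). A minimal
// sketch of starts_with() follows, purely for illustration; the real helper
// (and the exact callback signature printBuckets() expects) may differ.
#include <string.h>

static bool starts_with(const char *str, const char *prefix) {
	// true if 'str' begins with 'prefix'
	return strncmp(str, prefix, strlen(prefix)) == 0;
}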
void Indexdb::deepVerify ( char *coll ) {
	log ( LOG_INFO, "db: Deep Verifying Indexdb for coll %s...", coll );
	g_threads.disableThreads();

	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	key_t startKey;
	key_t endKey;
	startKey.setMin();
	endKey.setMax();
	//long minRecSizes = 64000;

	collnum_t collnum = g_collectiondb.getCollnum(coll);
	RdbBase *rdbBase = g_indexdb.m_rdb.getBase(collnum);
	long numFiles = rdbBase->getNumFiles();
	long currentFile = 0;

 deepLoop:
	// done after scanning all files
	if ( currentFile >= numFiles ) {
		g_threads.enableThreads();
		log ( LOG_INFO, "db: Finished deep verify for %li files.",
		      numFiles );
		return;
	}

	// scan this file
	if ( ! msg5.getList ( RDB_INDEXDB ,
			      coll        ,
			      &list       ,
			      startKey    ,
			      endKey      ,
			      64000       , // minRecSizes
			      true        , // includeTree
			      false       , // add to cache?
			      0           , // max cache age
			      currentFile , // startFileNum
			      1           , // numFiles
			      NULL        , // state
			      NULL        , // callback
			      0           , // niceness
			      false       , // err correction?
			      NULL        ,
			      0           ,
			      -1          ,
			      true        ,
			      -1LL        ,
			      &msg5b      ,
			      false       )) {
		g_threads.enableThreads();
		log("db: HEY! it did not block");
		return;
	}

	long count = 0;
	long got   = 0;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
		unsigned long groupId = getGroupId ( RDB_INDEXDB , &k );
		if ( groupId == g_hostdb.m_groupId ) got++;
	}
	if ( got != count ) {
		BigFile *f = rdbBase->getFile(currentFile);
		log ("db: File %s: Out of first %li records in indexdb, "
		     "only %li belong to our group.",
		     f->getFilename(),count,got );
	}
	//else
	//	log ( LOG_INFO, "db: File %li: Indexdb passed verification "
	//	      "successfully for %li recs.",currentFile,count );

	// next file
	currentFile++;
	goto deepLoop;
}
// . but now that we may get a list remotely to fix data corruption, // this may indeed block bool Msg3::doneScanning ( ) { QUICKPOLL(m_niceness); // . did we have any error on any scan? // . if so, repeat ALL of the scans g_errno = m_errno; // 2 retry is the default int32_t max = 2; // see if explicitly provided by the caller if ( m_maxRetries >= 0 ) max = m_maxRetries; // now use -1 (no max) as the default no matter what max = -1; // ENOMEM is particulary contagious, so watch out with it... if ( g_errno == ENOMEM && m_maxRetries == -1 ) max = 0; // msg0 sets maxRetries to 2, don't let max stay set to -1 if ( g_errno == ENOMEM && m_maxRetries != -1 ) max = m_maxRetries; // when thread cannot alloc enough read buf it keeps the read buf // set to NULL and BigFile.cpp sets g_errno to EBUFTOOSMALL if ( g_errno == EBUFTOOSMALL && m_maxRetries == -1 ) max = 0; // msg0 sets maxRetries to 2, don't let max stay set to -1 if ( g_errno == EBUFTOOSMALL && m_maxRetries != -1 ) max = m_maxRetries; // . if no thread slots available, that hogs up serious memory. // the size of Msg3 is 82k, so having just 5000 of them is 430MB. // . i just made Msg3 alloc mem when it needs more than about 2k // so this problem is greatly reduced, therefore let's keep // retrying... forever if no thread slots in thread queue since // we become the thread queue in a way. if ( g_errno == ENOTHREADSLOTS ) max = -1; // this is set above if the map has the same consecutive key repeated // and the read is enormous if ( g_errno == ECORRUPTDATA ) max = 0; // usually bad disk failures, don't retry those forever //if ( g_errno == EIO ) max = 3; // no, now our hitachis return these even when they're good so // we have to keep retrying forever if ( g_errno == EIO ) max = -1; // count these so we do not take drives offline just because // kernel ring buffer complains... if ( g_errno == EIO ) g_numIOErrors++; // bail early on high priority reads for these errors if ( g_errno == EDISKSTUCK && m_niceness == 0 ) max = 0; if ( g_errno == EIO && m_niceness == 0 ) max = 0; // how does this happen? we should never bail out on a low priority // disk read... we just wait for it to complete... if ( g_errno == EDISKSTUCK && m_niceness != 0 ) { char *xx=NULL;*xx=0;} // on I/O, give up at call it corrupt after a while. some hitachis // have I/O errros on little spots, like gk88, maybe we can fix him if ( g_errno == EIO && m_retryNum >= 5 ) { m_errno = ECORRUPTDATA; m_hadCorruption = true; // do not do any retries any more max = 0; } // convert m_errno to ECORRUPTDATA if it is EBUFTOOSMALL and the // max of the bytesToRead are over 500MB. // if bytesToRead was ludicrous, then assume that the data file // was corrupted, the map was regenerated and it patched // over the corrupted bits which were 500MB or more in size. // we cannot practically allocate that much, so let's just // give back an empty buffer. treat it like corruption... // the way it patches is to store the same key over all the corrupted // pages, which can get pretty big. so if you read a range with that // key you will be hurting!! // this may be the same scenario as when the rdbmap has consecutive // same keys. see above where we set m_errno to ECORRUPTDATA... if ( g_errno == EBUFTOOSMALL ) { int32_t biggest = 0; for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) { if ( m_scans[i].m_bytesToRead < biggest ) continue; biggest = m_scans[i].m_bytesToRead; } if ( biggest > 500000000 ) { log("db: Max read size was %" PRId32" > 500000000. 
Assuming " "corrupt data in data file.",biggest); m_errno = ECORRUPTDATA; m_hadCorruption = true; // do not do any retries on this, the read was > 500MB max = 0; } } // if shutting down gb then limit to 20 so we can shutdown because // it can't shutdown until all threads are out of the queue i think if ( g_process.m_mode == EXIT_MODE && max < 0 ) { //log("msg3: forcing retries to 0 because shutting down"); max = 0; } // get base, returns NULL and sets g_errno to ENOCOLLREC on error RdbBase *base = getRdbBase( m_rdbId, m_collnum ); if ( ! base ) { return true; } // this really slows things down because it blocks the cpu so // leave it out for now #ifdef GBSANITYCHECK // check for corruption here, do not do it again in Msg5 if we pass if ( ! g_errno ) { // && g_conf.m_doErrorCorrection ) { int32_t i; for ( i = 0 ; i < m_numFileNums ; i++ ) if ( ! m_lists[i].checkList_r ( false, false ) ) break; if ( i < m_numFileNums ) { g_errno = ECORRUPTDATA; m_errno = ECORRUPTDATA; max = g_conf.m_corruptRetries; // try 100 times log("db: Encountered corrupt list in file %s.", base->getFile(m_fileNums[i])->getFilename()); } else m_listsChecked = true; } #endif // try to fix this error i've seen if ( g_errno == EBADENGINEER && max == -1 ) max = 100; // . if we had a ETRYAGAIN error, then try again now // . it usually means the whole file or a part of it was deleted // before we could finish reading it, so we should re-read all now // . RdbMerge deletes BigFiles after it merges them and also chops // off file heads // . now that we have threads i'd imagine we'd get EBADFD or something // . i've also seen "illegal seek" as well if ( m_errno && (m_retryNum < max || max < 0) && // this will complete in due time, we can't call a sleep wrapper // on it because the read is really still pending... m_errno != EDISKSTUCK ) { // print the error static time_t s_time = 0; time_t now = getTime(); if ( now - s_time > 5 || g_errno != ENOTHREADSLOTS ) { log("net: Had error reading %s: %s. Retrying. " "(retry #%" PRId32")", base->m_dbname,mstrerror(m_errno) , m_retryNum ); s_time = now; } // send email alert if in an infinite loop, but don't send // more than once every 2 hours static int32_t s_lastSendTime = 0; if ( m_retryNum == 100 && getTime() - s_lastSendTime > 3600*2){ // remove this for now it is going off all the time //g_pingServer.sendEmail(NULL,//g_hostdb.getMyHost(), // "100 read retries",true); s_lastSendTime = getTime(); } // clear g_errno cuz we should for call to readList() g_errno = 0; // free the list buffer since if we have 1000 Msg3s retrying // it will totally use all of our memory for ( int32_t i = 0 ; i < m_numChunks ; i++ ) m_lists[i].destructor(); // count retries m_retryNum++; // backoff scheme, wait 100ms more each time int32_t wait ; if ( m_retryNum == 1 ) wait = 10; else wait = 200 * m_retryNum; // . don't wait more than 10 secs between tries // . i've seen gf0 and gf16 get mega saturated if ( wait > 10000 ) wait = 10000; // wait 500 ms if ( g_loop.registerSleepCallback ( wait , // ms this , doneSleepingWrapper3, m_niceness)) return false; // otherwise, registration failed log( "net: Failed to register sleep callback for retry. " "Abandoning read. This is bad."); // return, g_errno should be set g_errno = EBUFTOOSMALL; m_errno = EBUFTOOSMALL; return true; } // if we got an error and should not retry any more then give up if ( g_errno ) { log( "net: Had error reading %s: %s. 
Giving up after %" PRId32" " "retries.", base->m_dbname,mstrerror(g_errno) , m_retryNum ); return true; } // note it if the retry finally worked if ( m_retryNum > 0 ) log(LOG_INFO,"disk: Read succeeded after retrying %" PRId32" times.", (int32_t)m_retryNum); // count total bytes for logging int32_t count = 0; // . constrain all lists to make merging easier // . if we have only one list, then that's nice cuz the constrain // will allow us to send it right away w/ zero copying // . if we have only 1 list, it won't be merged into a final list, // that is, we'll just set m_list = &m_lists[i] for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) { QUICKPOLL(m_niceness); // count total bytes for logging count += m_lists[i].getListSize(); // . hint offset is relative to the offset of first key we read // . if that key was only 6 bytes RdbScan shift the list buf // down 6 bytes to make the first key 12 bytes... a // requirement for all RdbLists // . don't inc it, though, if it was 0, pointing to the start // of the list because our shift won't affect that if ( m_scans[i].m_shifted == 6 && m_hintOffsets[i] > 0 ) m_hintOffsets[i] += 6; // posdb double compression if ( m_scans[i].m_shifted == 12 && m_hintOffsets[i] > 0 ) m_hintOffsets[i] += 12; // . don't constrain on minRecSizes here because it may // make our endKey smaller, which will cause problems // when Msg5 merges these lists. // . If all lists have different endKeys RdbList's merge // chooses the min and will merge in recs beyond that // causing a bad list BECAUSE we don't check to make // sure that recs we are adding are below the endKey // . if we only read from one file then constrain based // on minRecSizes so we can send the list back w/o merging // OR if just merging with RdbTree's list int32_t mrs ; // . constrain to m_minRecSizesOrig, not m_minRecSizes cuz // that could be adjusted by compensateForNegativeRecs() // . but, really, they should be the same if we only read from // the root file if ( m_numFileNums == 1 ) mrs = m_minRecSizesOrig; else mrs = -1; // . this returns false and sets g_errno on error // . like if data is corrupt BigFile *ff = base->getFile(m_fileNums[i]); // if we did a merge really quick and delete one of the // files we were reading, i've seen 'ff' be NULL char *filename = "lostfilename"; if ( ff ) filename = ff->getFilename(); // compute cache info RdbCache *rpc = getDiskPageCache ( m_rdbId ); if ( ! m_allowPageCache ) rpc = NULL; int64_t vfd ; if ( ff ) vfd = ff->getVfd(); key192_t ck ; if ( ff ) ck = makeCacheKey ( vfd , m_scans[i].m_offset , m_scans[i].m_bytesToRead ); if ( m_validateCache && ff && rpc && vfd != -1 ) { bool inCache; char *rec; int32_t recSize; inCache = rpc->getRecord ( (collnum_t)0 , // collnum (char *)&ck , &rec , &recSize , true , // copy? -1 , // maxAge, none true ); // inccounts? if ( inCache && // 1st byte is RdbScan::m_shifted ( m_lists[i].m_listSize != recSize-1 || memcmp ( m_lists[i].m_list , rec+1,recSize-1) || *rec != m_scans[i].m_shifted ) ) { log("msg3: cache did not validate"); char *xx=NULL;*xx=0; } mfree ( rec , recSize , "vca" ); } /////// // // STORE IN PAGE CACHE // /////// // store what we read in the cache. don't bother storing // if it was a retry, just in case something strange happened. // store pre-constrain call is more efficient. if ( m_retryNum<=0 && ff && rpc && vfd != -1 && ! 
m_scans[i].m_inPageCache ) rpc->addRecord ( (collnum_t)0 , // collnum (char *)&ck , // rec1 is this little thingy &m_scans[i].m_shifted, 1, // rec2 m_lists[i].getList() , m_lists[i].getListSize() , 0 ); // timestamp. 0 = now QUICKPOLL(m_niceness); // if from our 'page' cache, no need to constrain if ( ! m_lists[i].constrain ( m_startKey , m_constrainKey , // m_endKey mrs , // m_minRecSizes m_hintOffsets[i] , //m_hintKeys [i] , &m_hintKeys [i*m_ks] , filename,//ff->getFilename() , m_niceness ) ) { log("net: Had error while constraining list read from " "%s: %s/%s. vfd=%" PRId32" parts=%" PRId32". " "This is likely caused by corrupted " "data on disk.", mstrerror(g_errno), ff->getDir(), ff->getFilename(), ff->m_vfd , (int32_t)ff->m_numParts ); continue; } } // print the time if ( g_conf.m_logTimingDb ) { int64_t now = gettimeofdayInMilliseconds(); int64_t took = now - m_startTime; log(LOG_TIMING, "net: Took %" PRId64" ms to read %" PRId32" lists of %" PRId32" bytes total" " from %s (niceness=%" PRId32").", took,m_numFileNums,count,base->m_dbname,m_niceness); } return true; }
// . return false if blocked, true otherwise // . set g_errno on error // . read list of keys in [startKey,endKey] range // . read at least "minRecSizes" bytes of keys in that range // . the "m_endKey" of resulting, merged list may have a smaller endKey // than the argument, "endKey" due to limitation by "minRecSizes" // . resulting list will contain ALL keys between ITS [m_startKey,m_endKey] // . final merged list "should" try to have a size of at least "minRecSizes" // but due to negative/postive rec elimination may be less // . the endKey of the lists we read may be <= "endKey" provided // . we try to shrink the endKey if minRecSizes is >= 0 in order to // avoid excessive reading // . by shrinking the endKey we cannot take into account the size of deleted // records, so therefore we may fall short of "minRecSizes" in actuality, // in fact, the returned list may even be empty with a shrunken endKey // . we merge all lists read from disk into the provided "list" // . caller should call Msg3.getList(int32_t i) and Msg3:getNumLists() to retrieve // . this makes the query engine faster since we don't need to merge the docIds // and can just send them across the network separately and they will be // hashed into IndexTable's table w/o having to do time-wasting merging. // . caller can specify array of filenums to read from so incremental syncing // in Sync class can just read from titledb*.dat files that were formed // since the last sync point. bool Msg3::readList ( char rdbId , collnum_t collnum , const char *startKeyArg , const char *endKeyArg , int32_t minRecSizes , // max size of scan int32_t startFileNum , // first file to scan int32_t numFiles , // rel. to startFileNum void *state , // for callback void (* callback ) ( void *state ) , int32_t niceness , int32_t retryNum , int32_t maxRetries , bool compensateForMerge , bool justGetEndKey , bool allowPageCache , bool hitDisk ) { // set this to true to validate m_validateCache = false;//true; // clear, this MUST be done so if we return true g_errno is correct g_errno = 0; // assume lists are not checked for corruption m_listsChecked = false; // warn if ( minRecSizes < -1 ) { log(LOG_LOGIC,"db: Msg3 got minRecSizes of %" PRId32", changing " "to -1.",minRecSizes); minRecSizes = -1; } // reset m_alloc and data in all lists in case we are a re-call reset(); // warning if ( collnum < 0 ) log(LOG_LOGIC,"net: NULL collection. msg3."); // remember the callback m_rdbId = rdbId; m_collnum = collnum; m_callback = callback; m_state = state; m_niceness = niceness; m_numScansCompleted = 0; m_retryNum = retryNum; m_maxRetries = maxRetries; m_compensateForMerge = compensateForMerge; m_allowPageCache = allowPageCache; m_hitDisk = hitDisk; m_hadCorruption = false; // get keySize of rdb m_ks = getKeySizeFromRdbId ( m_rdbId ); // reset the group error m_errno = 0; // . reset all our lists // . these are reset in call the RdbScan::setRead() below //for ( int32_t i = 0 ; i < MAX_RDB_FILES ; i++ ) m_lists[i].reset(); // . ensure startKey last bit clear, endKey last bit set // . no! this warning is now only in Msg5 // . if RdbMerge is merging some files, not involving the root // file, then we can expect to get a lot of unmatched negative recs. // . as a consequence, our endKeys may often be negative. This means // it may not annihilate with the positive key, but we should only // miss like this at the boundaries of the lists we fetch. // . 
so in that case RdbList::merge will stop merging once the // minRecSizes limit is reached even if it means ending on a negative // rec key //if ( (startKey.n0 & 0x01) == 0x01 ) if ( !KEYNEG(startKeyArg) ) log(LOG_REMIND,"net: msg3: StartKey lastbit set."); if ( KEYNEG(endKeyArg) ) log(LOG_REMIND,"net: msg3: EndKey lastbit clear."); // declare vars here becaues of 'goto skip' below int32_t mergeFileNum = -1 ; int32_t max ; // get base, returns NULL and sets g_errno to ENOCOLLREC on error RdbBase *base = getRdbBase( m_rdbId, m_collnum ); if ( ! base ) { return true; } // store the file numbers in the array, these are the files we read m_numFileNums = 0; // save startFileNum here, just for recall m_startFileNum = startFileNum; m_numFiles = numFiles; // . if we have a merge going on, we may have to change startFileNum // . if some files get unlinked because merge completes then our // reads will detect the error and loop back here // . we launch are reads right after this without giving up the cpu // and we use file descriptors, so any changes to Rdb::m_files[] // should not hurt us // . WARNING: just make sure you don't lose control of cpu until after // you call RdbScan::set() // . we use hasMergeFile() instead of isMerging() because he may not // be merging cuz he got suspended or he restarted and // hasn't called attemptMerge() yet, but he may still contain it if ( g_conf.m_logDebugQuery ) log(LOG_DEBUG, "net: msg3: " "c=%" PRId32" hmf=%" PRId32" sfn=%" PRId32" msfn=%" PRId32" nf=%" PRId32" db=%s.", (int32_t)compensateForMerge,(int32_t)base->hasMergeFile(), (int32_t)startFileNum,(int32_t)base->m_mergeStartFileNum-1, (int32_t)numFiles,base->m_dbname); int32_t pre = -10; if ( compensateForMerge && base->hasMergeFile() && startFileNum >= base->m_mergeStartFileNum - 1 && (startFileNum > 0 || numFiles != -1) ) { // now also include the file being merged into, but only // if we are reading from a file being merged... if ( startFileNum < base->m_mergeStartFileNum + base->m_numFilesToMerge - 1 ) //m_fileNums [ m_numFileNums++ ] = // base->m_mergeStartFileNum - 1; pre = base->m_mergeStartFileNum - 1; // debug msg if ( g_conf.m_logDebugQuery ) log(LOG_DEBUG, "net: msg3: startFileNum from %" PRId32" to %" PRId32" (mfn=%" PRId32")", startFileNum,startFileNum+1,mergeFileNum); // if merge file was inserted before us, inc our file number startFileNum++; } // adjust num files if we need to, as well if ( compensateForMerge && base->hasMergeFile() && startFileNum < base->m_mergeStartFileNum - 1 && numFiles != -1 && startFileNum + numFiles - 1 >= base->m_mergeStartFileNum - 1 ) { // debug msg if ( g_conf.m_logDebugQuery ) log(LOG_DEBUG,"net: msg3: numFiles up one."); // if merge file was inserted before us, inc our file number numFiles++; } // . how many rdb files does this base have? // . IMPORTANT: this can change since files are unstable because they // might have all got merged into one! // . so do this check to make sure we're safe... 
especially if // there was an error before and we called readList() on ourselves max = base->getNumFiles(); // -1 means we should scan ALL the files in the base if ( numFiles == -1 ) numFiles = max; // limit it by startFileNum, however if ( numFiles > max - startFileNum ) numFiles = max - startFileNum; // set g_errno and return true if it is < 0 if ( numFiles < 0 ) { log(LOG_LOGIC, "net: msg3: readList: numFiles = %" PRId32" < 0 (max=%" PRId32")(sf=%" PRId32")", numFiles , max , startFileNum ); g_errno = EBADENGINEER; // force core dump char *xx=NULL;*xx=0; return true; } // . allocate buffer space // . m_scans, m_startpg, m_endpg, m_hintKeys, m_hintOffsets, // m_fileNums, m_lists int32_t chunk = sizeof(RdbScan) + // m_scans 4 + // m_startpg 4 + // m_endpg //sizeof(key_t) + // m_hintKeys m_ks + // m_hintKeys 4 + // m_hintOffsets 4 + // m_fileNums sizeof(RdbList) ; // m_lists int32_t nn = numFiles; if ( pre != -10 ) nn++; m_numChunks = nn; int32_t need = nn * (chunk); m_alloc = m_buf; if ( need > (int32_t)MSG3_BUF_SIZE ) { m_allocSize = need; m_alloc = (char *)mcalloc ( need , "Msg3" ); if ( ! m_alloc ) { log("disk: Could not allocate %" PRId32" bytes read " "structures to read %s.",need,base->m_dbname); return true; } } char *p = m_alloc; m_scans = (RdbScan *)p; p += nn * sizeof(RdbScan); m_startpg = (int32_t *)p; p += nn * 4; m_endpg = (int32_t *)p; p += nn * 4; //m_hintKeys = (key_t *)p; p += nn * sizeof(key_t); m_hintKeys = (char *)p; p += nn * m_ks; m_hintOffsets = (int32_t *)p; p += nn * 4; m_fileNums = (int32_t *)p; p += nn * 4; m_lists = (RdbList *)p; p += nn * sizeof(RdbList); // sanity check if ( p - m_alloc != need ) { log(LOG_LOGIC,"disk: Bad malloc in Msg3.cpp."); char *xx = NULL; *xx = 0; } // call constructors for ( int32_t i = 0 ; i < nn ; i++ ) m_lists[i].constructor(); // make fix from up top if ( pre != -10 ) m_fileNums [ m_numFileNums++ ] = pre; // store them all for ( int32_t i = startFileNum ; i < startFileNum + numFiles ; i++ ) m_fileNums [ m_numFileNums++ ] = i; // . remove file nums that are being unlinked after a merge now // . keep it here (below skip: label) so sync point reads can use it int32_t n = 0; for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) { // skip those that are being unlinked after the merge if ( base->m_isUnlinking && m_fileNums[i] >= base->m_mergeStartFileNum && m_fileNums[i] < base->m_mergeStartFileNum + base->m_numFilesToMerge ) continue; // otherwise, keep it m_fileNums[n++] = m_fileNums[i]; } m_numFileNums = n; // . if root file is being merged, he's file #0, & root file is file #1 // . this is a hack so caller gets what he wants //if ( startFileNum == 0 && base->getFileId(0) == 0 && numFiles == 1 ) // numFiles = 2; // remember the file range we should scan m_numScansStarted = 0; m_numScansCompleted = 0; //m_startKey = startKey; //m_endKey = endKey; //m_constrainKey = endKey; // set in case justGetEndKey is true KEYSET(m_startKey,startKeyArg,m_ks); KEYSET(m_endKey,endKeyArg,m_ks); KEYSET(m_constrainKey,endKeyArg,m_ks);//set incase justGetEndKey istrue m_minRecSizes = minRecSizes; m_compensateForMerge = compensateForMerge; // bail if 0 files to scan -- no! 
need to set startKey/endKey if ( numFiles == 0 ) return true; // don't read anything if endKey < startKey //if ( m_startKey > m_endKey ) return true; if ( KEYCMP(m_startKey,m_endKey,m_ks)>0 ) return true; // keep the original in tact in case g_errno == ETRYAGAIN //m_endKeyOrig = endKey; KEYSET(m_endKeyOrig,endKeyArg,m_ks); m_minRecSizesOrig = minRecSizes; // start reading at this key m_fileStartKey = startKeyArg; // start the timer, keep it fast for clusterdb though if ( g_conf.m_logTimingDb ) m_startTime = gettimeofdayInMilliseconds(); // translate base to an id, for the sake of m_msg0 //char baseId = m_msg0->getRdbId ( base ); // map ptrs RdbMap **maps = base->getMaps(); // . we now boost m_minRecSizes to account for negative recs // . but not if only reading one list, cuz it won't get merged and // it will be too big to send back if ( m_numFileNums > 1 ) compensateForNegativeRecs ( base ); // . often endKey is too big for an efficient read of minRecSizes bytes // because we end up reading too much from all the files // . this will set m_startpg[i], m_endpg[i] for each RdbScan/RdbFile // to ensure we read "minRecSizes" worth of records, not much more // . returns the new endKey for all ranges // . now this just overwrites m_endKey //m_endKey = setPageRanges ( base , setPageRanges ( base , m_fileNums , m_numFileNums , m_fileStartKey , // start reading @ key m_endKey , // stop reading @ key m_minRecSizes ); // . NEVER let m_endKey be a negative key, because it will // always be unmatched, since delbit is cleared // . adjusting it here ensures our generated hints are valid // . we will use this key to call constrain() with //m_constrainKey = m_endKey; //if ( ( m_constrainKey.n0 & 0x01) == 0x00 ) // m_constrainKey -= (uint32_t)1; KEYSET(m_constrainKey,m_endKey,m_ks); if ( KEYNEG(m_constrainKey) ) KEYSUB(m_constrainKey,m_ks); // Msg5 likes to get the endkey for getting the list from the tree if ( justGetEndKey ) return true; // sanity check if ( m_numFileNums > nn ) { log(LOG_LOGIC,"disk: Failed sanity check in Msg3."); char *xx = NULL; *xx = 0; } // debug msg //log("msg3 getting list (msg5=%" PRIu32")",m_state); // . MDW removed this -- go ahead an end on a delete key // . RdbMerge might not pick it up this round, but oh well // . so we can have both positive and negative co-existing in same file // make sure the last bit is set so we don't end on a delete key //m_endKey.n0 |= 0x01LL; // . now start reading/scanning the files // . our m_scans array starts at 0 for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) { // get the page range //int32_t p1 = m_startpg [ i ]; //int32_t p2 = m_endpg [ i ]; //#ifdef GBSANITYCHECK int32_t fn = m_fileNums[i]; // this can happen somehow! if ( fn < 0 ) { log(LOG_LOGIC,"net: msg3: fn=%" PRId32". Bad engineer.",fn); continue; } // sanity check if ( i > 0 && m_fileNums[i-1] >= fn ) { log(LOG_LOGIC, "net: msg3: files must be read in order " "from oldest to newest so RdbList::indexMerge_r " "works properly. Otherwise, corruption will " "result. "); char *xx = NULL; *xx = 0; return true; } // . sanity check? // . 
no, we must get again since we turn on endKey's last bit int32_t p1 , p2; maps[fn]->getPageRange ( m_fileStartKey , m_endKey , &p1 , &p2 , NULL ); //if ( p1 != p1c || p2 != p2c ) { // fprintf(stderr,"Msg3::bad page range\n"); // sleep(50000); //} // sanity check, each endpg's key should be > endKey //if ( p2 < maps[fn]->getNumPages() && // maps[fn]->getKey ( p2 ) <= m_endKey ) { // fprintf(stderr,"Msg3::bad page range 2\n"); // sleep(50000); //} //#endif //int32_t p1 , p2; //maps[fn]->getPageRange (startKey,endKey,minRecSizes,&p1,&p2); // now get some read info int64_t offset = maps[fn]->getAbsoluteOffset ( p1 ); int32_t bytesToRead = maps[fn]->getRecSizes ( p1, p2, false); // max out the endkey for this list // debug msg //#ifdef _DEBUG_ //if ( minRecSizes == 2000000 ) //log("Msg3:: reading %" PRId32" bytes from file #%" PRId32,bytesToRead,i); //#endif // inc our m_numScans m_numScansStarted++; // . keep stats on our disk accesses // . count disk seeks (assuming no fragmentation) // . count disk bytes read if ( bytesToRead > 0 ) { base->m_rdb->didSeek ( ); base->m_rdb->didRead ( bytesToRead ); } // . the startKey may be different for each RdbScan class // . RdbLists must have all keys within their [startKey,endKey] // . therefore set startKey individually from first page in map // . this endKey must be >= m_endKey // . this startKey must be < m_startKey //key_t startKey = maps[fn]->getKey ( p1 ); //key_t endKey = maps[fn]->getKey ( p2 ); char startKey2 [ MAX_KEY_BYTES ]; char endKey2 [ MAX_KEY_BYTES ]; maps[fn]->getKey ( p1 , startKey2 ); maps[fn]->getKey ( p2 , endKey2 ); //char *startKey = maps[fn]->getKeyPtr ( p1 ); //char *endKey = maps[fn]->getKeyPtr ( p2 ); // store in here m_startpg [ i ] = p1; m_endpg [ i ] = p2; // . we read UP TO that endKey, so reduce by 1 // . but iff p2 is NOT the last page in the map/file // . maps[fn]->getKey(lastPage) will return the LAST KEY // and maps[fn]->getOffset(lastPage) the length of the file //if ( maps[fn]->getNumPages()!=p2) endKey -=(uint32_t)1; if ( maps[fn]->getNumPages() != p2 ) KEYSUB(endKey2,m_ks); // otherwise, if we're reading all pages, then force the // endKey to virtual inifinite //else endKey.setMax(); else KEYMAX(endKey2,m_ks); // . set up the hints // . these are only used if we are only reading from 1 file // . these are used to call constrain() so we can constrain // the end of the list w/o looping through all the recs // in the list int32_t h2 = p2 ; // decrease by one page if we're on the last page if ( h2 > p1 && maps[fn]->getNumPages() == h2 ) h2--; // . decrease hint page until key is <= endKey on that page // AND offset is NOT -1 because the old way would give // us hints passed the endkey // . also decrease so we can constrain on minRecSizes in // case we're the only list being read // . use >= m_minRecSizes instead of >, otherwise we may // never be able to set "size" in RdbList::constrain() // because "p" could equal "maxPtr" right away while ( h2 > p1 && //( maps[fn]->getKey (h2) > m_constrainKey || (KEYCMP(maps[fn]->getKeyPtr(h2),m_constrainKey,m_ks)>0|| maps[fn]->getOffset(h2) == -1 || maps[fn]->getAbsoluteOffset(h2) - offset >= m_minRecSizes ) ) h2--; // now set the hint m_hintOffsets [ i ] = maps[fn]->getAbsoluteOffset ( h2 ) - maps[fn]->getAbsoluteOffset ( p1 ) ; //m_hintKeys [ i ] = maps[fn]->getKey ( h2 ); KEYSET(&m_hintKeys[i*m_ks],maps[fn]->getKeyPtr(h2),m_ks); // reset g_errno before calling setRead() g_errno = 0; // . this fix is now in RdbList::checklist_r() // . 
we can now have dup keys, so, we may read in // a rec with key "lastMinKey" even though we don't read // in the first key on the end page, so don't subtract 1... //if ( endKey != m_endKeyOrig ) // endKey += (uint32_t) 1; // timing debug if ( g_conf.m_logTimingDb ) log(LOG_TIMING, "net: msg: reading %" PRId32" bytes from %s file #%" PRId32" " "(niceness=%" PRId32")", bytesToRead,base->m_dbname,i,m_niceness); // log huge reads, those hurt us if ( bytesToRead > 150000000 ) { logf(LOG_INFO,"disk: Reading %" PRId32" bytes at offset %" PRId64" " "from %s.", bytesToRead,offset,base->m_dbname); } // if any keys in the map are the same report corruption char tmpKey [16]; char lastTmpKey[16]; int32_t ccount = 0; if ( bytesToRead > 10000000 && bytesToRead / 2 > m_minRecSizes && base->m_fixedDataSize >= 0 ) { for ( int32_t pn = p1 ; pn <= p2 ; pn++ ) { maps[fn]->getKey ( pn , tmpKey ); if ( KEYCMP(tmpKey,lastTmpKey,m_ks) == 0 ) ccount++; gbmemcpy(lastTmpKey,tmpKey,m_ks); } } if ( ccount > 10 ) { logf(LOG_INFO,"disk: Reading %" PRId32" bytes from %s file #" "%" PRId32" when min " "required is %" PRId32". Map is corrupt and has %" PRId32" " "identical consecutive page keys because the " "map was \"repaired\" because out of order keys " "in the index.", (int32_t)bytesToRead, base->m_dbname,fn, (int32_t)m_minRecSizes, (int32_t)ccount); m_numScansCompleted++; m_errno = ECORRUPTDATA; m_hadCorruption = true; //m_maxRetries = 0; break; } //////// // // try to get from PAGE CACHE // //////// BigFile *ff = base->getFile(m_fileNums[i]); RdbCache *rpc = getDiskPageCache ( m_rdbId ); if ( ! m_allowPageCache ) rpc = NULL; // . vfd is unique 64 bit file id // . if file is opened vfd is -1, only set in call to open() int64_t vfd = ff->getVfd(); key192_t ck = makeCacheKey ( vfd , offset, bytesToRead); char *rec; int32_t recSize; bool inCache = false; if ( rpc && vfd != -1 && ! m_validateCache ) inCache = rpc->getRecord ( (collnum_t)0 , // collnum (char *)&ck , &rec , &recSize , true , // copy? -1 , // maxAge, none true ); // inccounts? m_scans[i].m_inPageCache = false; if ( inCache ) { m_scans[i].m_inPageCache = true; m_numScansCompleted++; // now we have to store this value, 6 or 12 so // we can modify the hint appropriately m_scans[i].m_shifted = *rec; m_lists[i].set ( rec +1, recSize-1 , rec , // alloc recSize , // allocSize startKey2 , endKey2 , base->m_fixedDataSize , true , // owndata base->useHalfKeys() , getKeySizeFromRdbId ( m_rdbId ) ); continue; } // . do the scan/read of file #i // . this returns false if blocked, true otherwise // . this will set g_errno on error bool done = m_scans[i].setRead (base->getFile(m_fileNums[i]), base->m_fixedDataSize , offset , bytesToRead , startKey2 , endKey2 , m_ks , &m_lists[i] , this , doneScanningWrapper , base->useHalfKeys() , m_rdbId, m_niceness , m_allowPageCache , m_hitDisk ) ; // . damn, usually the above will indirectly launch a thread // to do the reading, but it sets g_errno to EINTR, // "interrupted system call"! // . i guess the thread does the read w/o blocking and then // queues the signal on g_loop's queue before it exits // . try ignoring, and keep going if ( g_errno == EINTR ) { log("net: Interrupted system call while reading file. 
" "Ignoring."); g_errno = 0; } // debug msg //fprintf(stderr,"Msg3:: reading %" PRId32" bytes from file #%" PRId32"," // "done=%" PRId32",offset=%" PRId64",g_errno=%s," // "startKey=n1=%" PRIu32",n0=%" PRIu64", " // "endKey=n1=%" PRIu32",n0=%" PRIu64"\n", // bytesToRead,i,(int32_t)done,offset,mstrerror(g_errno), // m_startKey,m_endKey); //if ( bytesToRead == 0 ) // fprintf(stderr,"shit\n"); // if it did not block then it completed, so count it if ( done ) m_numScansCompleted++; // break on an error, and remember g_errno in case we block if ( g_errno && g_errno != ENOTHREADSLOTS ) { int32_t tt = LOG_WARN; if ( g_errno == EFILECLOSED ) tt = LOG_INFO; log(tt,"disk: Reading %s had error: %s.", base->m_dbname, mstrerror(g_errno)); m_errno = g_errno; break; } } // debug test //if ( rand() % 100 <= 10 ) m_errno = EIO; // if we blocked, return false if ( m_numScansCompleted < m_numScansStarted ) return false; // . if all scans completed without blocking then wrap it up & ret true // . doneScanning may now block if it finds data corruption and must // get the list remotely return doneScanning(); }
// . return false if blocked, true otherwise
// . sets g_errno on error
bool RdbMerge::getNextList ( ) {
	// return true if g_errno is set
	if ( g_errno || m_doneMerging ) return true;
	// it's suspended so we count this as blocking
	if ( m_isSuspended ) {
		m_isReadyToSave = true;
		return false;
	}
	// if the power is off, suspend the merging
	if ( ! g_process.m_powerIsOn ) {
		m_isReadyToSave = true;
		doSleep();
		return false;
	}
	// no chop threads
	m_numThreads = 0;
	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base;
	if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;
	// . if a contributor has just surpassed a "part" in his BigFile
	//   then we can delete that part from the BigFile and the map
	for ( long i = m_startFileNum ; i < m_startFileNum + m_numFiles; i++ ){
		RdbMap *map = base->m_maps[i];
		long page = map->getPage ( m_startKey );
		long long offset = map->getAbsoluteOffset ( page );
		BigFile *file = base->m_files[i];
		long part = file->getPartNum ( offset ) ;
		if ( part == 0 ) continue;
		// i've seen this bug happen if we chop a part off on our
		// last dump and the merge never completes for some reason...
		// so if we're in the last part then don't chop the part b4 us
		if ( part >= file->m_maxParts - 1 ) continue;
		// if we already unlinked part # (part-1) then continue
		if ( ! file->doesPartExist ( part - 1 ) ) continue;
		// . otherwise, excise from the map
		// . we must be able to chop the mapped segments corresponding
		//   EXACTLY to the part file
		// . therefore, PAGES_PER_SEGMENT define'd in RdbMap.h must
		//   evenly divide MAX_PART_SIZE in BigFile.h
		// . i do this check in RdbMap.cpp
		if ( ! map->chopHead ( MAX_PART_SIZE ) ) {
			// we had an error!
			log("db: Failed to remove data from map for "
			    "%s.part%li.",
			    file->getFilename(),part);
			return true;
		}
		// . also, unlink any part files BELOW part # "part"
		// . this returns false if it blocked, true otherwise
		// . this sets g_errno on error
		// . now we just unlink part file #(part-1) explicitly
		if ( ! file->chopHead ( part - 1 , chopWrapper , this ) )
			m_numThreads++;
		if ( ! g_errno ) continue;
		log("db: Failed to unlink file %s.part%li.",
		    file->getFilename(),part);
		return true;
	}
	// wait for file to be unlinked before getting list
	if ( m_numThreads > 0 ) return false;
	// otherwise, get it now
	return getAnotherList ( );
}
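// RdbMerge::getNextList() above decides whether a source BigFile's oldest
// .partN file can be unlinked by checking which part the merge read point
// has reached. Since a BigFile is split into fixed-size part files, the part
// number is just offset / MAX_PART_SIZE. The stand-alone sketch below (with
// an assumed part size) only illustrates that arithmetic and the guards used
// above; the real logic lives in BigFile::getPartNum() and chopHead().
#include <cstdint>

static const int64_t kPartSize = 512LL * 1024 * 1024; // assumed MAX_PART_SIZE

static int32_t partNumForOffset(int64_t offset) {
	// which fixed-size part file this byte offset falls into
	return (int32_t)(offset / kPartSize);
}

static bool canChopEarlierParts(int64_t mergeOffset, int32_t maxParts) {
	int32_t part = partNumForOffset(mergeOffset);
	// nothing precedes part 0, and never chop while still in the last part
	return part > 0 && part < maxParts - 1;
}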
bool Syncdb::init ( ) {
	// reset
	loopReset();
	m_doRcp      = false;
	m_rcpStarted = false;
	// setup quick tree
	if ( ! m_qt.set ( 0           , // fixedDataSize
			  300000      , // 300k nodes
			  true        , // balance?
			  -1          , // maxmem, no max
			  false       , // ownData?
			  "tresyncdb" ,
			  false       , // dataInPtrs?
			  "quicktree" , // dbname
			  16          , // keySize
			  false       ))// useProtection?
		return false;
	BigFile f;
	f.set ( g_hostdb.m_dir , "quicktree.dat" );
	// only load if it exists
	bool exists = f.doesExist();
	// load it
	if ( exists && ! m_qt.fastLoad( &f , &m_stack ) )
		return log("sync: quicktree.dat load failed: %s",
			   mstrerror(g_errno));
	// done
	f.close();
	// assume permanently out of sync
	long val = 2;
	// load the insync.dat file
	f.set ( g_hostdb.m_dir , "insync.dat" );
	// fail on open failure
	if ( ! f.open ( O_RDONLY ) ) return false;
	// if not there, permanently out of sync
	if ( ! f.doesExist() )
		log("sync: insync.dat does not exist. Assuming host is "
		    "unrecoverable.");
	else {
		// get the value
		char buf[20];
		long n = f.read ( &buf , 10 , 0 ) ;
		if ( n <= 0 )
			return log("sync: read insync.dat: %s",
				   mstrerror(g_errno));
		// must be digit
		if ( ! is_digit ( buf[0] ) )
			return log("sync: insync.dat has no number in it.");
		// unlink it
		if ( ! f.unlink() )
			return log("sync: failed to unlink insync.dat: %s",
				   mstrerror(g_errno));
		// get the value
		val = atol ( buf );
	}
	// bad val?
	if ( val < 0 || val > 2 )
		return log("sync: insync.dat had bad value of %li",val);
	// report if in sync or not
	if ( val == 0 ) log("sync: insync.dat says out of sync");
	if ( val == 1 ) log("sync: insync.dat says in sync");
	if ( val == 2 ) log("sync: insync.dat says PERMANENTLY out of sync");
	// set it
	Host *h = g_hostdb.m_myHost;
	if ( val == 1 ) h->m_inSync = 1;
	if ( val == 2 ) h->m_isPermanentOutOfSync = 1;
	// call this once per second
	if ( ! g_loop.registerSleepCallback ( 1000 , NULL , sleepWrapper ) )
		return false;
	// 10 MB
	long maxTreeMem = 10000000;
	// . what's max # of tree nodes?
	// . key+4+left+right+parents+dataPtr = 12+4 +4+4+4+4 = 32
	// . 28 bytes per record when in the tree
	long maxTreeNodes = maxTreeMem / ( 16 + 1000 );
	// . initialize our own internal rdb
	// . records are actual msg4 requests received from Msg4
	// . the key is formed calling Syncdb::makeKey() which is based on
	//   the tid, sid and zid of the msg4 request, where tid is the
	//   twin hostid we are chatting with in our group, sid is the
	//   ORIGINAL sending hostid of the msg4 request, and zid is the
	//   kinda transaction #, and is unique.
	if ( ! m_rdb.init ( g_hostdb.m_dir ,
			    "syncdb"       ,
			    true           , // dedup
			    -1             , // dataSize is variable
			    50             , // min files to merge
			    maxTreeMem     ,
			    maxTreeNodes   , // maxTreeNodes
			    true           , // balance tree?
			    50000          , // maxCacheMem
			    100            , // maxCacheNodes
			    false          , // half keys?
			    false          , // save cache?
			    NULL           , // page cache
			    false          , // is titledb
			    false          , // preload disk page cache
			    16             , // key size
			    false          , // bias disk page cache?
			    true           ))// is collectionless?
		return false;
	// add the coll
	//if ( ! g_syncdb.m_rdb.addColl ( "dummy" ) ) return true;
	// reset quick tree?
	if ( ! h->m_isPermanentOutOfSync ) return true;
	// clear it all!
	m_qt.clear();
	// add the base since it is a collectionless rdb
	return m_rdb.addRdbBase1 ( NULL );
}
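// Syncdb::init() above encodes the host's sync state as a single digit in
// insync.dat: 0 = out of sync, 1 = in sync, 2 = permanently out of sync.
// The sketch below is NOT part of the original source; it is a hypothetical
// stand-alone helper, shown only to make that one-byte file format explicit.
#include <cstdio>

enum SyncState { SYNC_OUT = 0, SYNC_IN = 1, SYNC_PERMANENT_OUT = 2 };

// write the single-digit state the same way init() expects to read it back
static bool writeInsyncDat(const char *workingDir, SyncState state) {
	char path[1024];
	snprintf(path, sizeof(path), "%sinsync.dat", workingDir);
	FILE *fp = fopen(path, "w");
	if (!fp) return false;
	fprintf(fp, "%d", (int)state);
	fclose(fp);
	return true;
}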
// . sets m_fileOffset and m_bf
// . returns false and sets g_errno on error
// . returns false if nothing to read too... but does not set g_errno
bool ImportState::setCurrentTitleFileAndOffset ( ) {

	// leave m_bf and m_fileOffset alone if there is more to read
	if ( m_fileOffset < m_bfFileSize )
		return true;

	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
	if ( ! cr ) return false;

	log("import: import finding next file");

	// if ( m_offIsValid ) {
	// 	//*off = m_fileOffset;
	// 	return &m_bf;
	// }
	//m_offIsValid = true;

	// look for titledb0001.dat etc. files in the
	// workingDir/inject/ subdir
	SafeBuf ddd;
	ddd.safePrintf("%sinject",cr->m_importDir.getBufStart());
	// now use the one provided. we should also provide the # of threads
	if ( cr->m_importDir.getBufStart() &&
	     cr->m_importDir.getBufStart()[0] ) {
		ddd.reset();
		ddd.safeStrcpy ( cr->m_importDir.getBufStart() );
	}

	//
	// assume we are the first filename
	// set s_fileId to the minimum
	//
	Dir dir;
	dir.set(ddd.getBufStart());
	if ( ! dir.open() ) return false;

	// assume none
	long minFileId = -1;

	// getNextFilename() writes into this
	char pattern[64];
	strcpy ( pattern , "titledb*" );
	char *filename;
	while ( ( filename = dir.getNextFilename ( pattern ) ) ) {
		// filename must be a certain length
		long filenameLen = gbstrlen(filename);
		// we need at least "titledb0001.dat"
		if ( filenameLen < 15 ) continue;
		// ensure filename starts w/ our m_dbname
		if ( strncmp ( filename , "titledb", 7 ) != 0 ) continue;
		// skip if not .dat file
		if ( ! strstr ( filename , ".dat" ) ) continue;
		// then a 4 digit number should follow
		char *s = filename + 7;
		if ( ! isdigit(*(s+0)) ) continue;
		if ( ! isdigit(*(s+1)) ) continue;
		if ( ! isdigit(*(s+2)) ) continue;
		if ( ! isdigit(*(s+3)) ) continue;
		// convert digit to id
		long id = atol(s);
		// . do not accept files we've already processed
		// . -1 means we haven't processed any yet
		if ( m_bfFileId >= 0 && id <= m_bfFileId ) continue;
		// the min of those we haven't yet processed/injected
		if ( id < minFileId || minFileId < 0 ) minFileId = id;
	}

	// get where we left off
	if ( ! m_loadedPlaceHolder ) {
		// read where we left off from file if possible
		char fname[256];
		sprintf(fname,"%slasttitledbinjectinfo.dat",g_hostdb.m_dir);
		SafeBuf ff;
		ff.fillFromFile(fname);
		if ( ff.length() > 1 ) {
			m_loadedPlaceHolder = true;
			// get the placeholder
			sscanf ( ff.getBufStart() ,
				 "%llu,%lu" ,
				 &m_fileOffset ,
				 &minFileId );
		}
	}

	// if no files! return false to indicate we are done
	if ( minFileId == -1 ) return false;

	// set up s_bf then
	//if ( m_bfFileId != minFileId ) {
	SafeBuf tmp;
	tmp.safePrintf("titledb%04li-000.dat"
		       //,dir.getDirname()
		       ,minFileId);
	m_bf.set ( dir.getDirname() ,tmp.getBufStart() );
	if ( ! m_bf.open( O_RDONLY ) ) {
		log("inject: import: could not open %s%s for reading",
		    dir.getDirname(),tmp.getBufStart());
		return false;
	}
	m_bfFileId = minFileId;
	// reset ptr into file
	//*off = 0;
	// and set this
	m_bfFileSize = m_bf.getFileSize();
	m_fileOffset = 0;
	//}

	log("import: importing from file %s",m_bf.getFilename());

	return true;//&m_bf;
}
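// setCurrentTitleFileAndOffset() resumes from lasttitledbinjectinfo.dat,
// which it parses with sscanf("%llu,%lu") as "<fileOffset>,<fileId>". The
// writer side of that bookmark is not shown in this listing; the sketch
// below is a hypothetical helper that only illustrates the expected on-disk
// format, matching the reader above.
#include <cstdio>

// persist the current bookmark so an interrupted import can resume
static bool saveTitledbInjectBookmark(const char *workingDir,
                                      unsigned long long fileOffset,
                                      unsigned long fileId) {
	char fname[256];
	snprintf(fname, sizeof(fname),
	         "%slasttitledbinjectinfo.dat", workingDir);
	FILE *fp = fopen(fname, "w");
	if (!fp) return false;
	// same "%llu,%lu" layout the reader parses
	fprintf(fp, "%llu,%lu", fileOffset, fileId);
	fclose(fp);
	return true;
}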
// // . ENTRY POINT FOR IMPORTING TITLEDB RECS FROM ANOTHER CLUSTER // . when user clicks 'begin' in import page we come here.. // . so when that parm changes in Parms.cpp we sense that and call // beginImport(CollectionRec *cr) // . or on startup we call resumeImports to check each coll for // an import in progress. // . search for files named titledb*.dat // . if none found just return // . when msg7 inject competes it calls this // . call this from sleep wrapper in Process.cpp // . returns false if would block (outstanding injects), true otherwise // . sets g_errno on error bool ImportState::importLoop ( ) { CollectionRec *cr = g_collectiondb.getRec ( m_collnum ); if ( ! cr || g_hostdb.m_hostId != 0 ) { // if coll was deleted! log("import: collnum %li deleted while importing into", (long)m_collnum); //if ( m_numOut > m_numIn ) return true; // delete the entire import state i guess // what happens if we have a msg7 reply come back in? // it should see the collrec is NULL and just fail. mdelete ( this, sizeof(ImportState) , "impstate"); delete (this); return true; } INJECTLOOP: // stop if waiting on outstanding injects long long out = m_numOut - m_numIn; if ( out >= cr->m_numImportInjects ) { g_errno = 0; return false; } if ( ! cr->m_importEnabled ) { // wait for all to return if ( out > 0 ) return false; // then delete it log("import: collnum %li import loop disabled", (long)m_collnum); mdelete ( this, sizeof(ImportState) , "impstate"); delete (this); return true; } // scan each titledb file scanning titledb0001.dat first, // titledb0003.dat second etc. //long long offset = -1; // . when offset is too big for current m_bigFile file then // we go to the next and set offset to 0. // . sets m_bf and m_fileOffset if ( ! setCurrentTitleFileAndOffset ( ) ) {//cr , -1 ); log("import: import: no files to read"); //goto INJECTLOOP; return true; } // this is -1 if none remain! if ( m_fileOffset == -1 ) { log("import: import fileoffset is -1. done."); return true; } long long saved = m_fileOffset; //Msg7 *msg7; //GigablastRequest *gr; //SafeBuf *sbuf = NULL; long need = 12; long dataSize = -1; //XmlDoc xd; key_t tkey; bool status; SafeBuf tmp; SafeBuf *sbuf = &tmp; long long docId; long shardNum; long key; Multicast *mcast; char *req; long reqSize; if ( m_fileOffset >= m_bfFileSize ) { log("inject: import: done processing file %li %s", m_bfFileId,m_bf.getFilename()); goto nextFile; } // read in title rec key and data size status = m_bf.read ( &tkey, sizeof(key_t) , m_fileOffset ); //if ( n != 12 ) goto nextFile; if ( g_errno ) { log("inject: import: reading file error: %s. advancing " "to next file",mstrerror(g_errno)); goto nextFile; } m_fileOffset += 12; // if negative key, skip if ( (tkey.n0 & 0x01) == 0 ) { goto INJECTLOOP; } // if non-negative then read in size status = m_bf.read ( &dataSize , 4 , m_fileOffset ); if ( g_errno ) { log("main: failed to read in title rec " "file. %s. Skipping file %s", mstrerror(g_errno),m_bf.getFilename()); goto nextFile; } m_fileOffset += 4; need += 4; need += dataSize; need += 4; // collnum, first 4 bytes if ( dataSize < 0 || dataSize > 500000000 ) { log("main: could not scan in titledb rec of " "corrupt dataSize of %li. BAILING ENTIRE " "SCAN of file %s",dataSize,m_bf.getFilename()); goto nextFile; } //gr = &msg7->m_gr; //XmlDoc *xd = getAvailXmlDoc(); //msg7 = getAvailMsg7(); mcast = getAvailMulticast(); // if none, must have to wait for some to come back to us if ( ! 
mcast ) { // restore file offset //m_fileOffset = saved; // no, must have been a oom or something log("import: import no mcast available"); return true;//false; } // this is for holding a compressed titlerec //sbuf = &mcast->m_sbuf;//&gr->m_sbuf; // point to start of buf sbuf->reset(); // ensure we have enough room sbuf->reserve ( need ); // collnum first 4 bytes sbuf->pushLong( (long)m_collnum ); // store title key sbuf->safeMemcpy ( &tkey , sizeof(key_t) ); // then datasize if any. neg rec will have -1 datasize if ( dataSize >= 0 ) sbuf->pushLong ( dataSize ); // then read data rec itself into it, compressed titlerec part if ( dataSize > 0 ) { // read in the titlerec after the key/datasize status = m_bf.read ( sbuf->getBuf() , dataSize , m_fileOffset ); if ( g_errno ) { // n != dataSize ) { log("main: failed to read in title rec " "file. %s. Skipping file %s", mstrerror(g_errno),m_bf.getFilename()); // essentially free up this msg7 now //msg7->m_inUse = false; //msg7->reset(); goto nextFile; } // advance m_fileOffset += dataSize; // it's good, count it sbuf->m_length += dataSize; } // set xmldoc from the title rec //xd->set ( sbuf.getBufStart() ); //xd->m_masterState = NULL; //xd->m_masterCallback ( titledbInjectLoop ); // we use this so we know where the doc we are injecting // was in the foregien titledb file. so we can update our bookmark // code. mcast->m_hackFileOff = saved;//m_fileOffset; mcast->m_hackFileId = m_bfFileId; // // inject a title rec buf this time, we are doing an import // FROM A TITLEDB FILE!!! // //gr->m_titleRecBuf = &sbuf; // break it down into gw // xd.set2 ( sbuf.getBufStart() , // sbuf.length() , // max size // cr->m_coll, // use our coll // NULL , // pbuf for page parser // 1 , // niceness // NULL ); //sreq ); // // note it // log("import: importing %s",xd.m_firstUrl.getUrl()); // now we can set gr for the injection // TODO: inject the whole "sbuf" so we get sitenuminlinks etc // all exactly the same... // gr->m_url = xd.getFirstUrl()->getUrl(); // gr->m_queryToScrape = NULL; // gr->m_contentDelim = 0; // gr->m_contentTypeStr = g_contentTypeStrings [xd.m_contentType]; // gr->m_contentFile = NULL; // gr->m_content = xd.ptr_utf8Content; // gr->m_diffbotReply = NULL; // gr->m_injectLinks = false; // gr->m_spiderLinks = true; // gr->m_shortReply = false; // gr->m_newOnly = false; // gr->m_deleteUrl = false; // gr->m_recycle = true; // recycle content? or sitelinks? // gr->m_dedup = false; // gr->m_hasMime = false; // gr->m_doConsistencyTesting = false; // gr->m_getSections = false; // gr->m_gotSections = false; // gr->m_charset = xd.m_charset; // gr->m_hopCount = xd.m_hopCount; // // point to next doc in the titledb file // //m_fileOffset += need; // get docid from key docId = g_titledb.getDocIdFromKey ( &tkey ); // get shard that holds the titlerec for it shardNum = g_hostdb.getShardNumFromDocId ( docId ); // for selecting which host in the shard receives it key = (long)docId; m_numOut++; // then index it. master callback will be called //if ( ! xd->index() ) return false; // TODO: make this forward the request to an appropriate host!! // . gr->m_sbuf is set to the titlerec so this should handle that // and use XmlDoc::set4() or whatever // if ( msg7->injectTitleRec ( msg7 , // state // gotMsg7ReplyWrapper , // callback // cr )) { // // it didn't block somehow... 
// msg7->m_inUse = false; // msg7->gotMsg7Reply(); // } req = sbuf->getBufStart(); reqSize = sbuf->length(); if ( reqSize != need ) { char *xx=NULL;*xx=0 ; } // do not free it, let multicast free it after sending it sbuf->detachBuf(); if ( ! mcast->send ( req , reqSize , 0x07 , true , // ownmsg? shardNum, false, // send to whole shard? key , // for selecting host in shard mcast , // state NULL , // state2 gotMulticastReplyWrapper , 999999 ) ) { // total timeout in seconds log("import: import mcast had error: %s",mstrerror(g_errno)); m_numIn++; } goto INJECTLOOP; nextFile: // invalidate this flag //m_offIsValid = false; // . and call this function. we add one to m_bfFileId so we // do not re-get the file we just injected. // . sets m_bf and m_fileOffset // . returns false if nothing to read if ( ! setCurrentTitleFileAndOffset ( ) ) { //cr , m_bfFileId+1 ); log("import: import: no files left to read"); //goto INJECTLOOP; return true; } // if it returns NULL we are done! log("main: titledb injection loop completed. waiting for " "outstanding injects to return."); if ( m_numOut > m_numIn ) return false; log("main: all injects have returned. DONE."); // dummy return return true; }
int main(int argc, char* argv[]) { int i; int nums; FDATA fdata; Hash AddHash; BigFile DestBigFile; BigFile SourceBigFile; BigFileEntry *bfe; int numok,numnew,numupdated; int sourcetype; bool update; char *p1=0; char *p2=0; char *p3=0; int tosspath=0; DataHandle addhandle; int vargc; char **vargv; vargc=argc; vargv=argv; /* handle encrypted file */ kGUIProt DestProt; bool usedestprot; kGUIProt SourceProt; bool usesourceprot; kGUISystemBig sysbig; kGUI::SetSystem(&sysbig); signal(SIGINT, sigint_handler); optcompress=false; optrecursive=true; optverify=false; optdelete=false; optmissing=false; usedestprot=false; usesourceprot=false; #if 0 p1="/source/kgui/_data.big"; p2="/source/kgui/big"; p3="/source/kgui/big/"; optverify=true; #endif for(i=1; i<vargc; ++i) { if(vargv[i][0]=='-') { switch(vargv[i][1]) { case 'c': optcompress=true; break; case 'd': optdelete=true; break; case 'm': optmissing=true; /* delete missing files from subdir */ break; case 'v': optverify=true; break; case 'r': optrecursive=false; break; case 'k': if(vargv[i][2]=='d') /* encryption key on destination file */ { printf("using dest key\n"); if(DestProt.SetKey(vargv[i+1],atoi(vargv[i+2]),atoi(vargv[i+3]),true)==false) { printf("Error loading dest keyfile '%s'\n",vargv[i+1]); return(0); } usedestprot=true; i+=3; } else if(vargv[i][2]=='s') /* encryption key on source file */ { printf("using source key\n"); if(SourceProt.SetKey(vargv[i+1],atoi(vargv[i+2]),atoi(vargv[i+3]),true)==false) { printf("Error loading source keyfile '%s'\n",vargv[i+1]); return(0); } usesourceprot=true; i+=3; } optrecursive=false; break; default: printf("Unknown parm '%s'\n",vargv[i]); return(0); break; } } else { if(!p1) p1=vargv[i]; else if(!p2) p2=vargv[i]; else if(!p3) { p3=vargv[i]; tosspath=(int)strlen(p3); } else { printf("Unknown parm '%s'\n",vargv[i]); return(0); } } } /* need at least 1 parm */ if(!p1) { printf("kguibig: (c) 2005 Kevin Pickelll\n"); printf("usage: kguibig bigfile.big path [root]\n"); printf(" -c = compress\n"); printf(" -v = verify\n"); printf(" -r = don't recurse\n"); printf(" -k[d,s] = filename offset len // source/dest key\n"); return(0); } DestBigFile.SetFilename(p1); if(usedestprot==true) DestBigFile.SetEncryption(&DestProt); DestBigFile.Load(true); if(DestBigFile.IsBad()==true) { printf("Dest file exists and is not a bigfile, or decryption key is incorrect!\n"); return(0); } /* list, verify or compress ? */ if(!p2) { if(optcompress) { } else { unsigned long crc; unsigned long vfsize; BigFileEntry *sfe; unsigned char copybuf[65536]; DataHandle checkhandle; /* verify or list */ nums=DestBigFile.GetNumEntries(); for(i=0; ((i<nums) && (g_userabort==false)); ++i) { sfe=(BigFileEntry *)DestBigFile.GetEntry(i); if(optverify) { /* check crc and print if no match */ printf("%d%c",i,13); vfsize=sfe->m_size; crc=0; DestBigFile.CopyArea(&checkhandle,sfe->m_offset,sfe->m_size,sfe->m_time); checkhandle.Open(); while(vfsize>sizeof(copybuf)) { checkhandle.Read(©buf,(unsigned long)sizeof(copybuf)); crc=DestBigFile.CrcBuffer(crc,copybuf,sizeof(copybuf)); vfsize-=sizeof(copybuf); }; /* write remainder */ if(vfsize>0) { checkhandle.Read(©buf,vfsize); crc=DestBigFile.CrcBuffer(crc,copybuf,vfsize); } checkhandle.Close(); if(crc!=sfe->m_crc) printf("CRC Error on file '%s' %06x<>%06x\n",sfe->GetName()->GetString(),(int)crc,sfe->m_crc); } else /* assume list if verify is not set */ printf("%s, len=%d,crc=%06x\n",sfe->GetName()->GetString(),sfe->m_size,sfe->m_crc); } } return(0); } AddHash.Init(16,sizeof(FDATA)); /* is p2 a bigfile? 
*/ if(strstr(p2,".big")) { SourceBigFile.SetFilename(p2); if(usesourceprot==true) SourceBigFile.SetEncryption(&SourceProt); SourceBigFile.Load(true); if(SourceBigFile.IsBad()==false) sourcetype=SOURCE_BIG; else { printf("Source file exists and is not a bigfile, or decryption key is incorrect!\n"); return(0); } } else if(kGUI::IsDir(p2)==false) { fdata.time=kGUI::SysFileTime(p2); //fdata.root=p3; AddHash.Add(p2,&fdata); sourcetype=SOURCE_FILE; } else { unsigned int df; const char *name; kGUIDir dir; printf("loading directory!\n"); dir.LoadDir(p2,true,true); for(df=0; df<dir.GetNumFiles(); ++df) { name=dir.GetFilename(df); fdata.time=kGUI::SysFileTime(name); AddHash.Add(name,&fdata); } // scandir(&AddHash,p2); sourcetype=SOURCE_DIR; } /* now, look for differences between bigfile and files in the addhash list */ numok=0; numnew=0; numupdated=0; /* todo: optdelete function */ /* add from source bigfile to dest bigfile will not work */ /* if source is encrypted so I need to rewrite addfile to use a datahandle */ /* instead of a filestream */ if(sourcetype==SOURCE_BIG) { BigFileEntry *sfe; nums=SourceBigFile.GetNumEntries(); for(i=0; ((i<nums) && (g_userabort==false)); ++i) { sfe=(BigFileEntry *)SourceBigFile.GetEntry(i); bfe=DestBigFile.Locate(sfe->GetName()->GetString()); if(!bfe) { // printf("File '%s' not in destination set!\n",she->m_string); update=true; ++numnew; } else { if(sfe->m_time==bfe->m_time) { update=false; ++numok; } else { int deltatime; deltatime=abs(sfe->m_time-bfe->m_time); if(deltatime==46400 || deltatime==3600) { update=false; ++numok; } else { printf("File '%s' %d,%d times are different!(%d)\n",sfe->GetName()->GetString(),sfe->m_time,bfe->m_time,deltatime); ++numupdated; update=true; } } } if(update==true) { SourceBigFile.CopyArea(&addhandle,sfe->m_offset,sfe->m_size,sfe->m_time); // addsize=sfe->m_size; // addtime=sfe->m_time; // fseek(addhandle,sfe->m_offset,SEEK_SET); /* add the file to the bigfile */ DestBigFile.AddFile(sfe->GetName()->GetString(),&addhandle,false); } } DestBigFile.UpdateDir(); } else { HashEntry *she; FDATA *sfdata; nums=AddHash.GetNum(); she=AddHash.GetFirst(); for(i=0; ((i<nums) && (g_userabort==false)); ++i) { sfdata=(FDATA *)she->m_data; bfe=DestBigFile.Locate(she->m_string+tosspath); if(!bfe) { // printf("File '%s' not in destination set!\n",she->m_string+tosspath); ++numnew; update=true; } else { if(sfdata->time==bfe->m_time) { update=false; ++numok; } else { int deltatime; deltatime=(abs((int)sfdata->time-bfe->m_time)); if(deltatime==46400 || deltatime==3600) { update=false; ++numok; } else { printf("File '%s' %d,%d times are different!(%d)\n",she->m_string+tosspath,(int)sfdata->time,bfe->m_time,deltatime); ++numupdated; update=true; } } } if(update==true) { addhandle.SetFilename(she->m_string); /* add the file to the bigfile */ DestBigFile.AddFile(she->m_string+tosspath,&addhandle,false); } she=she->m_next; } DestBigFile.UpdateDir(); } printf("numok=%d,numnew=%d,numupdated=%d\n",numok,numnew,numupdated); return 0; }