// init our rdb bool Titledb::init ( ) { // key sanity tests int64_t uh48 = 0x1234567887654321LL & 0x0000ffffffffffffLL; int64_t docId = 123456789; key96_t k = makeKey(docId,uh48,false); if ( getDocId(&k) != docId ) { g_process.shutdownAbort(true);} if ( getUrlHash48(&k) != uh48 ) { g_process.shutdownAbort(true);} const char *url = "http://.ezinemark.com/int32_t-island-child-custody-attorneys-new-york-visitation-lawyers-melville-legal-custody-law-firm-45f00bbed18.html"; Url uu; uu.set(url); const char *d1 = uu.getDomain(); int32_t dlen1 = uu.getDomainLen(); int32_t dlen2 = 0; const char *d2 = getDomFast ( url , &dlen2 ); if ( !d1 || !d2 ) { g_process.shutdownAbort(true); } if ( dlen1 != dlen2 ) { g_process.shutdownAbort(true); } // another one url = "http://ok/"; uu.set(url); const char *d1a = uu.getDomain(); dlen1 = uu.getDomainLen(); dlen2 = 0; const char *d2a = getDomFast ( url , &dlen2 ); if ( d1a || d2a ) { g_process.shutdownAbort(true); } if ( dlen1 != dlen2 ) { g_process.shutdownAbort(true); } // . what's max # of tree nodes? // . assume avg TitleRec size (compressed html doc) is about 1k we get: // . NOTE: overhead is about 32 bytes per node int32_t maxTreeNodes = g_conf.m_titledbMaxTreeMem / (1*1024); // initialize our own internal rdb return m_rdb.init ( "titledb" , -1 , // fixed record size //g_conf.m_titledbMinFilesToMerge , // this should not really be changed... -1, g_conf.m_titledbMaxTreeMem , maxTreeNodes , false, // half keys? 12, // key size false, //isCollectionLess false); //useIndexFile // validate //return verify ( ); }
// init our rdb bool Titledb::init ( ) { // key sanity tests int64_t uh48 = 0x1234567887654321LL & 0x0000ffffffffffffLL; int64_t docId = 123456789; key_t k = makeKey(docId,uh48,false); if ( getDocId(&k) != docId ) { char *xx=NULL;*xx=0;} if ( getUrlHash48(&k) != uh48 ) { char *xx=NULL;*xx=0;} char *url = "http://.ezinemark.com/int32_t-island-child-custody-attorneys-new-york-visitation-lawyers-melville-legal-custody-law-firm-45f00bbed18.html"; Url uu; uu.set(url); char *d1 = uu.getDomain(); int32_t dlen1 = uu.getDomainLen(); int32_t dlen2 = 0; char *d2 = getDomFast ( url , &dlen2 ); if ( dlen1 != dlen2 ) { char *xx=NULL;*xx=0; } // another one url = "http://ok/"; uu.set(url); d1 = uu.getDomain(); dlen1 = uu.getDomainLen(); dlen2 = 0; d2 = getDomFast ( url , &dlen2 ); if ( dlen1 != dlen2 ) { char *xx=NULL;*xx=0; } int64_t maxMem = 200000000; // 200MB // . what's max # of tree nodes? // . assume avg TitleRec size (compressed html doc) is about 1k we get: // . NOTE: overhead is about 32 bytes per node int32_t maxTreeNodes = maxMem / (1*1024); // . we now use a disk page cache for titledb as opposed to the // old rec cache. i am trying to do away with the Rdb::m_cache rec // cache in favor of cleverly used disk page caches, because // the rec caches are not real-time and get stale. // . just hard-code 30MB for now int32_t pcmem = 30000000; // = g_conf.m_titledbMaxDiskPageCacheMem; // f**k that we need all the mem! //pcmem = 0; // do not use any page cache if doing tmp cluster in order to // prevent swapping if ( g_hostdb.m_useTmpCluster ) pcmem = 0; int32_t pageSize = GB_INDEXDB_PAGE_SIZE; // init the page cache // . MDW: "minimize disk seeks" not working otherwise i'd enable it! if ( ! m_pc.init ( "titledb", RDB_TITLEDB, pcmem , pageSize ) ) return log("db: Titledb init failed."); // each entry in the cache is usually just a single record, no lists //int32_t maxCacheNodes = g_conf.m_titledbMaxCacheMem / (10*1024); // initialize our own internal rdb if ( ! m_rdb.init ( g_hostdb.m_dir , "titledb" , true , // dedup same keys? -1 , // fixed record size //g_hostdb.m_groupMask , //g_hostdb.m_groupId , //g_conf.m_titledbMinFilesToMerge , // this should not really be changed... -1,//3,//230 minfilestomerge mintomerge maxMem, // g_conf.m_titledbMaxTreeMem , maxTreeNodes , // now we balance so Sync.cpp can ordered huge list true , // balance tree? // turn off cache for now because the page cache // is just as fast and does not get out of date // so bad?? //0 , 0,//g_conf.m_titledbMaxCacheMem , 0,//maxCacheNodes , false ,// half keys? false ,// g_conf.m_titledbSav &m_pc , // page cache ptr true ) )// is titledb? return false; return true; // validate //return verify ( ); }