bool canSubmit ( unsigned long h , long now , long maxAddUrlsPerIpDomPerDay ) { // . sometimes no limit // . 0 means no limit because if they don't want any submission they // can just turn off add url and we want to avoid excess // troubleshooting for why a url can't be added if ( maxAddUrlsPerIpDomPerDay <= 0 ) return true; // init the table if ( ! s_init ) { s_htable.set ( 50000 ); s_init = true; } // clean out table every 24 hours if ( now - s_lastTime > 24*60*60 ) { s_lastTime = now; s_htable.clear(); } // . if table almost full clean out ALL slots // . TODO: just clean out oldest slots if ( s_htable.getNumSlotsUsed() > 47000 ) s_htable.clear (); // . how many times has this IP domain submitted? // . allow 10 times per day long n = s_htable.getValue ( h ); // if over 24hr limit then bail if ( n >= maxAddUrlsPerIpDomPerDay ) return false; // otherwise, inc it n++; // add to table, will replace old values s_htable.addKey ( h , n ); return true; }
uint8_t CountryCode::getLanguageFromDMOZ(long catid) { if(!m_init) return(0); catcountryrec_t ccr; ccr.lval = 0L; if(s_catToCountry.getNumSlotsUsed() < 1) return(0); long slot = s_catToCountry.getSlot((long)catid); if(slot < 0) return(0); ccr.lval = s_catToCountry.getValueFromSlot(slot); return(ccr.sval.lang); }
// . debug helper: logs the language and country of every non-zero record in
//   s_catToCountry, one log line per slot
// . walks the whole slot array (getNumSlots()), not just the first
//   getNumSlotsUsed() indexes: the used count says how many slots are
//   occupied, not where they are, so stopping there would skip any entry
//   that landed at a higher slot index
// . NOTE(review): relies on unoccupied slots holding a zero value, same as
//   the previous loop did -- confirm against the HashTable implementation
void CountryCode::debugDumpNumbers(void) {
	const long numSlots = s_catToCountry.getNumSlots();
	catcountryrec_t ccr;
	for(long slot = 0; slot < numSlots; slot++) {
		ccr.lval = s_catToCountry.getValueFromSlot(slot);
		// a zero value carries neither a language nor a country
		if(!ccr.lval) continue;
		log( "Slot %ld has lang %d, country %d (%ld)\n",
		     slot, ccr.sval.lang, ccr.sval.country, ccr.lval);
	}
}
// Do not call this function lightly, it takes an hour to run int CountryCode::createHashTable(void) { if(!fillRegexTable()) return(0); char tmpbuf[2048]; HashTable ht; unsigned long long entries = 0UL; long catid; long numcats = g_categories->m_numCats; catcountryrec_t ccr; SafeBuf sb(tmpbuf, 2048); log( "cat: Creating category country/language table.\n"); if(!ht.set(2,NULL,0,"ctrycode")) { log( "cat: Could not allocate memory for table.\n"); return(0); } for(long idx = 0; idx < numcats; idx++) { catid = g_categories->m_cats[idx].m_catid; sb.reset(); g_categories->printPathFromId(&sb, catid, true); if(!sb.getBufStart()) continue; if(!(numcats % 1000)) log( "init: %ld/%ld Generated %llu so far...\n", numcats, idx, entries); ccr.lval = 0L; ccr.sval.country = lookupCountryFromDMOZTopic(sb.getBufStart(), sb.length()); ccr.sval.lang = s_getLangIdxFromDMOZ(sb.getBufStart(), sb.length()); if(!ccr.lval) continue; if(ccr.sval.lang > 27 || ccr.sval.country > s_numCountryCodes) { char *xx = NULL; *xx = 0; } if(!ht.addKey(catid, ccr.lval)) { log( "init: Could not add %ld (%ld)\n", catid, ccr.lval); continue; } entries++; } ht.save(g_hostdb.m_dir, "catcountry.dat"); log( "Added %llu country entries from DMOZ to %s/catcountry.dat.\n", entries,g_hostdb.m_dir); log( "Slots %ld, Used Slots %ld.\n", ht.getNumSlots(), ht.getNumSlotsUsed()); freeRegexTable(); return(1); }
// . returns the number of used slots in s_catToCountry
// . returns 0 while m_init is not set
long CountryCode::getNumEntries(void) {
	return m_init ? s_catToCountry.getNumSlotsUsed() : 0;
}