C++ (Cpp) HashTableX::getScore примеры использования

Язык программирования: C++ (Cpp)

Класс/Тип: HashTableX

Метод/Функция: getScore

Примеров на hotexamples.com: 8

C++ (Cpp) HashTableX::getScore - 8 примеров найдено. Это лучшие примеры C++ (Cpp) кода для HashTableX::getScore, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

set(26)

addKey(17)

isInTable(10)

getValueFromSlot(10)

addTerm(8)

getScore(8)

getValue(6)

getNumSlots(6)

reset(5)

getSlot(5)

save(3)

removeSlot(3)

load(2)

getKeyFromSlot(2)

getNextSlot(2)

getNumSlotsUsed(2)

removeKey(1)

addTerm32(1)

m_callback(1)

isEmpty(1)

clear(1)

getScore32(1)

getKey32FromSlot(1)

getValFromSlot(1)

Пример #1

Показать файл

Файл: iana_charset.cpp Проект: privacore/open-source-search-engine

// Slightly modified from getTextEntity
int16_t get_iana_charset(const char *cs, int len)
{
    if (!s_isInitialized){
	// set up the hash table
	if ( ! s_table.set ( 8,4,4096,NULL,0,false,"ianatbl") ) {
		log(LOG_WARN, "build: Could not init table of IANA Charsets.");
		return csUnknown;
	}
	// now add in all the charset entries
	int32_t n = (int32_t)sizeof(s_charsets) / (int32_t)sizeof(IANACharset);
	// turn off quickpolling
	char saved = g_conf.m_useQuickpoll;
	g_conf.m_useQuickpoll = false;
	for ( int32_t i = 0 ; i < n ; i++ ) {
	    int64_t h = hash64Lower_a ( s_charsets[i].name, strlen(s_charsets[i].name) );
	    // store the charset index in the hash table as score
		if ( ! s_table.addTerm(h, i+1) ) {
			log(LOG_WARN, "build: add term failed");
			return csUnknown;
		}
	}
	g_conf.m_useQuickpoll = saved;
	s_isInitialized = true;
    }
    int64_t h = hash64Lower_a ( cs , len );
    // get the entity index from table (stored in the score field)
    int32_t i = (int32_t) s_table.getScore(h);
    // return 0 if no match
    if ( i == 0 ) return csUnknown;
    // return the iso character
    return (int16_t)s_charsets[i-1].mib_enum;
}

Пример #2

Показать файл

Файл: iana_charset.cpp Проект: BILObilo/open-source-search-engine

// Slightly modified from getTextEntity
short get_iana_charset(char *cs, int len)
{
    if (!s_isInitialized){
	// set up the hash table
	if ( ! s_table.set ( 8,4,4096,NULL,0,false,0,"ianatbl") )
	    return log("build: Could not init table of "
		       "IANA Charsets.");
	// now add in all the charset entries
	long n = (long)sizeof(s_charsets) / (long)sizeof(IANACharset);
	// turn off quickpolling
	char saved = g_conf.m_useQuickpoll;
	g_conf.m_useQuickpoll = false;
	for ( long i = 0 ; i < n ; i++ ) {
	    long long h = hash64Lower_a ( s_charsets[i].name, strlen(s_charsets[i].name) );
	    // store the charset index in the hash table as score
		if ( ! s_table.addTerm(&h, i+1) ) 
		return log("build: add term failed");
	}
	g_conf.m_useQuickpoll = saved;
	s_isInitialized = true;
    }
    long long h = hash64Lower_a ( cs , len );
    // get the entity index from table (stored in the score field)
    long i = (long) s_table.getScore ( &h );
    // return 0 if no match
    if ( i == 0 ) return csUnknown;
    // return the iso character
    return (short)s_charsets[i-1].mib_enum;
}

Пример #3

Показать файл

Файл: AdultBit.cpp Проект: BlaBlaNet/open-source-search-engine

bool AdultBit::isDirty ( char *s , int32_t len ) {

	static bool       s_isInitialized = false;
	static char      *s_dirty[] = {
		"anal",
		"analsex",
		"b*****b",
		"blowjobs",
		"boob",
		"boobs",
		"clitoris",
		"c**k",
		"cocks",
		"cum",
		"dick",
		"dicks",
		"g******g",
		"gangbangs",
		"gangbanging",
		"movie",
		"movies",
		"oral",
		"oralsex",
		"p**n",
		"porno",
		"pussy",
		"pussies",
		"sex",
		"sexy",
		"tit",
		"t**s",
		"video",
		"videos",
		"xxx",
		"xxxx",
		"xxxx"
	};

	if ( ! s_isInitialized ) {
		// set up the hash table
		if ( ! s_dtable.set ( 8,4,sizeof(s_dirty  )*2,NULL,0,false,0,
				      "adulttab")) 
			return log("build: Error initializing "
				    "dirty word hash table." );
		// now add in all the dirty words
		int32_t n = (int32_t)sizeof(s_dirty)/ sizeof(char *); 
		for ( int32_t i = 0 ; i < n ; i++ ) {
			int64_t h = hash64b ( s_dirty  [i] );
			if ( ! s_dtable.addTerm (&h, i+1) ) return false;
		}
		s_isInitialized = true;
	} 

	// compute the hash of the word "s"
	int64_t h = hash64Lower_a ( s , len );

	// get from table
	return s_dtable.getScore ( &h );
}

Пример #4

Показать файл

Файл: Entities.cpp Проект: privacore/open-source-search-engine

// . is "s" an HTML entity? (ascii representative of an iso char)
// . return the 32-bit unicode char it represents
// . returns 0 if none
// . JAB: const-ness for optimizer...
static const Entity *getTextEntity ( const char *s , int32_t len ) {
	if ( !initEntityTable()) return 0;
	// take the ; off, if any
	if ( s[len-1] == ';' ) len--;
	// compute the hash of the entity including &, but not ;
	int64_t h = hash64 ( s , len );
	// get the entity index from table (stored in the score field)
	int32_t i = (int32_t) s_table.getScore(h);
	// return 0 if no match
	if ( i == 0 )
		return NULL;
	// point to the utf8 char. these is 1 or 2 bytes it seems
	return s_entities+i-1;
}

Пример #5

Показать файл

Файл: Entities.cpp Проект: lemire/open-source-search-engine

// . is "s" an HTML entity? (ascii representative of an iso char)
// . return the 32-bit unicode char it represents
// . returns 0 if none
// . JAB: const-ness for optimizer...
uint32_t getTextEntity ( const char *s , int32_t len ) {
	if ( !initEntityTable()) return 0;
	// take the ; off, if any
	if ( s[len-1] == ';' ) len--;
	// compute the hash of the entity including &, but not ;
	int64_t h = hash64 ( s , len );
	// get the entity index from table (stored in the score field)
	int32_t i = (int32_t) s_table.getScore ( &h );
	// return 0 if no match
	if ( i == 0 ) return 0;
	// point to the utf8 char. these is 1 or 2 bytes it seems
	char *p = (char *)s_entities[i-1].utf8;
	// encode into unicode
	uint32_t c = utf8Decode ( p );
	// return that
	return c;
}

Пример #6

Показать файл

Файл: AdultBit.cpp Проект: BlaBlaNet/open-source-search-engine

bool AdultBit::isObscene ( char *s , int32_t len ) {

	static bool       s_isInitialized = false;
	static char      *s_obscene[] = {
		"c**t",
		"clits",
//		"cum",    magna cum laude
		"cums",
		"cumshot",
		"c**t",
		"cunts",
		"milf",
		"rimjob",
		"felch",
		"f**k",
		"f****d",
		"f****r",
		"f*****g",
		"f***s",
		"w***e",
		"w****s"
	};

	if ( ! s_isInitialized ) {
		// set up the hash table
		if ( ! s_otable.set ( 8,4,sizeof(s_obscene)*2,NULL,0,false,0,
				      "obscenetab") ) 
			return log("build: Error initializing "
				    "obscene word hash table." );
		// now add in all the stop words
		int32_t n = sizeof(s_obscene) / sizeof(char *);
		for ( int32_t i = 0 ; i < n ; i++ ) {
			int64_t h = hash64b ( s_obscene[i] );
			if ( ! s_otable.addTerm ( &h, i+1 ) ) return false;
		}
		s_isInitialized = true;
	} 

	// compute the hash of the word "s"
	int64_t h = hash64Lower_a ( s , len );

	// get from table
	return s_otable.getScore ( &h );
}

Пример #7

Показать файл

Файл: Entities.cpp Проект: BlaBlaNet/open-source-search-engine

// . is "s" an HTML entity? (ascii representative of an iso char)
// . return the 32-bit unicode char it represents
// . returns 0 if none
// . JAB: const-ness for optimizer...
uint32_t getTextEntity ( char *s , int32_t len ) {
	if ( !initEntityTable()) return 0;
	// take the ; off, if any
	if ( s[len-1] == ';' ) len--;
	// compute the hash of the entity including &, but not ;
	int64_t h = hash64 ( s , len );
	// get the entity index from table (stored in the score field)
	int32_t i = (int32_t) s_table.getScore ( &h );
	// return 0 if no match
	if ( i == 0 ) return 0;
	// point to the utf8 char. these is 1 or 2 bytes it seems
	char *p = (char *)s_entities[i-1].utf8;
	// encode into unicode
	uint32_t c = utf8Decode ( p );
	// return that
	return c;
	// return the iso character
	//printf("Converted text entity \"");
	//for(int si=0;si<len;si++)putchar(s[si]);
	//printf("\" to 0x%x(%d)\"%c\"\n",s_entities[i-1].c,s_entities[i-1].c, 
	//	   s_entities[i-1].c);
	//return (uint32_t)s_entities[i-1].c;
}

Пример #8

Показать файл

Файл: Msg51.cpp Проект: privacore/open-source-search-engine

// . cluster the docids based on the clusterRecs
// . returns false and sets g_errno on error
// . if maxDocIdsPerHostname is -1 do not do hostname clsutering
bool setClusterLevels ( const key96_t   *clusterRecs,
			const int64_t *docIds,
			int32_t       numRecs              ,
			int32_t       maxDocIdsPerHostname ,
			bool       doHostnameClustering ,
			bool       familyFilter         ,
			bool       isDebug              ,
			// output to clusterLevels[]
			char    *clusterLevels        ) {
	
	if ( numRecs <= 0 ) return true;

	// skip if not clustering on anything
	//if ( ! doHostnameClustering && ! familyFilter ) {
	//	memset ( clusterLevels, CR_OK, numRecs );
	//	return true;
	//}

	// how many negative site hashes do we have?
	// count how many docids we got, they are a cgi value, so represented
	// in ascii separated by +'s. i.e. "12345+435322+3439333333"
	//HashTableT <int64_t,char> sht;
	//if ( ! hashFromString ( &sht , noSiteIds ) ) return false;
	//bool checkNegative = ( sht.getNumSlotsUsed() > 0 );

	HashTableX ctab;
	// init to 2*numRecs for speed. use 0 for niceness!
	if ( ! ctab.set ( 8 , 4 , numRecs * 2,NULL,0,false,"clustertab" ) )
		return false;

	// time it
	u_int64_t startTime = gettimeofdayInMilliseconds();

	// init loop counter vars
	int32_t           count = 0;
	uint32_t  score = 0;
	char          *crec ;
	int64_t      h  ;
	char          *level ;
	bool           fakeIt ;

	for(int32_t i=0; i<numRecs; i++) {
		crec = (char *)&clusterRecs[i];
		// . set this cluster level
		// . right now will be CR_ERROR_CLUSTERDB or CR_OK...
		level = &clusterLevels[i];

		// sanity check
		if ( *level == CR_UNINIT ) gbshutdownLogicError();
		// and the adult bit, for cleaning the results
		if ( familyFilter && g_clusterdb.hasAdultContent ( crec ) ) {
			*level = CR_DIRTY;
			continue;
		}
		// if error looking up in clusterdb, use a 8 bit domainhash from docid
		fakeIt = (*level==CR_ERROR_CLUSTERDB);
		// assume ok, show it, it is visible
		*level = CR_OK;
		// site hash comes next
		if(!doHostnameClustering)
			continue;

		// . get the site hash
		// . these are only 32 bits!
		if(fakeIt)
			h = Titledb::getDomHash8FromDocId(docIds[i]);
		else
			h = g_clusterdb.getSiteHash26 ( crec );

		// inc this count!
		if ( fakeIt ) {
			g_stats.m_filterStats[CR_ERROR_CLUSTERDB]++;
		}

		// if it matches a siteid on our black list
		//if ( checkNegative && sht.getSlot((int64_t)h) > 0 ) {
		//	*level = CR_BLACKLISTED_SITE; goto loop; }
		// look it up
		score = ctab.getScore(h) ;
		// if still visible, just continue
		if ( score < (uint32_t)maxDocIdsPerHostname ) {
			if ( ! ctab.addTerm(h))
				return false;
			continue;
		}
		// otherwise, no lonegr visible
		*level = CR_CLUSTERED;
	}


	// debug
	for ( int32_t i = 0 ; i < numRecs && isDebug ; i++ ) {
		crec = (char *)&clusterRecs[i];
		uint32_t siteHash26=g_clusterdb.getSiteHash26(crec);
		logf(LOG_DEBUG,"query: msg51: hit #%" PRId32") sitehash26=%" PRIu32" "
		     "rec.n0=%" PRIx64" docid=%" PRId64" cl=%" PRId32" (%s)",
		     (int32_t)count++,
		     (int32_t)siteHash26,
		     clusterRecs[i].n0,
		     (int64_t)docIds[i],
		     (int32_t)clusterLevels[i],
		     g_crStrings[(int32_t)clusterLevels[i]] );
	}


	//log(LOG_DEBUG,"build: numVisible=%" PRId32" numClustered=%" PRId32" numErrors=%" PRId32,
	//    *numVisible,*numClustered,*numErrors);
	// show time
	uint64_t took = gettimeofdayInMilliseconds() - startTime;
	if ( took > 3 )
		log(LOG_INFO,"build: Took %" PRId64" ms to do clustering.",took);

	// we are all done
	return true;
}