void HashQueue::position(const char *data, ULng32 dataLength) {
  // if we do not have entries in this hash queue, do not even
  // bother to calculate the hash value.
  if (!entries_) {
    current_ = NULL;
    return;
  }

  // if we are in a global scan, reset it.
  globalScan_ = FALSE;

  if (!sequentialAdd()) {
    // set the hashValue_, currentChain_, and current_
    getHashValue(data, dataLength);
  } else {
    // This will return the same hash value for all calls.
    getHashValue(NULL, 0);
  }

  while (current_ && (current_->hashValue_ != hashValue_))
    current_ = current_->next_;
}
void HashQueue::insert(const char *data, ULng32 dataLength, void *entry) {
  if (!sequentialAdd()) {
    // set the hashValue_, currentChain_, and current_
    getHashValue(data, dataLength);
  } else {
    // This will return the same hash value for all calls.
    getHashValue(NULL, 0);
  }

  // current_ points to the current head of the chain now!
  if (!sequentialAdd()) {
    // insert the new entry as the new head
    hashTable_[currentChain_] =
        new (heap_) HashQueueEntry(entry, NULL, current_, hashValue_);

    // adjust the prev pointer of the former head of the chain
    if (current_)
      current_->prev_ = hashTable_[currentChain_];
  } else {
    // find the last entry in this chain and append the new entry there
    while (current_ && current_->next_)
      current_ = current_->next_;

    HashQueueEntry *hqe =
        new (heap_) HashQueueEntry(entry, current_, NULL, hashValue_);
    if (current_)
      current_->next_ = hqe;
    else
      hashTable_[currentChain_] = hqe;
  }

  // we got a new entry
  entries_++;

  // reset all positioning info
  currentChain_ = hashTableSize_;
  current_ = 0;
  hashValue_ = 0;
}
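// A minimal, self-contained sketch of the two chaining strategies used by
// HashQueue::insert above: head insertion for hashed adds, tail append for
// sequential adds. All names here (Node, headInsert, tailAppend) are
// illustrative, not part of the original class.
#include <cstddef>

struct Node {
  int value;
  Node *prev;
  Node *next;
};

// Insert at the head of a chain: O(1), newest entries are found first.
void headInsert(Node *&head, Node *n) {
  n->prev = NULL;
  n->next = head;
  if (head) head->prev = n;
  head = n;
}

// Append at the tail: O(chain length), but preserves insertion order,
// which is what sequentialAdd() buys at the cost of a walk.
void tailAppend(Node *&head, Node *n) {
  n->next = NULL;
  if (!head) { n->prev = NULL; head = n; return; }
  Node *last = head;
  while (last->next) last = last->next;
  n->prev = last;
  last->next = n;
}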
int main(int argc, char *argv[]) {
  char cmd[10];
  unsigned long long int input;
  while (~scanf("%s", cmd)) {
    if (cmd[0] == 'I') {
      scanf("%llu", &input);
      Hash[getHashValue(input)] = input;
    } else if (cmd[0] == 'D') {
      // delete: read the value and clear the slot that would hold it
      scanf("%llu", &input);
      Hash[getHashValue(input)] = 0;
    }
  }
  return 0;
}
void HashToBucket(const Genome& genome, HashTable& hash_table,
                  const set<uint32_t>& extremal_large_bucket) {
  fprintf(stderr, "[HASH TO BUCKET]\n");
  hash_table.index.resize(hash_table.index_size, 0);

  uint32_t size = 0, hash_value = 0;
  for (uint32_t i = 0; i < genome.num_of_chroms; ++i) {
    if (genome.length[i] < MINIMALSEEDLEN)
      continue;
    size = genome.start_index[i + 1] - MINIMALSEEDLEN;
    for (uint32_t j = genome.start_index[i]; j < size; ++j) {
      hash_value = getHashValue(&(genome.sequence[j]));
      /* extremal large buckets were deleted, so skip them */
      if (extremal_large_bucket.find(hash_value) != extremal_large_bucket.end()) {
        continue;
      }
      hash_table.index[hash_table.counter[hash_value]++] = j;
    }
  }

  // shift the counters back so counter[h] marks the start of bucket h again
  for (uint32_t i = hash_table.counter_size - 1; i >= 1; --i) {
    hash_table.counter[i] = hash_table.counter[i - 1];
  }
  hash_table.counter[0] = 0;
}
void HashQueue::insert(char *data, ULng32 dataLength, void *entry) {
  // set the hashValue_, currentChain_, and current_
  getHashValue(data, dataLength);

  // current_ points to the current head of the chain now!
  // insert the new entry as the new head
  hashTable_[currentChain_] =
      new (heap_) HashQueueEntry(entry, NULL, current_, hashValue_);

  // we got a new entry
  entries_++;

  // adjust the prev pointer of the former head of the chain
  if (current_)
    current_->prev_ = hashTable_[currentChain_];

  // reset all positioning info
  currentChain_ = hashTableSize_;
  current_ = 0;
  hashValue_ = 0;
}
void SimFieldDictionary::setFieldValue(StringTableEntry slotName,
                                       const char *value) {
  U32 bucket = getHashValue(slotName);
  Entry **walk = &mHashTable[bucket];
  while (*walk && (*walk)->slotName != slotName)
    walk = &((*walk)->next);

  Entry *field = *walk;
  if (!value || !*value) {
    // an empty value removes the field
    if (field) {
      mVersion++;
      if (field->value)
        dFree(field->value);
      *walk = field->next;
      freeEntry(field);
    }
  } else {
    if (field) {
      if (field->value)
        dFree(field->value);
      field->value = dStrdup(value);
    } else
      addEntry(bucket, slotName, 0, dStrdup(value));
  }
}
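// setFieldValue above uses the classic pointer-to-pointer idiom: "walk"
// holds the address of the link that points at the current entry, so
// unlinking needs no special case for the list head. A minimal sketch of
// the same idiom, with illustrative names (Node, removeMatching) that are
// not part of the original class:
struct ListNode {
  int key;
  ListNode *next;
};

void removeMatching(ListNode **head, int key) {
  ListNode **walk = head;
  while (*walk && (*walk)->key != key)
    walk = &((*walk)->next);        // advance to the next link slot
  if (ListNode *victim = *walk) {
    *walk = victim->next;           // unlink, head or mid-list alike
    delete victim;
  }
}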
void HashTable::add(Object& objectToAdd) {
  hashValueType index = getHashValue(objectToAdd);
  if (table[index] == 0)
    table[index] = new List;
  ((List *)table[index])->add(objectToAdd);
  itemsInContainer++;
}
Object& HashTable::findMember(Object& testObject) const {
  hashValueType index = getHashValue(testObject);
  if (index >= table.limit() || table[index] == 0) {
    return NOOBJECT;
  }
  return ((List *)table[index])->findMember(testObject);
}
U32 SimFieldDictionary::getFieldType(StringTableEntry slotName) const {
  U32 bucket = getHashValue(slotName);
  for (Entry *walk = mHashTable[bucket]; walk; walk = walk->next)
    if (walk->slotName == slotName)
      return walk->type ? walk->type->getTypeID() : TypeString;
  return TypeString;
}
const char *SimFieldDictionary::getFieldValue(StringTableEntry slotName) {
  U32 bucket = getHashValue(slotName);
  for (Entry *walk = mHashTable[bucket]; walk; walk = walk->next)
    if (walk->slotName == slotName)
      return walk->value;
  return NULL;
}
void HashTable::detach(Object& objectToDetach, DeleteType dt) {
  hashValueType index = getHashValue(objectToDetach);
  if (table[index] != 0) {
    unsigned listSize = ((List *)table[index])->getItemsInContainer();
    ((List *)table[index])->detach(objectToDetach, delItem(dt));
    // only decrement the count if the list actually shrank
    if (((List *)table[index])->getItemsInContainer() != listSize)
      itemsInContainer--;
  }
}
SimFieldDictionary::Entry *SimFieldDictionary::findDynamicField(
    const String &fieldName) const {
  U32 bucket = getHashValue(fieldName);
  for (Entry *walk = mHashTable[bucket]; walk; walk = walk->next) {
    if (fieldName.equal(walk->slotName, String::NoCase))
      return walk;
  }
  return NULL;
}
SimFieldDictionary::Entry *SimFieldDictionary::findDynamicField(
    StringTableEntry fieldName) const {
  U32 bucket = getHashValue(fieldName);
  for (Entry *walk = mHashTable[bucket]; walk; walk = walk->next) {
    if (walk->slotName == fieldName) {
      return walk;
    }
  }
  return NULL;
}
void HashQueue::position(char *data, ULng32 dataLength) {
  // if we do not have entries in this hash queue, do not even
  // bother to calculate the hash value.
  if (!entries_) {
    current_ = NULL;
    return;
  }

  // if we are in a global scan, reset it.
  globalScan_ = FALSE;

  // set the hashValue_, currentChain_, and current_
  getHashValue(data, dataLength);

  while (current_ && (current_->hashValue_ != hashValue_))
    current_ = current_->next_;
}
void SimFieldDictionary::setFieldType(StringTableEntry slotName,
                                      ConsoleBaseType *type) {
  // If the field exists on the object, set the type
  U32 bucket = getHashValue(slotName);
  for (Entry *walk = mHashTable[bucket]; walk; walk = walk->next) {
    if (walk->slotName == slotName) {
      // Found and type assigned, let's bail
      walk->type = type;
      return;
    }
  }

  // Otherwise create the field and set the type. Assign a null value.
  addEntry(bucket, slotName, type);
}
void CountBucketSize(const Genome& genome, HashTable& hash_table,
                     set<uint32_t>& extremal_large_bucket) {
  fprintf(stderr, "[COUNT BUCKET SIZE]\n");
  hash_table.counter_size = power(4, F2SEEDKEYWIGTH);
  hash_table.counter.resize(hash_table.counter_size + 1, 0);

  // first pass: count how many genome positions fall into each bucket
  uint32_t size = 0, hash_value = 0;
  for (uint32_t i = 0; i < genome.num_of_chroms; ++i) {
    if (genome.length[i] < MINIMALSEEDLEN)
      continue;
    size = genome.start_index[i + 1] - MINIMALSEEDLEN;
    for (uint32_t j = genome.start_index[i]; j < size; ++j) {
      hash_value = getHashValue(&(genome.sequence[j]));
      hash_table.counter[hash_value]++;
    }
  }

  // erase extremal large buckets
  for (uint32_t i = 0; i < hash_table.counter_size; ++i) {
    if (hash_table.counter[i] >= 500000) {
      fprintf(stderr, "[NOTICE: ERASE THE BUCKET %u SINCE ITS SIZE IS %u]\n", i,
              hash_table.counter[i]);
      hash_table.counter[i] = 0;
      extremal_large_bucket.insert(i);
    }
  }

  // prefix sums turn per-bucket counts into end offsets...
  for (uint32_t i = 1; i <= hash_table.counter_size; ++i) {
    hash_table.counter[i] += hash_table.counter[i - 1];
  }
  hash_table.index_size = hash_table.counter[hash_table.counter_size];

  // ...and shifting right by one turns end offsets into start offsets,
  // ready to be used as write cursors by HashToBucket
  for (uint32_t i = hash_table.counter_size - 1; i >= 1; --i) {
    hash_table.counter[i] = hash_table.counter[i - 1];
  }
  hash_table.counter[0] = 0;
}
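// CountBucketSize and HashToBucket together are a counting sort: count the
// bucket sizes, prefix-sum them into offsets, then scatter positions into a
// flat index array. A self-contained sketch of the same scheme over plain
// vectors (all names here are illustrative, not from the original source):
#include <cstdio>
#include <vector>
using std::vector;

int main() {
  const vector<unsigned> keys = {2, 0, 2, 1, 0, 2};  // stand-ins for hash values
  const unsigned numBuckets = 3;

  // count, prefix-sum into end offsets, then shift into start offsets
  vector<unsigned> counter(numBuckets + 1, 0);
  for (unsigned k : keys) counter[k]++;
  for (unsigned i = 1; i <= numBuckets; ++i) counter[i] += counter[i - 1];
  for (unsigned i = numBuckets; i >= 1; --i) counter[i] = counter[i - 1];
  counter[0] = 0;

  // scatter: counter[k] is the write cursor for bucket k
  vector<unsigned> index(keys.size());
  for (unsigned pos = 0; pos < keys.size(); ++pos)
    index[counter[keys[pos]]++] = pos;

  // shift back so [counter[k], counter[k+1]) spans bucket k again
  for (unsigned i = numBuckets; i >= 1; --i) counter[i] = counter[i - 1];
  counter[0] = 0;

  for (unsigned b = 0; b < numBuckets; ++b)
    for (unsigned j = counter[b]; j < counter[b + 1]; ++j)
      printf("bucket %u: position %u\n", b, index[j]);
  return 0;
}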
int tcp_stream_check(struct traffic *traffic) {
  // TODO: make sure things are sanitized before we get here, e.g. packets
  // with all flags set should be detected sooner.
  TcpSession *tsess;
  u_char flags;

  // If we receive a SYN ACK, the server accepts the session, which makes
  // it more likely that the session is worth creating.
  switch (traffic->tcphdr->th_flags) {
    case (TH_SYN):
      tcp_stream_add(traffic);
      return 0;
  }

  if ((tsess = getHashValue(session, traffic)) == NULL) {
    // Not part of an existing session; perhaps this connection already
    // existed when we fired up the IPS, so add it anyway unless we're
    // inline and TCP_STRICT is set to 1.
    if (CONFIG_TCP_STRICT == 1 && CONFIG_DIVERT_ENABLE == 1) {
      return 1;
    } else {
      // printf("Adding stream (CONFIG_TCP_STRICT=0)\n");
      tcp_stream_add(traffic);
      return 0;
    }
  }

  // Stream reassembly

  // Update the seentime (for timeout)
  gettimeofday(&tsess->seentime, NULL);

  // Clean up the flags
  flags = traffic->tcphdr->th_flags;
  flags &= ~TH_PUSH;
  flags &= ~TH_URG;

  switch (tsess->state) {
    case TCPS_NEW:
      switch (flags) {
        case (TH_SYN) + (TH_ACK):
          tsess->state = TCPS_SYN_RECEIVED;
          break;
        case (TH_RST):
          tcp_stream_del(traffic);
          break;
        default:
          // TODO
          break;
      }
      break;
    case TCPS_SYN_RECEIVED:
      switch (flags) {
        case (TH_ACK):
          tsess->timeout = TCPS_TIMEOUT_ESTABLISHED;
          tsess->state = TCPS_ESTABLISHED;
          break;
        default:
          // TODO
          break;
      }
      break;
    case TCPS_ESTABLISHED:
      switch (flags) {
        case (TH_FIN):
        case (TH_FIN) + (TH_ACK):
          tsess->state = TCPS_CLOSING;
          tsess->timeout = TCPS_TIMEOUT_CLOSING;
          break;
        default:
          // TODO: check if ok
          tsess->timeout = TCPS_TIMEOUT_CONNECTION;
          break;
      }
      break;
    case TCPS_CLOSING:
      switch (flags) {
        case (TH_FIN):
        case (TH_FIN) + (TH_ACK):
        case (TH_RST):
          tsess->state = TCPS_LAST_ACK;
          break;
        default:
          // TODO
          break;
      }
      break;
    case TCPS_LAST_ACK:
      switch (flags) {
        case (TH_ACK):
          tsess->state = TCPS_CLOSED;
          tcp_stream_del(traffic);
          break;
        default:
          // TODO
          break;
      }
      break;
    default:
      break;
  }
  return 0;
}
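// The nested switches above implement a per-session TCP state machine. The
// same transitions can also be written as a table, which is easier to audit.
// A reduced, illustrative sketch (states and the transition set are a subset
// of the code above; the flag constants are the standard TCP header bits):
#include <cstdio>

enum State { NEW, SYN_RECEIVED, ESTABLISHED, CLOSING, LAST_ACK, CLOSED };

const unsigned FIN = 0x01, SYN = 0x02, RST = 0x04, ACK = 0x10;

struct Transition { State from; unsigned flags; State to; };

// transition table mirroring the switch logic in tcp_stream_check
const Transition transitions[] = {
  { NEW,          SYN | ACK, SYN_RECEIVED },
  { SYN_RECEIVED, ACK,       ESTABLISHED  },
  { ESTABLISHED,  FIN,       CLOSING      },
  { ESTABLISHED,  FIN | ACK, CLOSING      },
  { CLOSING,      FIN,       LAST_ACK     },
  { CLOSING,      FIN | ACK, LAST_ACK     },
  { CLOSING,      RST,       LAST_ACK     },
  { LAST_ACK,     ACK,       CLOSED       },
};

State step(State s, unsigned flags) {
  for (const Transition& t : transitions)
    if (t.from == s && t.flags == flags)
      return t.to;
  return s;  // no matching transition: stay in the current state
}

int main() {
  State s = NEW;
  s = step(s, SYN | ACK);
  s = step(s, ACK);
  printf("state = %d\n", s);  // prints 2 (ESTABLISHED)
  return 0;
}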
int32_t ReallocLexicon::addPosting(char *term, offset posting,
                                   unsigned int hashValue) {
  // search the hashtable for the given term
  unsigned int hashSlot = hashValue % HASHTABLE_SIZE;
  int termID = hashtable[hashSlot];
  int previous = termID;
  int stemmingLevel = owner->STEMMING_LEVEL;
  while (termID >= 0) {
    if (terms[termID].hashValue == hashValue) {
      if (strcmp(term, terms[termID].term) == 0)
        break;
    }
    previous = termID;
    termID = terms[termID].nextTerm;
  }

  // if the term cannot be found in the lexicon, add a new entry
  if (termID < 0) {
    // termID < 0 means the term does not exist so far: create a new entry
    if (termCount >= termSlotsAllocated)
      extendTermsArray();

    // add new term slot as head of hash list
    termID = termCount++;
    strcpy(terms[termID].term, term);
    terms[termID].hashValue = hashValue;
    terms[termID].nextTerm = hashtable[hashSlot];
    hashtable[hashSlot] = termID;
    terms[termID].numberOfPostings = 1;
    terms[termID].lastPosting = posting;
    terms[termID].postings = NULL;

    // set "stemmedForm" according to the situation; apply stemming if
    // STEMMING_LEVEL > 0
    int len = strlen(term);
    if (term[len - 1] == '$')
      terms[termID].stemmedForm = -1;
    else if (stemmingLevel > 0) {
      char stem[MAX_TOKEN_LENGTH * 2];
      Stemmer::stemWord(term, stem, LANGUAGE_ENGLISH, false);
      if (stem[0] == 0)
        terms[termID].stemmedForm = termID;
      else if ((stemmingLevel < 2) && (strcmp(stem, term) == 0))
        terms[termID].stemmedForm = termID;
      else {
        len = strlen(stem);
        if (len >= MAX_TOKEN_LENGTH - 1) {
          stem[MAX_TOKEN_LENGTH - 1] = '$';
          stem[MAX_TOKEN_LENGTH] = 0;
        } else {
          stem[len] = '$';
          stem[len + 1] = 0;
        }
        int32_t stemmed = addPosting(stem, posting, getHashValue(stem));
        terms[termID].stemmedForm = stemmed;
      }
    } else
      terms[termID].stemmedForm = termID;
  } // end if (termID < 0)
  else {
    // move term to front of list in hashtable
    if (previous != termID) {
      terms[previous].nextTerm = terms[termID].nextTerm;
      terms[termID].nextTerm = hashtable[hashSlot];
      hashtable[hashSlot] = termID;
    }

    // we only add more than the first posting if:
    // - we are in STEMMING_LEVEL < 3 (means: we keep non-stemmed terms) or
    // - the term is not stemmable (stemmedForm == termID) or
    // - the term is already the stemmed form (stemmedForm < 0)
    if ((stemmingLevel >= 3) && (terms[termID].stemmedForm >= 0) &&
        (terms[termID].stemmedForm != termID))
      goto beforeAddingPostingForStemmedForm;

    if (posting <= terms[termID].lastPosting) {
      snprintf(errorMessage, sizeof(errorMessage),
               "Postings not monotonically increasing: %lld, %lld",
               (long long)terms[termID].lastPosting, (long long)posting);
      log(LOG_ERROR, LOG_ID, errorMessage);
      goto addPosting_endOfBitgeficke;
    }

    if (terms[termID].numberOfPostings <= 1) {
      if (terms[termID].numberOfPostings == 0) {
        // we have no postings yet for this guy; this can only happen if it is
        // one of the survivor terms from an earlier part of the text
        // collection; data has already been initialized, so we don't need to
        // do anything here
        terms[termID].lastPosting = posting;
      } else {
        // in this case, no chunk has been created yet; so, create the first
        // chunk and move both the first and the new posting into that chunk
        memoryOccupied += INITIAL_CHUNK_SIZE + sizeof(void*);
        byte *chunkData = terms[termID].postings =
            (byte*)malloc(INITIAL_CHUNK_SIZE);
        terms[termID].bufferSize = INITIAL_CHUNK_SIZE;
        int posInChunk = 0;
        offset value = terms[termID].lastPosting;
        while (value >= 128) {
          chunkData[posInChunk++] = 128 + (value & 127);
          value >>= 7;
        }
        chunkData[posInChunk++] = value;
        value = posting - terms[termID].lastPosting;
        while (value >= 128) {
          chunkData[posInChunk++] = 128 + (value & 127);
          value >>= 7;
        }
        chunkData[posInChunk++] = value;
        terms[termID].bufferPos = posInChunk;
      }
    } // end if (terms[termID].numberOfPostings <= 1)
    else {
      // we already have stuff in the chunks, so just append...
      int posInChunk = terms[termID].bufferPos;
      int sizeOfChunk = terms[termID].bufferSize;
      byte *chunkData = terms[termID].postings;

      // let "value" contain the delta value with respect to the previous
      // posting
      offset value = posting - terms[termID].lastPosting;
      if (posInChunk < sizeOfChunk - 6) {
        // if we have enough free space (42 bits are enough here because we
        // probably cannot have more than 2^42 postings in memory at the
        // same time)
        while (value >= 128) {
          chunkData[posInChunk++] = 128 + (value & 127);
          value >>= 7;
        }
        chunkData[posInChunk++] = value;
      } else {
        // if less than 42 bits are free, we might have to allocate a new
        // chunk...
        while (true) {
          if (posInChunk >= sizeOfChunk) {
            int newSize = sizeOfChunk + ((sizeOfChunk * CHUNK_GROWTH_RATE) >> 5);
            if (newSize < sizeOfChunk + INITIAL_CHUNK_SIZE)
              newSize = sizeOfChunk + INITIAL_CHUNK_SIZE;
            memoryOccupied += (newSize - sizeOfChunk);
            terms[termID].bufferSize = sizeOfChunk = newSize;
            terms[termID].postings = chunkData =
                (byte*)realloc(chunkData, newSize);
          }
          if (value < 128) {
            chunkData[posInChunk++] = value;
            break;
          } else {
            chunkData[posInChunk++] = 128 + (value & 127);
            value >>= 7;
          }
        }
      } // end else [less than 42 bits free in current chunk]
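// The chunk code above stores postings as 7-bit variable-byte (vbyte)
// deltas: each byte carries 7 payload bits, and the high bit marks "more
// bytes follow". A minimal, self-contained encode/decode sketch under those
// assumptions (offset modeled as a 64-bit int; all names are illustrative):
#include <cstdint>
#include <cstdio>

typedef int64_t offset;
typedef unsigned char byte;

int encodeVByte(offset value, byte *out) {
  int n = 0;
  while (value >= 128) {
    out[n++] = (byte)(128 + (value & 127));  // continuation bit set
    value >>= 7;
  }
  out[n++] = (byte)value;                    // final byte, high bit clear
  return n;
}

offset decodeVByte(const byte *in, int *consumed) {
  offset value = 0;
  int shift = 0, n = 0;
  while (in[n] & 128) {
    value |= (offset)(in[n++] & 127) << shift;
    shift += 7;
  }
  value |= (offset)in[n++] << shift;
  *consumed = n;
  return value;
}

int main() {
  byte buf[10];
  int len = encodeVByte(300, buf);  // 300 needs two bytes
  int used;
  printf("%d bytes, decoded %lld\n", len, (long long)decodeVByte(buf, &used));
  return 0;
}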
virtual HashCode hash() { return getHashValue(); }
U32 SimFieldDictionary::getHashValue( const String& fieldName ) { return getHashValue( StringTable->insert( fieldName ) ); }
void PairEndMapping(const string& org_read, const Genome& genome,
                    const HashTable& hash_table, const char& strand,
                    const bool& AG_WILDCARD, const uint32_t& max_mismatches,
                    TopCandidates& top_match) {
  uint32_t read_len = org_read.size();
  if (read_len < MINIMALREADLEN) {
    return;
  }

  /* maximal seed length for a particular read length */
  uint32_t seed_pattern_repeats = (read_len - SEEPATTERNLEN + 1) / SEEPATTERNLEN;
  uint32_t seed_len = seed_pattern_repeats * SEEPATTERNCAREDWEIGHT;

  string read;
  if (AG_WILDCARD) {
    G2A(org_read, read_len, read);
  } else {
    C2T(org_read, read_len, read);
  }

  uint32_t cur_max_mismatches = max_mismatches;
  for (uint32_t seed_i = 0; seed_i < SEEPATTERNLEN; ++seed_i) {
    /* all exact matches are covered by the first seed */
    if (!top_match.Empty() && top_match.Full() && top_match.Top().mismatch == 0
        && seed_i)
      break;
#if defined(SEEDPATTERN3) || defined(SEEDPATTERN5)
    /* all matches with 1 mismatch are covered by the first two seeds */
    if (!top_match.Empty() && top_match.Full() && top_match.Top().mismatch == 1
        && seed_i >= 2)
      break;
#endif
#ifdef SEEDPATTERN7
    /* all matches with 1 mismatch are covered by the first four seeds */
    if (!top_match.Empty() && top_match.Full() && top_match.Top().mismatch == 1
        && seed_i >= 4)
      break;
#endif
    string read_seed = read.substr(seed_i);
    uint32_t hash_value = getHashValue(read_seed.c_str());
    pair<uint32_t, uint32_t> region;
    region.first = hash_table.counter[hash_value];
    region.second = hash_table.counter[hash_value + 1];
    if (region.first == region.second)
      continue;

    IndexRegion(read_seed, genome, hash_table, seed_len, region);
    if (region.second - region.first + 1 > 5000) {
      continue;
    }

    for (uint32_t j = region.first; j <= region.second; ++j) {
      uint32_t genome_pos = hash_table.index[j];
      uint32_t chr_id = getChromID(genome.start_index, genome_pos);
      if (genome_pos - genome.start_index[chr_id] < seed_i)
        continue;
      genome_pos = genome_pos - seed_i;
      if (genome_pos + read_len >= genome.start_index[chr_id + 1])
        continue;

      /* check the position */
      uint32_t num_of_mismatch = 0;
      uint32_t num_of_nocared = seed_pattern_repeats * SEEPATTERNNOCAREDWEIGHT
          + seed_i;
      for (uint32_t p = 0;
           p < num_of_nocared && num_of_mismatch <= cur_max_mismatches; ++p) {
        if (genome.sequence[genome_pos + F2NOCAREDPOSITION[seed_i][p]]
            != read[F2NOCAREDPOSITION[seed_i][p]]) {
          num_of_mismatch++;
        }
      }
      for (uint32_t p = seed_pattern_repeats * SEEPATTERNLEN + seed_i;
           p < read_len && num_of_mismatch <= cur_max_mismatches; ++p) {
        if (genome.sequence[genome_pos + p] != read[p]) {
          num_of_mismatch++;
        }
      }
      if (num_of_mismatch > max_mismatches) {
        continue;
      }
      top_match.Push(CandidatePosition(genome_pos, strand, num_of_mismatch));
      if (top_match.Full()) {
        cur_max_mismatches = top_match.Top().mismatch;
      }
    }
  }
}
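// PairEndMapping verifies each candidate genome position by counting
// mismatches and bailing out once the running count exceeds the best result
// so far. A minimal sketch of that verify-with-cutoff step, with
// illustrative names that are not part of the original source:
#include <cstdint>
#include <string>

// returns the mismatch count, stopping early once it exceeds maxAllowed
uint32_t countMismatches(const std::string& genome, uint32_t pos,
                         const std::string& read, uint32_t maxAllowed) {
  uint32_t mismatches = 0;
  for (uint32_t p = 0; p < read.size() && mismatches <= maxAllowed; ++p)
    if (genome[pos + p] != read[p])
      ++mismatches;
  return mismatches;
}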
unsigned int HashDictionary::getHashIndex(const int p_key,
                                          const unsigned int p_count) const {
  return (getHashValue(p_key) + p_count * getDoubleHashValue(p_key)) %
         hashTable_.size();
}
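// getHashIndex above is the probe function for double hashing: the i-th
// probe for a key lands at (h1(key) + i * h2(key)) mod tableSize. A sketch
// of a lookup loop built on that probe sequence; h1, h2, and the table
// layout are illustrative assumptions, and h2 must never return 0 (and
// should be coprime to the table size) so that every slot is reachable:
#include <vector>

struct Slot { int key; bool used; };

unsigned h1(int key, unsigned size) { return (unsigned)key % size; }
unsigned h2(int key, unsigned size) { return 1 + (unsigned)key % (size - 1); }

// returns the slot index holding key, or -1 if the key is absent
int find(const std::vector<Slot>& table, int key) {
  const unsigned size = (unsigned)table.size();
  for (unsigned i = 0; i < size; ++i) {
    unsigned idx = (h1(key, size) + i * h2(key, size)) % size;
    if (!table[idx].used) return -1;  // empty slot: key was never inserted
    if (table[idx].key == key) return (int)idx;
  }
  return -1;                          // probed every slot without a hit
}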
ByteArray Nilsimsa::getHashValue() {
  byte hash[SIZE_HASH];
  getHashValue(hash);
  return ByteArray(hash, hash + SIZE_HASH);
}
MeshCleaner::MeshCleaner(udword nbVerts, const Point* srcVerts, udword nbTris,
                         const udword* srcIndices) {
  Point* cleanVerts = (Point*)ICE_ALLOC(sizeof(Point)*nbVerts);
  ASSERT(cleanVerts);
  // memcpy(cleanVerts, srcVerts, nbVerts*sizeof(Point));

  udword* indices = (udword*)ICE_ALLOC(sizeof(udword)*nbTris*3);
  udword* remapTriangles = (udword*)ICE_ALLOC(sizeof(udword)*nbTris);

  const float meshWeldTolerance = 0.01f;
  udword* vertexIndices = null;
  if(meshWeldTolerance!=0.0f) {
    vertexIndices = (udword*)ICE_ALLOC(sizeof(udword)*nbVerts);
    const float weldTolerance = 1.0f / meshWeldTolerance;
    // snap to grid
    for(udword i=0; i<nbVerts; i++) {
      vertexIndices[i] = i;
      cleanVerts[i] = Point(floorf(srcVerts[i].x*weldTolerance + 0.5f),
                            floorf(srcVerts[i].y*weldTolerance + 0.5f),
                            floorf(srcVerts[i].z*weldTolerance + 0.5f));
    }
  } else {
    memcpy(cleanVerts, srcVerts, nbVerts*sizeof(Point));
  }

  const udword maxNbElems = TMax(nbTris, nbVerts);
  const udword hashSize = NextPowerOfTwo(maxNbElems);
  const udword hashMask = hashSize-1;
  udword* hashTable = (udword*)ICE_ALLOC(sizeof(udword)*(hashSize + maxNbElems));
  ASSERT(hashTable);
  memset(hashTable, 0xff, hashSize * sizeof(udword));
  udword* const next = hashTable + hashSize;

  // mark referenced vertices, so unreferenced ones are dropped
  udword* remapVerts = (udword*)ICE_ALLOC(sizeof(udword)*nbVerts);
  memset(remapVerts, 0xff, nbVerts * sizeof(udword));
  for(udword i=0;i<nbTris*3;i++) {
    const udword vref = srcIndices[i];
    if(vref<nbVerts)
      remapVerts[vref] = 0;
  }

  // weld duplicate vertices via the hash table
  udword nbCleanedVerts = 0;
  for(udword i=0;i<nbVerts;i++) {
    if(remapVerts[i]==0xffffffff)
      continue;
    const Point& v = cleanVerts[i];
    const udword hashValue = getHashValue(v) & hashMask;
    udword offset = hashTable[hashValue];
    while(offset!=0xffffffff && cleanVerts[offset]!=v)
      offset = next[offset];
    if(offset==0xffffffff) {
      remapVerts[i] = nbCleanedVerts;
      cleanVerts[nbCleanedVerts] = v;
      if(vertexIndices)
        vertexIndices[nbCleanedVerts] = i;
      next[nbCleanedVerts] = hashTable[hashValue];
      hashTable[hashValue] = nbCleanedVerts++;
    }
    else remapVerts[i] = offset;
  }

  // drop out-of-range, zero-area and degenerate triangles
  udword nbCleanedTris = 0;
  for(udword i=0;i<nbTris;i++) {
    udword vref0 = *srcIndices++;
    udword vref1 = *srcIndices++;
    udword vref2 = *srcIndices++;
    if(vref0>=nbVerts || vref1>=nbVerts || vref2>=nbVerts)
      continue;

    // PT: you can still get zero-area faces when the 3 vertices are
    // perfectly aligned
    const Point& p0 = srcVerts[vref0];
    const Point& p1 = srcVerts[vref1];
    const Point& p2 = srcVerts[vref2];
    const float area2 = ((p0 - p1)^(p0 - p2)).SquareMagnitude();
    if(area2==0.0f)
      continue;

    vref0 = remapVerts[vref0];
    vref1 = remapVerts[vref1];
    vref2 = remapVerts[vref2];
    if(vref0==vref1 || vref1==vref2 || vref2==vref0)
      continue;

    indices[nbCleanedTris*3+0] = vref0;
    indices[nbCleanedTris*3+1] = vref1;
    indices[nbCleanedTris*3+2] = vref2;
    nbCleanedTris++;
  }
  ICE_FREE(remapVerts);

  // remove duplicate triangles with a second hashing pass
  udword nbToGo = nbCleanedTris;
  nbCleanedTris = 0;
  memset(hashTable, 0xff, hashSize * sizeof(udword));
  Indices* I = reinterpret_cast<Indices*>(indices);
  bool idtRemap = true;
  for(udword i=0;i<nbToGo;i++) {
    const Indices& v = I[i];
    const udword hashValue = getHashValue(v) & hashMask;
    udword offset = hashTable[hashValue];
    while(offset!=0xffffffff && I[offset]!=v)
      offset = next[offset];
    if(offset==0xffffffff) {
      remapTriangles[nbCleanedTris] = i;
      if(i!=nbCleanedTris)
        idtRemap = false;
      I[nbCleanedTris] = v;
      next[nbCleanedTris] = hashTable[hashValue];
      hashTable[hashValue] = nbCleanedTris++;
    }
  }
  ICE_FREE(hashTable);

  if(vertexIndices) {
    // un-snap: restore the original coordinates of the kept vertices
    for(udword i=0;i<nbCleanedVerts;i++)
      cleanVerts[i] = srcVerts[vertexIndices[i]];
    ICE_FREE(vertexIndices);
  }

  mNbVerts = nbCleanedVerts;
  mNbTris = nbCleanedTris;
  mVerts = cleanVerts;
  mIndices = indices;
  if(idtRemap) {
    ICE_FREE(remapTriangles);
    mRemap = NULL;
  }
  else {
    mRemap = remapTriangles;
  }
}
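// The weld in MeshCleaner rests on two ideas: snap each vertex to a grid
// (so nearby vertices collapse to identical coordinates), then hash the
// snapped position to find duplicates in O(1) expected time. A small sketch
// of that combination; the mixing constants and all names are illustrative,
// not taken from the ICE source:
#include <cmath>
#include <cstdint>
#include <unordered_map>
#include <vector>

struct Vec3 { float x, y, z; };

struct GridKey {
  int32_t x, y, z;
  bool operator==(const GridKey& o) const { return x==o.x && y==o.y && z==o.z; }
};

struct GridKeyHash {
  size_t operator()(const GridKey& k) const {
    // mix the three snapped coordinates; the constants are arbitrary primes
    return ((size_t)k.x * 73856093u) ^ ((size_t)k.y * 19349663u)
         ^ ((size_t)k.z * 83492791u);
  }
};

// returns, for each input vertex, the index of its welded representative
std::vector<uint32_t> weld(const std::vector<Vec3>& verts, float tolerance) {
  const float inv = 1.0f / tolerance;
  std::unordered_map<GridKey, uint32_t, GridKeyHash> first;
  std::vector<uint32_t> remap(verts.size());
  for (uint32_t i = 0; i < verts.size(); ++i) {
    const GridKey k = { (int32_t)floorf(verts[i].x*inv + 0.5f),
                        (int32_t)floorf(verts[i].y*inv + 0.5f),
                        (int32_t)floorf(verts[i].z*inv + 0.5f) };
    auto it = first.find(k);
    if (it == first.end()) { first[k] = i; remap[i] = i; }
    else remap[i] = it->second;
  }
  return remap;
}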