int insertElement(char * element) { //! insert an element //! returns the number of collisions which occurred before the element was inserted int ok; int i; int hashRes; int nrOfColissions; i = 0; ok = 0; nrOfColissions = 0; if (MAX_FILL_FACTOR < getFillFactor()) resizeHashTable(); while (ok == 0) { hashRes = hashFunction(element, i); if (hashTable[hashRes] == NULL) { hashTable[hashRes] = element; ok = 1; } else nrOfColissions++; i++; } return nrOfColissions; }
void LanguageModel::removeTermDescriptor(int termID) {
    // Removes the term descriptor with ID "termID", keeping the "terms" array
    // dense by moving the last descriptor into the freed slot, and shrinks the
    // backing arrays once usage drops far enough.
    LocalLock lock(this);
    if ((termID < 0) || (termID >= termSlotsUsed))
        return;  // out-of-range ID: nothing to do
    unsigned int hashSlot;
    int previous, id;
    // remove term descriptor with ID "termID" from hash table
    hashSlot = simpleHashFunction(terms[termID].stemmed) % hashTableSize;
    id = hashTable[hashSlot];
    previous = -1;
    // Walk the collision chain (linked through terms[].next) until termID is
    // found, then unlink it from the chain.
    while (id >= 0) {
        assert(id < termSlotsUsed);
        if (id == termID) {
            if (previous >= 0)
                terms[previous].next = terms[id].next;  // unlink mid-chain
            else
                hashTable[hashSlot] = terms[id].next;   // unlink chain head
            break;
        }
        previous = id;
        id = terms[id].next;
    }
    // copy term descriptor with ID (termSlotsUsed-1) to now-free slot and
    // update hash table
    if (termID != termSlotsUsed - 1) {
        memcpy(&terms[termID], &terms[termSlotsUsed - 1], sizeof(LanguageModelTermDescriptor));
        // Re-point whatever referenced the moved descriptor — either the chain
        // head or some predecessor's "next" link — at its new slot "termID".
        hashSlot = simpleHashFunction(terms[termID].stemmed) % hashTableSize;
        id = hashTable[hashSlot];
        if (id == termSlotsUsed - 1)
            hashTable[hashSlot] = termID;
        else {
            while (id >= 0) {
                if (terms[id].next == termSlotsUsed - 1) {
                    terms[id].next = termID;
                    break;
                }
                id = terms[id].next;
            }
        }
    } // end if (termID != termSlotsUsed - 1)
    // decrease slot use counter and resize arrays if appropriate
    termSlotsUsed--;
    if ((termSlotsUsed > INITIAL_TERM_SLOTS) && (termSlotsUsed < 0.5 * termSlotsAllocated)) {
        // Shrink the descriptor array once it is less than half full.
        termSlotsAllocated = (int)(termSlotsUsed * ARRAY_GROWTH_RATE);
        terms = typed_realloc(LanguageModelTermDescriptor, terms, termSlotsAllocated);
    }
    if ((termSlotsUsed < hashTableSize/2) && (termSlotsUsed > INITIAL_HASHTABLE_SIZE) || true) { }
    if ((termSlotsUsed < hashTableSize/2) && (hashTableSize > INITIAL_HASHTABLE_SIZE)) {
        // Shrink the hash table as well, but never below its initial size.
        int nhs = (int)(hashTableSize / ARRAY_GROWTH_RATE);
        if (nhs < INITIAL_HASHTABLE_SIZE)
            nhs = INITIAL_HASHTABLE_SIZE;
        resizeHashTable(nhs);
    }
} // end of removeTermDescriptor(int)
void LanguageModel::restrictToMostFrequent(int newTermCount) {
    // Keep only the "newTermCount" most frequent terms; a no-op when the model
    // already holds that many terms or fewer.
    assert(newTermCount > 0);
    LocalLock lock(this);
    if (newTermCount >= termSlotsUsed)
        return;  // nothing to trim
    // Order the descriptors by term frequency so the least frequent end up at
    // the tail of the array.
    qsort(terms, termSlotsUsed, sizeof(LanguageModelTermDescriptor), sortByTF);
    // Sorting moved every descriptor, so the hash chains must be rebuilt.
    resizeHashTable(hashTableSize);
    // Drop descriptors from the tail until only newTermCount remain; the guard
    // above ensures at least one removal happens.
    do {
        removeTermDescriptor(termSlotsUsed - 1);
    } while (termSlotsUsed > newTermCount);
} // end of restrictToMostFrequent(int)
int LanguageModel::addTermDescriptor() {
    // Allocates a fresh term descriptor slot and returns its ID, growing the
    // descriptor array and the hash table as needed.
    if (termSlotsUsed >= termSlotsAllocated) {
        termSlotsAllocated = (int)(termSlotsUsed * ARRAY_GROWTH_RATE);
        // BUG FIX: make sure the new capacity actually fits one more
        // descriptor.  termSlotsUsed * ARRAY_GROWTH_RATE truncates back to
        // termSlotsUsed for small values, and yields 0 when the model is
        // empty, which would leave the array too small for the write below.
        if (termSlotsAllocated <= termSlotsUsed)
            termSlotsAllocated = termSlotsUsed + 1;
        terms = typed_realloc(LanguageModelTermDescriptor, terms, termSlotsAllocated);
    }
    // Keep the hash table at least as large as the number of terms.
    if (termSlotsUsed >= hashTableSize)
        resizeHashTable((int)(termSlotsUsed * ARRAY_GROWTH_RATE));
    terms[termSlotsUsed].next = -1;  // new descriptor starts with an empty hash chain
    return termSlotsUsed++;
} // end of addTermDescriptor()
int insertElement(char * element) { //! insert an element //! returns the number of collisions which occurred before the element was inserted if (getFillFactor()>MAX_FILL_FACTOR){ resizeHashTable(); } int hash = hashFunction3(element, 0); int nrCol = 0; while (nrCol<size && hashTable[hash]!=NULL){ nrCol++; hash = hashFunction3(element, nrCol); } hashTable[hash] = (char*)malloc(MAX_STRING_LENGTH+1); strcpy(hashTable[hash], element); return nrCol; }
LanguageModel::LanguageModel(char *fileName) {
    // Builds a language model from a text file; falls back to an empty model
    // (corpusSize = documentCount = 1) when the file cannot be opened.
    FILE *f = NULL;
    if ((fileName != NULL) && (fileName[0] != 0))
        f = fopen(fileName, "r");
    if (f == NULL) {
        snprintf(errorMessage, sizeof(errorMessage), "Unable to open file: %s", fileName);
        log(LOG_ERROR, LOG_ID, errorMessage);
        initialize();
        corpusSize = 1.0;
        documentCount = 1.0;
        stemmed = false;
    }
    else {
        initialize();
        int s;
        char line[1024];
        // Header line 1: stemming flag (non-zero => terms are stemmed).
        getNextNonCommentLine(f, line, sizeof(line));
        sscanf(line, "%d", &s);
        stemmed = (s != 0);
        // Header line 2: term count, corpus size, document count.
        getNextNonCommentLine(f, line, sizeof(line));
        sscanf(line, "%d%lf%lf", &termSlotsUsed, &corpusSize, &documentCount);
        termSlotsAllocated = termSlotsUsed + 32;
        if (termSlotsAllocated < INITIAL_TERM_SLOTS)
            termSlotsAllocated = INITIAL_TERM_SLOTS;
        terms = typed_realloc(LanguageModelTermDescriptor, terms, termSlotsAllocated);
        // One descriptor per line: term, stemmed form, term frequency,
        // document frequency.
        // NOTE(review): the "%s" conversions are unbounded -- an over-long
        // token would overflow terms[i].term / terms[i].stemmed; confirm the
        // file format caps token length below the descriptor field sizes.
        for (int i = 0; i < termSlotsUsed; i++) {
            getNextNonCommentLine(f, line, sizeof(line));
            long long tf, df;
            int status = sscanf(line, "%s%s%lld%lld", terms[i].term, terms[i].stemmed, &tf, &df);
            terms[i].termFrequency = tf;
            terms[i].documentCount = df;
            // NOTE(review): these asserts fire only after tf/df were already
            // consumed; in NDEBUG builds a malformed line passes silently.
            assert(status == 4);
            assert(strlen(terms[i].term) > 0);
            assert(strlen(terms[i].stemmed) > 0);
        }
        // Build the hash chains for all descriptors just read.
        resizeHashTable(termSlotsAllocated);
        fclose(f);
    }
} // end of LanguageModel(char*)
/* Inserts "string" into hash_table and returns the number of collisions
 * detected before a free slot was found.
 *
 * Collision resolution: on each collision the probe key is the input string
 * extended by one character (cycled from the input itself) and rehashed.
 * The table stores the caller's pointer directly; the probe buffer is only a
 * temporary and is freed before returning. */
int insertValue(char* string) {
    int collisions = 0;
    int pos = hash(string);
    size_t len = strlen(string);
    char* buffer = (char*) calloc(len + 1, sizeof(char));
    strcpy(buffer, string);
    while (*(hash_table + pos) != NULL) {
        collisions++;
        size_t blen = strlen(buffer);
        buffer = (char*) realloc(buffer, (blen + 2) * sizeof(char));
        /* BUG FIX: the original computed (collisions-1) % strlen(string),
         * which is a division by zero (undefined behaviour) when the input
         * string is empty.  Fall back to a fixed probe character so the probe
         * key still changes on every collision. */
        buffer[blen] = (len > 0) ? string[(collisions - 1) % len] : 'x';
        buffer[blen + 1] = '\0';
        pos = hash(buffer);
    }
    *(hash_table + pos) = string;
    free(buffer);
    hash_size++;
    resizeHashTable();
    return collisions;
}
void addToHashTable(int N) {
    // Reads N strings from the input file and inserts each into the hash
    // table, recording the per-element collision count in "collisions".
    char **content = readFromFile(N);
    // BUG FIX: the collision counters were malloc'd and then incremented with
    // "+=" while still uninitialized (undefined behaviour).  calloc zeroes
    // them before the first read.
    collisions = (int *) calloc(N, sizeof(int));
    initHashTable(N);
    int nr = 0;
    int k;
    float fill_factor;
    for (k = 0; k < N; k++) {
        // Grow the table before inserting once the load factor gets too high.
        fill_factor = getFillFactor();
        if (fill_factor > MAX_FILL_FACTOR)
            resizeHashTable();
        collisions[k] += insertElement(content[k], size, hashTable);
        nr += 1;
        //printContentToConsole(hashTable, size);
    }
    //printf("\n%d end", nr);
}
int insertElement(char * element,int N) { //! insert an element //! returns the number of collisions which occurred before the element was inserted int bucket,nrofcol=0; bucket=hashFunction(element,nrofcol); while(nrofcol<size) { if(hashTable[bucket]==NULL) { hashTable[bucket]=element; return nrofcol; } nrofcol++; bucket++; if(nrofcol>nrmax) nrmax=nrofcol; } if(getFillFactor()>MAX_FILL_FACTOR) resizeHashTable(N); return 0; }
int main() { int N = _1000; char ** content = readFromFile(N); //printContentToConsole(content,N); initHashTable(N); int collisions=0; int resizes=0; int i; float Fillfactor; for(i=0; i<N; i++) { collisions = collisions+insertElement(content[i]); Fillfactor= getFillFactor(); if(Fillfactor>MAX_FILL_FACTOR && size<N) { resizes++; resizeHashTable(N); i=0; } } printf("Total number of collisions: %d \nTimes the has table was resized: %d \n", collisions, resizes); }
FILE* uploadAndRunFile() { FILE *fp; char * input = (char*)malloc(sizeof(char)*MAX_INPUT_LENGTH); char * line = (char*)malloc(sizeof(char)*MAX_INPUT_LENGTH); char * token; HashObjectPtr extraJob; char delims[] = ",.;:\"&!? -_\n\t\r@()#$%^*+={}[]|<>/~`"; printf("%s\n", "Please enter the path to the file\n\n"); fgets(input, MAX_INPUT_LENGTH, stdin); int j = 0; for (; j < strlen(input); j++) { if (input[j] == '\n') { input[j] = '\0'; break; } } fp = fopen(input, "r+"); if (fp != NULL) { while(!feof(fp)) { fgets(line, MAX_INPUT_LENGTH, fp); if (strtok(line, delims) != NULL) { token = strtok(line, delims); extraJob = createHashObject(token); HashInsert(table, extraJob); while (token != NULL) { token = strtok(NULL, delims); if (token != NULL) { token = strtok(NULL, delims); if (table->maxChainReached) { HashTablePtr oldTable = table; table = resizeHashTable(table); FreeHashTable(oldTable); } HashObjectPtr job = createHashObject(token); HashInsert(table, job); } } } } fclose(fp); free(input); free(line); return fp; } else { printf("%s\n", "That path was invalid, please try again\n\n"); printOptions(); free(input); free(line); return NULL; } }
bool hashTableInsert(hashTable_t * ht, UINT64 value) {
    // Inserts "value" into the hash table.  Returns true if it was inserted,
    // false if it was already present.  A bucket holds either a tagged value
    // or a pointer to a chain of hashNode_t overflow nodes (distinguished via
    // the low-order bit -- see makeValue/isValue/makePtr).
    // See if we have reached our size limit.
    if (ht->entries == ht->size)
        resizeHashTable(ht);
    int bucket = hash(value) % ht->size;
    // We need to empty the low order bit so that we can tell the difference between values and ptrs.
    value = makeValue(value);
    UINT64 curvalue = ht->table[bucket];
    // The empty case should be most common.
    if (isEmpty(curvalue)) {
        ht->table[bucket] = value;
        ht->entries += 1;
        return true;
    }
    // The value case should be next most common.
    if (isValue(curvalue)) {
        // The value is already here.
        if (curvalue == value)
            return false;
        // We have a collision and need to add an overflow node; the bucket now
        // stores the node pointer and both values move into the node.
        hashNode_t * node = getHashNode();
        ht->table[bucket] = (UINT64)node;
        node->values[0] = curvalue;
        // Note that this test doesn't cost us anything as it happens at compile time.
        if (HASHNODE_PAYLOAD_SIZE >= 2) {
            node->values[1] = value;
        }
        else {
            // We need to add a second new node.
            hashNode_t * secondNode = getHashNode();
            node->next = secondNode;
            secondNode->values[0] = value;
        }
        ht->entries += 1;
        return true;
    }
    // The overflow node case: scan every node's payload for a free slot or a
    // duplicate before falling through to append a new node.
    hashNode_t * curNode = makePtr(curvalue);
    while (true) {
        for (int i=0; i<HASHNODE_PAYLOAD_SIZE; i++) {
            // Check if we have an empty slot (0 marks unused payload entries --
            // NOTE(review): presumably getHashNode returns zeroed nodes; confirm).
            if (curNode->values[i] == 0) {
                curNode->values[i] = value;
                ht->entries += 1;
                return true;
            }
            // Check if the value matches the current value.
            if (curNode->values[i] == value)
                return false;
        }
        if (curNode->next == NULL)
            break;
        curNode = curNode->next;
    }
    // If we are here, we need a new node appended to the end of the chain.
    hashNode_t * node = getHashNode();
    curNode->next = node;
    node->values[0] = value;
    ht->entries += 1;
    return true;
}