/*
 * Inserts "e" at position "pos" in the header's entry array.
 * Ownership of "*e" transfers to the header -- no httpHeaderEntryClone()
 * is made -- so the caller must not reuse "*e" afterwards.
 */
void
httpHeaderInsertEntry(HttpHeader * hdr, HttpHeaderEntry * e, int pos)
{
    assert(hdr && e);
    assert_eid(e->id);
    debug(55, 7) ("%p adding entry: %d at %d\n", hdr, e->id, hdr->entries.count);
    /* first occurrence sets the presence bit; repeats bump the repeat stat */
    if (!CBIT_TEST(hdr->mask, e->id))
        CBIT_SET(hdr->mask, e->id);
    else
        Headers[e->id].stat.repCount++;
    arrayInsert(&hdr->entries, e, pos);
    /* account for the on-wire form "name: value\r\n" in the running length */
    hdr->len += strLen(e->name) + 2 + strLen(e->value) + 2;
}
WordData * parseAndHuff(char * reviews_path, int * word_count) { int i = 0; FILE * reviews_stream = fopen(reviews_path, "r"); if (reviews_stream == NULL) { printf("reviews stream failed to open\n"); return NULL; } //create BST of WordData structs by name int num_unique_words = 0; int word_buffer_position = 0; int max_word_size = 200; WordData * w = NULL; char * word_buffer = malloc(max_word_size * sizeof(char)); while(!feof(reviews_stream)) { int current = fgetc(reviews_stream); //fputc((char)current, stderr); //Test: is current a ' ', '\t' or '\n' (word separators): insert the buffer, then insert the current character if (current == ' ' || current == '\t' || current == '\n' || current == '.' || current == ',') { //insert word up untill current character unless the last word was a word separator if (word_buffer[0] != ' ' && word_buffer[0] != '\t' && word_buffer[0] != '\n' && word_buffer[0] != '.' && word_buffer[0] != ',') { //printf("word inserted: (%s) \tcurent character is: (%c)\n",word_buffer, current); w = WordData_insert(w, word_buffer, &num_unique_words); //printf("w->word: %s, w->frequency: %d, w->left: %p, w->right: %p\n",w->word, w->frequency, w->left, w->right); } //reset the buffer position, add current character to the buffer, and insert word word_buffer_position = 0; word_buffer[word_buffer_position] = (char)current; word_buffer[word_buffer_position+1] = '\0'; //printf("word inserted: (%s) \n",word_buffer); w = WordData_insert(w, word_buffer, &num_unique_words); } else { word_buffer[word_buffer_position] = (char)current; word_buffer[word_buffer_position+1] = '\0'; word_buffer_position++; } i++; } free(word_buffer); fclose(reviews_stream); //printTree(w); *word_count = num_unique_words; //printf("num of unique words is: %d\n",num_unique_words); //create array from BST and sort it by frequency int index = 0; WordData ** array = (WordData**)malloc(num_unique_words * sizeof(WordData *)); arrayBuild(array, w, &index, num_unique_words); qsort(array, 
(size_t)num_unique_words, sizeof(WordData *), comparFrequency); //printf("printing sorted array\n"); /*for (i = 0; i < num_unique_words; i++) { printf("%d) frequency: %d, leaf: %d, word: (%s)\n",i, array[i]->frequency, array[i]->leaf, array[i]->word ); }*/ //printf("number of unique words is: %d\n", num_unique_words); //build the huffman tree int max_non_leaves = 1000; int num_non_leaves = 0; WordData ** non_leaves = malloc(max_non_leaves * sizeof(WordData *)); int remaining_nodes = num_unique_words-1; int combined_frequency; int f_smallest_ind; int s_smallest_ind; while(remaining_nodes >= 1) { //ensure there is enough space in the array of non leaves if (num_non_leaves >= max_non_leaves-1) { max_non_leaves *= 2; non_leaves = realloc(non_leaves, max_non_leaves * sizeof(WordData *)); } //take 2 lowest frequency nodes, combine them f_smallest_ind = findSmallest(array, remaining_nodes); if (remaining_nodes == 1) { s_smallest_ind = f_smallest_ind? 0: 1; } else if (f_smallest_ind != remaining_nodes && array[f_smallest_ind-1]->frequency > array[remaining_nodes]->frequency) { s_smallest_ind = remaining_nodes; } else { s_smallest_ind = f_smallest_ind-1; } WordData * f_smallest = array[f_smallest_ind]; WordData * s_smallest = array[s_smallest_ind]; //ensure all leaf's children point to null if (f_smallest->leaf == 1) { f_smallest->left = NULL; f_smallest->right = NULL; } if (s_smallest->leaf == 1) { s_smallest->left = NULL; s_smallest->right = NULL; } //strcat(f_smallest->word,s_smallest->word) //printf("first smallest(%d)(%d): %s, second smallest(%d)(%d): %s\n",f_smallest_ind, f_smallest->frequency, f_smallest->word, // s_smallest_ind, s_smallest->frequency, s_smallest->word); combined_frequency = f_smallest->frequency + s_smallest->frequency; non_leaves[num_non_leaves] = WordData_create(0, "non leaf" , combined_frequency, f_smallest, s_smallest); //add that node back into the array, decrement array size arrayInsert(array, non_leaves[num_non_leaves], remaining_nodes, 
s_smallest_ind, f_smallest_ind); remaining_nodes--; num_non_leaves++; } WordData * huffman_tree = array[0]; free(array); free(non_leaves); //printTree(huffman_tree); return huffman_tree; }