/*
 * Releases a DocTable.
 *
 * Both member hash tables (docid_to_docname and docname_to_docid) are
 * released with FreeHashTable(), passing free() as the second argument,
 * and then the DocTable structure itself is freed.
 *
 * table and both of its member tables must be non-NULL; each condition is
 * enforced with Verify333 immediately before the corresponding release.
 */
void FreeDocTable(DocTable table) {
  Verify333(table != NULL);

  // STEP 1.
  // First member table: check it, then release it.
  Verify333(table->docid_to_docname != NULL);
  FreeHashTable(table->docid_to_docname, free);

  // Second member table: same treatment.
  Verify333(table->docname_to_docid != NULL);
  FreeHashTable(table->docname_to_docid, free);

  // Finally the container itself.
  free(table);
}
/*
 * Grows ht in place once it holds at least three elements per bucket
 * (num_elements >= 3 * num_buckets).
 *
 * A replacement table with nine times as many buckets is allocated and
 * every key/value pair of ht is inserted into it.  If that succeeds, the
 * two table records are swapped so that ht refers to the larger bucket
 * array, and the replacement record (which by then holds the old contents)
 * is released with HTNullFree as the free callback.
 *
 * Any failure along the way (allocation, iterator creation, insertion)
 * leaves ht unchanged; the partially built replacement is discarded.
 */
static void ResizeHashtable(HashTable ht) {
  HashTable grown;
  HTIter cursor;
  int copied_all = 1;

  // Below the threshold: nothing to do.
  if (ht->num_elements < 3 * ht->num_buckets) {
    return;
  }

  grown = AllocateHashTable(ht->num_buckets * 9);
  if (grown == NULL) {
    // Out of memory: give up quietly.
    return;
  }

  cursor = HashTableMakeIterator(ht);
  if (cursor == NULL) {
    // Out of memory: give up quietly.
    FreeHashTable(grown, &HTNullFree);
    return;
  }

  // Copy every element of the old table into the new one.
  for (; !HTIteratorPastEnd(cursor); HTIteratorNext(cursor)) {
    HTKeyValue entry, displaced;

    Verify333(HTIteratorGet(cursor, &entry) == 1);
    if (InsertHashTable(grown, entry, &displaced) != 1) {
      copied_all = 0;
      break;
    }
  }
  HTIteratorFree(cursor);

  if (copied_all) {
    // Swap the records so the caller's handle now owns the bigger table;
    // "grown" ends up holding the old record.
    HashTableRecord held = *ht;
    *ht = *grown;
    *grown = held;
  }

  // Releases the old record on success, or the partial copy on failure.
  FreeHashTable(grown, &HTNullFree);
}
static void TestFindOrInsert() { struct HashTable* ht; int i; int iterations = 1000000; int range = 30; /* random number between 1 and 30 */ ht = AllocateHashTable(4, 0); /* value is 4 bytes, 0: don't copy keys */ /* We'll test how good rand() is as a random number generator */ for (i = 0; i < iterations; ++i) { int key = rand() % range; HTItem* bck = HashFindOrInsert(ht, key, 0); /* initialize to 0 */ bck->data++; /* found one more of them */ } for (i = 0; i < range; ++i) { HTItem* bck = HashFind(ht, i); if (bck) { printf("%3d: %d\n", bck->key, bck->data); } else { printf("%3d: 0\n", i); } } FreeHashTable(ht); }
HashTable BuildWordHT(char *filename) { char *filecontent; HashTable tab; HWSize_t filelen, i; if (filename == NULL) return NULL; // STEP 6. // Use ReadFile() to slurp in the file contents. If the // file turns out to be empty (i.e., its length is 0), // or you couldn't read the file at all, return NULL to indicate // failure. filecontent = ReadFile(filename, &filelen); if (filecontent == NULL || filelen == 0) return NULL; // Verify that the file contains only ASCII text. We won't try to index any // files that contain non-ASCII text; unfortunately, this means we aren't // Unicode friendly. for (i = 0; i < filelen; i++) { if ((filecontent[i] == '\0') || ((unsigned char) filecontent[i] > ASCII_UPPER_BOUND)) { free(filecontent); return NULL; } } // Great! Let's split the file up into words. We'll allocate the hash // table that will store the WordPositions structures associated with each // word. Since our hash table dynamically grows, we'll start with a small // number of buckets. tab = AllocateHashTable(64); // Loop through the file, splitting it into words and inserting a record for // each word. LoopAndInsert(tab, filecontent); // If we found no words, return NULL instead of a // zero-sized hashtable. if (NumElementsInHashTable(tab) == 0) { FreeHashTable(tab, &WordHTFree); tab = NULL; } // Now that we've finished parsing the document, we can free up the // filecontent buffer and return our built-up table. free(filecontent); filecontent = NULL; return tab; }
/*
 * Exercises HashInsert()/HashFind() with three month-name keys.
 *
 * Inserts January=31, February=28 and March=31, then asserts that looking
 * up "February" yields an item whose data is 28.
 */
static void TestInsert() {
  struct HashTable* months = AllocateHashTable(1, 0);  /* value is 1 byte, 0: don't copy keys */
  HTItem* found;

  HashInsert(months, PTR_KEY(months, "January"), 31);
  HashInsert(months, PTR_KEY(months, "February"), 28);
  HashInsert(months, PTR_KEY(months, "March"), 31);

  found = HashFind(months, PTR_KEY(months, "February"));
  assert(found);
  assert(found->data == 28);

  FreeHashTable(months);
}
/*
 * duck.print(output)
 *
 * Fetches the record named "output" from the current context and prints it
 * with PrintValue().  A fresh table is installed in duck_print_records for
 * the duration of the print and released (and the global reset to NULL)
 * afterwards.  The last-expression value is set to nil.
 *
 * Neither argument_count nor data is used.  Always returns 0.
 */
int DuckPrint(int argument_count, void* data)
{
    VALUE output = GetRecord("output", gCurrentContext);

    /* Scratch table used while printing; torn down right after. */
    duck_print_records = CreateHashTable();
    PrintValue(output);
    FreeHashTable(duck_print_records);
    duck_print_records = NULL;

    /* The call itself evaluates to nil. */
    gLastExpression.type = VAL_NIL;
    gLastExpression.data.primitive = 0;

    return 0;
}
/*
 * Renders value into a newly allocated string.
 *
 * A 512-byte buffer is allocated and started as the empty string; the
 * buffer and its size are passed by address to PrintValueString(), so that
 * function may replace the buffer.  A fresh table is installed in
 * duck_print_records for the duration of the call and released afterwards.
 *
 * Returns the resulting buffer, or NULL if the initial allocation fails
 * (in which case duck_print_records is left untouched).
 * NOTE(review): the buffer is heap-allocated here, so the caller presumably
 * owns it and must free() it -- confirm against callers.
 */
char* ToString(VALUE value)
{
    unsigned int size = 512;
    char* string = (char*)malloc(size * sizeof(char));

    if (string == NULL)
    {
        return NULL;
    }
    /* Start from an empty string (replaces a zero-length sprintf). */
    string[0] = '\0';

    duck_print_records = CreateHashTable();
    PrintValueString(&string, &size, value);
    FreeHashTable(duck_print_records);
    duck_print_records = NULL;

    return string;
}
gar_list* gar_index(void* gar, size_t length) { gar_list* list = malloc(sizeof(gar_list)); size_t size; char name[100]; unsigned long i; name[99] = '\0'; list->gar = gar; list->length = length; list->ht = AllocateHashTable(0, 1); for (; length >= 512; gar += 512, length -= 512) if (!*(char*)gar) return list; else { size = strtol(gar + 124, NULL, 8); if (((char*)gar)[156] == '0' || ((char*)gar)[156] == '\0') { for (i = 5; i < 99; i ++) switch ((name[i - 5] = ((char*)gar)[i])) { case '/': name[i - 5] = '.'; break; case ' ': name[i - 5] = '\0'; case '\0': goto copied; } copied: HashInsert(list->ht, PTR_KEY(list->ht, name), (ulong)gar + 512); } size = size ? ((size - 1) / 512 + 1) * 512 : 0; if (length < size) break; gar += size; length -= size; } FreeHashTable(list->ht); free(list); return NULL; }
// Main program. int main(int argc, char** argv) { int cnt = 0; // Load and recreate an InvertedIndex from index file. InitializeHashTable(&Index); ptr = ReadFile(file, &Index); // Perform the tests. RUN_TEST(TestGETLINKS1, "GetLinks Test case 1"); RUN_TEST(TestGETLINKS2, "GetLinks Test case 2"); RUN_TEST(TestGETLINKS3, "GetLinks Test case 3"); RUN_TEST(TestGETLINKS4, "GetLinks Test case 4"); RUN_TEST(TestGETLINKS5, "GetLinks Test case 5"); RUN_TEST(TestGETLINKS6, "GetLinks Test case 6"); RUN_TEST(TestGETLINKS7, "GetLinks Test case 7"); RUN_TEST(TestAND1, "And Test case 1"); RUN_TEST(TestAND2, "And Test case 2"); RUN_TEST(TestAND3, "And Test case 3"); RUN_TEST(TestOR1, "Or Test case 1"); RUN_TEST(TestOR2, "Or Test case 2"); RUN_TEST(TestSORT1, "Sort Test case 1"); RUN_TEST(TestDISPLAY1, "Display Test case 1"); // Cleanup. CleanHashTable(ptr); FreeHashTable(ptr); if (!cnt) { printf("All passed!\n"); return 0; } else { printf("Some fails!\n"); return 1; } }
int main (int argc, char *argv[]) { paper_rec_t DedupeRecord; dd_uint64_t Unique_CRID; /* Unique CR_ID = (C_ID << 16) | CR_ID */ long victim_index = 0, cache_size, window_size, bloom_filter_size; long i, j=0, temp_index; int Initial_Flag = 0, cache_algorithm; dd_uint8_t *sha1_value=NULL; int nLen = 0; long max_counter=0; HTItem *chunk_item, *item; long byte_len, temp, offset, ver, temp1; /* to read a trace file */ unsigned long hash1, hash2; /* Heap Data structure variables */ Cache_Memory Dataitem; std::vector<Cache_Memory>::iterator itr; unsigned long writeCounter = 0; unsigned long access_counter; long file_position; FILE *fp1, *fp; size_t keySize=0,iCnt; clock_t start = clock(); time_t begin,end; time(&begin); if (argc < 5) { /* 0 1 2 3 4 */ fprintf(stderr, "usage: %s <Cache Size> <Window Size> <Cache Algorithm (0, 1, 2)> <Trace File>\n", argv[0]); fprintf(stderr, " - Cache Size: Dedupe read cache size in terms of # of data chunk (e.g. 500 chunks = 4MB (500*8KB))\n"); fprintf(stderr, " - Window Size: Future sliding window size in terms of TIMES of cache size.\n"); fprintf(stderr, " - Cache Algorithm: 0 (Belady MIN), 1 (Belady MIN with a future window), 2 (Lookahead read cache)\n"); fprintf(stderr, " - Trace File: Trace file name with path\n"); exit(1); } cache_size = atol(argv[1]); assert(cache_size > 0); /* cache size must be positive */ window_size = atol(argv[2]); assert(window_size > 0); /* window size must be positive */ cache_algorithm = atoi(argv[3]); assert((cache_algorithm == 0)||(cache_algorithm == 1)||(cache_algorithm == 2)); /* there are only three selections */ bloom_filter_size = cache_size*2; //No. 
of Hash functions for BF is 2 bloom_filter = (long *)malloc(sizeof(long)*bloom_filter_size); ht_cache = AllocateHashTable(SHA1_VALUE_LENGTH, 1); heap = newMinHeap((u32)cache_size); if((fp1 = fopen(argv[4], "rb")) == NULL){ //for reading data one by one DEBUG_INFO("File open error....1\n"); exit (-1); } if((fp = fopen(argv[4], "rb")) == NULL){ //for searching its future reference distance DEBUG_INFO("File open error....2\n"); exit (-1); } long c=0, d=0; u32 itemIndex; keySize = sizeof(DedupeRecord.fp_bytes); DEBUG_INFO("Record Size is: %d\n",keySize); while (1) { fread(&DedupeRecord, sizeof(struct _paper_rec_t),1, fp1); /*if(DedupeRecord.fp_bytes[0] == 0) DedupeRecord.fp_bytes[0] = '0';*/ /*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt) printf("%c",DedupeRecord.fp_bytes[iCnt]);*/ //DEBUG_INFO("Reading chunks : %ld\n", c++); c++; if(c%1000 == 0){ printf("Reading Chunks: %ld\n",c); } if (c % 10000 == 0) { printf("Cache hit ratio: %.3f = %lu / %lu \n", (double) (Hit_Count * 100) / (double) totalAccess , Hit_Count, totalAccess); } if(feof(fp1)) break; file_position = ftell(fp1); /* initially fill the cache. During this initialization process, we do not count the cache hit ratio. 
*/ if (Initial_Flag == 0) { // Temporally store this current access chunk with its future reference distance in the cache chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes)); //Update Bloom filter counters hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size; hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size; max_counter = bloom_filter[hash1]++; if((bloom_filter[hash2]++) > max_counter) max_counter = bloom_filter[hash2]; if(chunk_item) { //Cache Hit itemIndex = (u32)chunk_item->data; DEBUG_INFO("Index its updating is %ld:\n",itemIndex); heapUpdate(heap,max_counter,itemIndex,&ht_cache); } else { heapInsert(heap,DedupeRecord.fp_bytes, max_counter,&ht_cache); //Sandeep - Insert into Heap and Heapify cache_counter++; } if(cache_counter == cache_size) { DEBUG_INFO("\n#### Cache Initialization complete~!!####\n"); Initial_Flag = 1; //Sandeep - Construct Heap and Heapify //fnBuildHeap(cache_heap); #ifdef DEBUG printf("Heap Size is: %d\n",cache_heap.size()); /*PrintHashTable(ht_cache,-1,2); fnPrintHeap(cache_heap);*/ #endif } } else { /* Once the cache is full of data initially, we start to measure the cache hit ratio from now. 
*/ totalAccess++; if((totalAccess % 100) == 0) { DEBUG_INFO("[CHECK] Current Access Number: %ld\n", totalAccess); } Unique_CRID = (DedupeRecord.cmc_id << 16) | DedupeRecord.creg_id; chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes)); if(chunk_item) { //Cache Hit Hit_Count++; DEBUG_INFO("Cache Hit\n"); //Update Bloom filter counters hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size; hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size; //DEBUG_INFO("### Returned hash values are %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]); max_counter = bloom_filter[hash1]++; if((bloom_filter[hash2]++) > max_counter) max_counter = bloom_filter[hash2]; itemIndex = (ulong)chunk_item->data; DEBUG_INFO("Index its updating is %ld:\n",itemIndex); assert(itemIndex>=0 && itemIndex<=cache_size); heapUpdate(heap,max_counter,itemIndex,&ht_cache); //Sandeep - Update heap counter val for this chunk with max_counter //fnUpdateHeap(cache_heap, Read_Cache[(ulong)chunk_item->data],max_counter); } else { heapPopMin(heap,&sha1_value,&access_counter,&ht_cache); if(!sha1_value) ERROR("SHA1 Value in main is NULL\n"); /*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt) printf("%c",sha1_value[iCnt]);*/ //Update Bloom filter counters hash1 = hash_djb2(sha1_value,sizeof(sha1_value))%bloom_filter_size; hash2 = hash_sdbm(sha1_value,sizeof(sha1_value))%bloom_filter_size; //DEBUG_INFO("### In Main before decrement %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]); //Decrement BF counters bloom_filter[hash1]--; bloom_filter[hash2]--; free(sha1_value); //GP - Increment the BF counters for this chunk hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size; hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size; //DEBUG_INFO("### Returned hash values are in main cache_miss %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]); max_counter = bloom_filter[hash1]++; if((bloom_filter[hash2]++) > max_counter) max_counter 
= bloom_filter[hash2]; heapInsert(heap,DedupeRecord.fp_bytes,max_counter,&ht_cache); if(cache_algorithm == LOOKAHEAD){ /* Check if any other chunks in the current CR will appear within the future window. If we found one, we add such chunk(s) in the cache. */ Check_Unique_CRID(fp, Unique_CRID, file_position, 0, cache_size, window_size*cache_size, bloom_filter_size); } } } //else } //while printf("\n###################################################################\n"); printf("Cache hit ratio: %.3f = %lu / %lu \n", (double) (Hit_Count * 100) / (double) totalAccess , Hit_Count, totalAccess); printf("Cache size: %ld, window size: %ld\n", cache_size, window_size*cache_size); printf("Dedupe trace: %s\n", argv[4]); printf("###################################################################\n"); fclose(fp1); fclose(fp); FreeHashTable(ht_cache); deleteMinHeap(heap); time(&end); printf("###################################################################\n"); printf("Total time taken is %f \n",((double)clock()-start)/CLOCKS_PER_SEC); printf("###################################################################\n"); return 0; }
/*
 * Interactive driver for the global hash table `table`.
 *
 * Reads one command per line from stdin and dispatches on its first
 * character:
 *   c  (re)create the table, releasing any existing one first
 *   l  run wordSearch() on the table
 *   f  run uploadAndRunFile()
 *   p  print the table
 *   r  run removeThings()
 *   s  no action beyond checking that a table exists
 *   q  release the table and quit
 * Every command except c and q requires an existing table.
 *
 * Returns 0 on 'q' or end of input, 1 if the line buffer cannot be
 * allocated.
 */
int main(int argc, char **argv) {
    char *line = (char *)malloc(sizeof(char) * MAX_INPUT_LENGTH);

    if (line == NULL) {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }

    printOptions();

    while (fgets(line, MAX_INPUT_LENGTH, stdin) != NULL) {
        switch (line[0]) {
        case 'c':
            /* Replace any existing table with a fresh one. */
            if (table != NULL) {
                FreeHashTable(table);
            }
            table = createNewHashTable();
            if (table != NULL) {
                printf("\n\nSuccess.  What would you like to do now?\n\n");
            }
            printOptions();
            break;

        case 'l':
            if (table == NULL) {
                printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
                break;
            }
            if (wordSearch(table) != NULL) {
                printf("\n\nSuccess.  What would you like to do now?\n\n");
            }
            printOptions();
            break;

        case 'f':
            if (table == NULL) {
                printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
                break;
            }
            if (uploadAndRunFile(table) != NULL) {
                printf("\n\nSuccess.  What would you like to do now?\n\n");
                printOptions();
            }
            break;

        case 'p':
            if (table == NULL) {
                printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
                break;
            }
            printf("\n\nPrinting Hash Table\n\n");
            PrintHash(table);
            printf("\n\nSuccess.  What would you like to do now?\n\n");
            printOptions();
            break;

        case 'r':
            if (table == NULL) {
                printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
                break;
            }
            removeThings();
            break;

        case 'q':
            /* The user may quit before ever creating a table. */
            if (table != NULL) {
                FreeHashTable(table);
                table = NULL;
            }
            free(line);
            printf("\n\nGoodbye\n\n");
            return 0;

        case 's':
            if (table == NULL) {
                printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
                break;
            }
            break;

        default:
            printf("%s\n", "That is an unrecognized entry, please try again\n\n");
            printOptions();
            break;
        }
    }

    /* End of input without 'q': release the same resources. */
    if (table != NULL) {
        FreeHashTable(table);
        table = NULL;
    }
    free(line);
    return 0;
}
FILE* uploadAndRunFile() { FILE *fp; char * input = (char*)malloc(sizeof(char)*MAX_INPUT_LENGTH); char * line = (char*)malloc(sizeof(char)*MAX_INPUT_LENGTH); char * token; HashObjectPtr extraJob; char delims[] = ",.;:\"&!? -_\n\t\r@()#$%^*+={}[]|<>/~`"; printf("%s\n", "Please enter the path to the file\n\n"); fgets(input, MAX_INPUT_LENGTH, stdin); int j = 0; for (; j < strlen(input); j++) { if (input[j] == '\n') { input[j] = '\0'; break; } } fp = fopen(input, "r+"); if (fp != NULL) { while(!feof(fp)) { fgets(line, MAX_INPUT_LENGTH, fp); if (strtok(line, delims) != NULL) { token = strtok(line, delims); extraJob = createHashObject(token); HashInsert(table, extraJob); while (token != NULL) { token = strtok(NULL, delims); if (token != NULL) { token = strtok(NULL, delims); if (table->maxChainReached) { HashTablePtr oldTable = table; table = resizeHashTable(table); FreeHashTable(oldTable); } HashObjectPtr job = createHashObject(token); HashInsert(table, job); } } } } fclose(fp); free(input); free(line); return fp; } else { printf("%s\n", "That path was invalid, please try again\n\n"); printOptions(); free(input); free(line); return NULL; } }
int main (int argc, char *argv[]) { // Check arguments // Check that there are two arguments passed. if (argc != 3) { printf("Please input exactly two arguments.\n"); printf("Usage: ./query [INDEX_FILE] [HTML_DIRECTORY]\n"); return 1; } // Check that the index file exists. if (!IsFile(argv[1])) { printf("Please input an existing [INDEX_FILE].\n"); return 1; } // Since the index file is valid, copy file name to file. file = calloc(1, strlen(argv[1]) + 1); strcpy(file, argv[1]); // Check that the html directory exists. if (!IsDir(argv[2])) { printf("Please input an existing [HTML_DIRECTORY].\n"); return 1; } // Since the directory is valid, copy path to dir_path. dir_path = calloc(1, strlen(argv[2]) + 1); strcpy(dir_path, argv[2]); // Declare variables. HashTable Index; HashTable *ptr; char *query; // Load and recreate an InvertedIndex from index file. InitializeHashTable(&Index); ptr = ReadFile(file, &Index); printf("Query:> "); // Receive user queries from input. while ((query = (char *)calloc(1, MAX)) && fgets(query, MAX, stdin)) { // Check query line. // If the query line is empty, ask for input again. if (strcmp(query, "\n") == 0) { printf("Please input words.\n\n"); printf("Query:> "); free(query); continue; } // Check that only ASCII characters, or whitespace in between is passed. int i; for (i=0; i<strlen(query); i++) { if (!isalpha(query[i]) && !isspace(query[i])) { break; } } if (i != strlen(query)) { printf("Please input only ASCII characters, whitespace, or logical operators.\n\n"); printf("Query:> "); free(query); continue; } // Check that at least one word is passed, ie the query is not just whitespace. for (i=0; i<strlen(query); i++) { if (!isspace(query[i])) { break; } } if (i == strlen(query)) { printf("Please input only ASCII characters, whitespace, or logical operators.\n\n"); printf("Query:> "); free(query); continue; } // Check that no two successive logical operators are passed. 
if (strstr(query, "AND OR") || strstr(query, "AND AND") || strstr(query, "OR AND") || strstr(query, "OR OR")) { printf("Please input a valid query line.\n\n"); printf("Query:> "); free(query); continue; } // Get the list of DocumentNodes containing the query. if (!GetLinks(query, ptr)) { printf("Please input a valid query line.\n\n"); printf("Query:> "); // Cleanup. free(query); FreeList(0); FreeList(1); continue; } // Sort only if there are two are more documents in the list. if (final_list != NULL && final_list->next != NULL) { Sort(); // Sort by rank. } // Display results to stdout. if (!Display()) { printf("Error retrieving url from directory. Please check HTML_DIRECTORY.\n\n"); // Cleanup. FreeList(0); FreeList(1); break; } printf("\n\n"); printf("Query:> "); // Cleanup. FreeList(1); free(query); } // Cleanup. free(query); CleanHashTable(ptr); FreeHashTable(ptr); free(file); free(dir_path); return 0; }
// our main function; here, we demonstrate how to use some // of the hash table functions int main(int argc, char **argv) { ExampleValuePtr evp; HashTable ht; HTIter iter; HTKeyValue kv, old_kv; HTKey_t i; // allocate a hash table with 10,000 initial buckets ht = AllocateHashTable(10000); Verify333(ht != NULL); // insert 20,000 elements (load factor = 2.0) for (i = 0; i < 20000; i++) { evp = (ExampleValuePtr) malloc(sizeof(ExampleValue)); Verify333(evp != NULL); evp->num = i; // make sure HT has the right # of elements in it to start Verify333(NumElementsInHashTable(ht) == (HWSize_t) i); // insert a new element kv.key = FNVHashInt64((HTValue_t)i); kv.value = (HTValue_t)evp; Verify333(InsertHashTable(ht, kv, &old_kv) == 1); // make sure hash table has right # of elements post-insert Verify333(NumElementsInHashTable(ht) == (HWSize_t) (i+1)); } // look up a few values Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 1); Verify333(kv.key == FNVHashInt64((HTValue_t)100)); Verify333(((ExampleValuePtr) kv.value)->num == 100); Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)18583), &kv) == 1); Verify333(kv.key == FNVHashInt64((HTValue_t)18583)); Verify333(((ExampleValuePtr) kv.value)->num == 18583); // make sure non-existent value cannot be found Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)20000), &kv) == 0); // delete a value Verify333(RemoveFromHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 1); Verify333(kv.key == FNVHashInt64((HTValue_t)100)); Verify333(((ExampleValuePtr) kv.value)->num == 100); ExampleValueFree(kv.value); // since we malloc'ed it, we must free it // make sure it's deleted Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 0); Verify333(NumElementsInHashTable(ht) == (HWSize_t) 19999); // loop through using an iterator i = 0; iter = HashTableMakeIterator(ht); Verify333(iter != NULL); while (HTIteratorPastEnd(iter) == 0) { Verify333(HTIteratorGet(iter, &kv) == 1); Verify333(kv.key != 
FNVHashInt64((HTValue_t)100)); // we deleted it // advance the iterator HTIteratorNext(iter); i++; } Verify333(i == 19999); // free the iterator HTIteratorFree(iter); // free the hash table FreeHashTable(ht, &ExampleValueFree); return 0; }
int main(int argc, char* argv[]) { // check command line arguments // Check that there are three input parameters. if (argc != 4) { printf("Please input three parameters: seed URL, directory, and max depth.\n"); return 1; } // Check that the seed url has proper domain (old-www). if (strncmp(argv[1], URL_PREFIX, 15) != 0) { printf("The seed URL domain must be old-www.\n"); return 1; } // Check that the directory already exists. struct stat st; if (stat(argv[2], &st) == 0 && S_ISDIR(st.st_mode)); else { // If the directory does not exist, terminate the program. printf("The directory %s cannot be found. Please enter an existing directory.\n", argv[2]); return 1; } // Check that the directory path does not have a '/' at the end for ease in writing filenames. if (argv[2][strlen(argv[2]) - 1] == '/') { printf("Please do not add '/' at the end of the directory path.\n"); return 1; } // Check the third argument. // Loop through each letter of the first argument and check that it is indeed a number. for (int i = 0; i < strlen(argv[3]); i++) { if (!isdigit((int)argv[3][i])) { printf("Please input a valid number for the depth.\n"); return 1; } } sscanf(argv[3], "%d", &depth); // Store the argument as an integer. // Check that the depth specified does not exceed max depth. if (depth > MAX) { printf("Search depth cannot exceed MAX depth of 4. Please enter a valid depth.\n"); return 1; } // init curl curl_global_init(CURL_GLOBAL_ALL); // setup seed page WebPage *page = calloc(1, sizeof(WebPage)); page->url = (char *)malloc(strlen(argv[1]) + 1); MALLOC_CHECK(stderr, page->url); // Check that memory was allocated. strcpy(page->url, argv[1]); // Copy the seed url to page->url. // get seed webpage if (!GetWebPage(page)) { printf("The seed URL is invald. Please enter a valid seed URL.\n"); FreeWebMemory(page); return 1; } // Normalize the seed url. 
if (!NormalizeURL(page->url)) { printf("Seed URL cannot be normalized.\n"); FreeWebMemory(page); return 1; } // write seed file strcpy(path, argv[2]); // Let var path contain the directory path. WriteFile(page, path, pageID); // add seed page to hashtable if (!AddToHashTable(page->url)) { FreeWebMemory(page); return 1; } // Initialize URLList. if (!InitList()) { FreeWebMemory(page); return 1; } // extract urls from seed page if (!CrawlPage(page)) { FreeHashTable(); // Free all memory dynamically allocated to the hash table. FreeWebMemory(page); return 1; } // while there are urls to crawl while (URLList.tail != NULL) { // get next webpage from list WebPage *next = PopList(); // write page file pageID++; if (!WriteFile(next, argv[2], pageID)) { // Check that the WriteFile worked. FreeWebMemory(next); return 1; } // extract urls from webpage and then cleanup. CrawlPage(next); FreeWebMemory(next); } // Memory cleanup. FreeHashTable(); // Free memory dynamically allocated to the hash table. FreeWebMemory(page); // Free memory dynamically allocated to the Webpage variable. // cleanup curl curl_global_cleanup(); return 0; }
int main(void) { hashADT hashTable = NewHashTable(); activityADT actividad1, actividad2, actividad3, actAux, actividad4; struct activityCDT _actividad1, _actividad2, _actividad3, _actAux, _actividad4; actInfo info1, info2, info3, info4; actividad1 = &_actividad1; /*Para no reservarles memoria y bue...*/ actividad2 = &_actividad2; actividad3 = &_actividad3; actividad4 = &_actividad4; actAux = &_actAux; info1.ID = "A"; actividad1->info = &info1; info2.ID = "B"; actividad2->info = &info2; info3.ID = "C"; actividad3->info = &info3; info4.ID = "D"; actividad4->info = &info4; printf("Prueba para ver si se insertan.\n"); printf("%s\n",InsertInTable(hashTable, actividad1)?"SI":"NO"); printf("%s\n",InsertInTable(hashTable, actividad2)?"SI":"NO"); printf("%s\n",InsertInTable(hashTable, actividad3)?"SI":"NO"); printf("%s\n",InsertInTable(hashTable, actividad4)?"SI":"NO"); printf("Prueba para ver si no se insertan, dado que ya estan insertadas.\n"); printf("%s\n",InsertInTable(hashTable, actividad1)?"SI":"NO"); printf("%s\n",InsertInTable(hashTable, actividad2)?"SI":"NO"); printf("%s\n",InsertInTable(hashTable, actividad3)?"SI":"NO"); printf("%s\n",InsertInTable(hashTable, actividad4)?"SI":"NO"); printf("Prueba para ver si busca bien.\n"); printf("%s\n", SearchInTable(hashTable, "A")->info->ID); printf("%s\n", SearchInTable(hashTable, "B")->info->ID); printf("%s\n", SearchInTable(hashTable, "C")->info->ID); printf("%s\n", SearchInTable(hashTable, "D")->info->ID); printf("Prueba para ver si se borran.\n"); printf("%s\n",DeleteFromTable(hashTable, "A")?"SI":"NO"); printf("%s\n",DeleteFromTable(hashTable, "C")?"SI":"NO"); printf("%s\n",DeleteFromTable(hashTable, "C")?"SI":"NO"); if((actAux = SearchInTable(hashTable, "A")) == NULL) printf("No ta!\n"); if((actAux = SearchInTable(hashTable, "B")) != NULL) printf("%s\n", actAux->info->ID); if((actAux = SearchInTable(hashTable, "C")) == NULL) printf("No ta!\n"); if((actAux = SearchInTable(hashTable, "D")) != NULL) printf("%s\n", 
actAux->info->ID); FreeHashTable(hashTable); printf(":D\n"); return 0; }
/* Clears the dictionary: hands `table` straight to FreeHashTable() and does nothing else. */
void ClearDictionary(HASH_TABLE* table) { FreeHashTable(table); }
int main(int argc, char* argv[]) { //check argument number if (argc < 3 || argc > 4) { printf("too many or too little arguments, please try again"); exit(0); } //check directory validity if (!IsDir(argv[1])) { printf("invalid directory, please try again"); exit(0); } //Initialize variables and index int docId; int pos; char *doc; char **filenames = NULL; int num_files = 0; HashTable *WordsFound = calloc(1, sizeof(HashTable)); num_files = GetFilenamesInDir(argv[1], &filenames); //check whether the folder has files if (num_files < 0) { printf("failed to get any filenames"); exit(0); } //iterate through each file in the directory for (int i = 0; i < num_files; i++) { //check that the file is in the correct format (title is a number) int filechecker = 0; for (int c = 0; c < strlen(filenames[i]); c++) { if (!isdigit(filenames[i][c])) { filechecker = 1; } } if (filechecker == 1) { continue; } //Load the document char *word; char file[100]; strcpy(file, argv[1]); strcat(file, filenames[i]); doc = LoadDocument(file); docId = GetDocumentId(filenames[i]); free(filenames[i]); pos = 0; //Iterate through each word in the html file (doc) while ((pos = GetNextWord(doc, pos, &word)) > 0) { NormalizeWord(word); if (InHashTable(word, WordsFound) == 0) { AddToHashTable(word, WordsFound); UpdateHashTable(word, docId, WordsFound); } else { UpdateHashTable(word, docId, WordsFound); free(word); } } free(doc); } free(filenames); SaveIndexToFile(argv[2], WordsFound); //Save the index to the file specified FreeHashTable(WordsFound); //only proceed if there was a third argument specified. If so, reload the index form the file you just created if (argc == 4) { HashTable *ReloadedIndex = ReadFile(argv[2]); SaveIndexToFile(argv[3], ReloadedIndex); FreeHashTable(ReloadedIndex); } return 0; }
int main (int argc, char **argv) { /* Check Arguments */ if (!CheckArguments(argc, argv)) { exit(-1); } /* Make variables for all things needed for indexer and indexer testing */ char *page_directory; char *index_filename; char *read_index_filename; char *new_index_filename; // If argument count is 3 initialize only 2 variables else initialize all page_directory = argv[1]; index_filename = argv[2]; // Initialize hashtable, word node, and document node HashTable *index_hashtable = calloc(1, sizeof(HashTable)); /*Make array to hold filenames (just document numbers) and use GetFilenamesInDir to grab all names */ char **filename_array; int number_of_files; if ((number_of_files = GetFilenamesInDir(page_directory, &filename_array)) < 0) { fprintf(stderr, "Could not get filenames in page directory. Exiting Now.\n"); exit(-1); } /* Add page_directory to the front of the filenames */ for (int i = 0; i < number_of_files; i++) { // Make pointe to current string in filename_array char *previous_string = filename_array[i]; // Get length of full string and initialize element of filename_array to that size int len = strlen(page_directory) + strlen(previous_string) + 1; char *new_string = calloc(len, sizeof(char)); // Make new string and free previous string strcpy(new_string, page_directory); strcat(new_string, previous_string); if (previous_string) free(previous_string); filename_array[i] = new_string; } /* Populate the index data structure from the words on each doc * Then Save to an index file */ for (int i = 0; i < number_of_files; i++) { /* Check that the filenames are digits */ int continue_flag = 0; char *digit_string = filename_array[i] + strlen(page_directory); // Check that every character in the filename is a digit for (int j = 0; j < strlen(digit_string); j++) { if (!isdigit(digit_string[j])) { fprintf(stderr, "This file %s contains something other than a digit \n", filename_array[i]); continue_flag = 1; } } if (continue_flag ==1) continue; // Check that each file in 
the filename array is a good file char *file_name = filename_array[i]; if (!IsFile(file_name)) { fprintf(stderr, "not file\n"); continue; } // Get contents of file into a string char *document = LoadDocument(file_name); if (document == NULL) { continue; } // Get DocumentID of file (check if bad) int document_id = GetDocumentId(file_name, page_directory); if (document_id < 0) { fprintf(stderr, "Error when converting document id char to integer\n"); continue; } // Use GetNext word, with pos variable and buffer, to get every word and add the word to the data structure int pos = 0; char *word_buffer; while ((pos = GetNextWord(document, pos, &word_buffer)) > 0) { // Update the index for each word // Normalize word then update index with that word NormalizeWord(word_buffer); UpdateIndex(word_buffer, document_id, index_hashtable); free(word_buffer); } // free the string containing the html and the word in filenamearray free(document); } /* Save to index file, and check that it actually went well */ if (!SaveIndexToFile(index_hashtable, index_filename)) { fprintf(stderr, "Could not save index hashtable to file\n"); exit(-1); } for (int i = 0; i < number_of_files; i++) { free(filename_array[i]); } free(filename_array); FreeHashTable(index_hashtable); if (argc == 3) { ; } /* Read index file into data strucutres and save to new index file */ else { // Assign 2 filenames read_index_filename = argv[3]; new_index_filename = argv[4]; // Read index file into data structures HashTable *read_index = ReadFile(read_index_filename); if (read_index == NULL) { fprintf(stderr, "Error when reading index file into data structures.\n"); exit(-1); } // Save index data structures into new file if (!SaveIndexToFile(read_index, new_index_filename)) { fprintf(stderr, "Could not save read index file into new index file\n"); exit(-1); } FreeHashTable(read_index); } return 0; }
// Releases a word hash table by calling FreeHashTable() with &WordHTFree as
// its second argument (presumably the per-value free callback -- confirm
// against FreeHashTable's declaration).
void FreeWordHT(HashTable table) { FreeHashTable(table, &WordHTFree); }