Esempio n. 1
0
// Tears down a DocTable.
//
// Each of the table's two hash tables is released with FreeHashTable(),
// passing free() as the callback for the stored values, and then the
// DocTable record itself is freed.  Verify333 is applied to the table
// pointer and to each hash table pointer before it is used.
void FreeDocTable(DocTable table) {
  Verify333(NULL != table);

  // First the docid_to_docname table ...
  Verify333(NULL != table->docid_to_docname);
  FreeHashTable(table->docid_to_docname, &free);

  // ... then the docname_to_docid table.
  Verify333(NULL != table->docname_to_docid);
  FreeHashTable(table->docname_to_docid, &free);

  // Finally the record that held both tables.
  free(table);
}
Esempio n. 2
0
// Grows `ht` in place when it has become too full.
//
// The table is left untouched while num_elements < 3 * num_buckets.
// Otherwise a table with 9x the buckets is allocated, every key/value pair
// of `ht` is inserted into it, and the two table records are swapped so
// that callers keep using the same `ht` handle.  The record that ends up
// holding the old contents is released with HTNullFree as the value
// callback.  Any allocation or insertion failure abandons the resize and
// leaves `ht` as it was.
static void ResizeHashtable(HashTable ht) {
  HashTable bigger;
  HTIter cursor;
  HTKeyValue entry, displaced;
  HashTableRecord swap;

  // Below the threshold: nothing to do.
  if (ht->num_elements < 3 * ht->num_buckets) {
    return;
  }

  bigger = AllocateHashTable(ht->num_buckets * 9);
  if (bigger == NULL) {
    return;  // out of memory; keep the current table
  }

  cursor = HashTableMakeIterator(ht);
  if (cursor == NULL) {
    // out of memory; discard the half-built replacement
    FreeHashTable(bigger, &HTNullFree);
    return;
  }

  // Copy every pair from the old table into the replacement.
  for (; !HTIteratorPastEnd(cursor); HTIteratorNext(cursor)) {
    Verify333(HTIteratorGet(cursor, &entry) == 1);
    if (InsertHashTable(bigger, entry, &displaced) != 1) {
      HTIteratorFree(cursor);
      FreeHashTable(bigger, &HTNullFree);
      return;
    }
  }
  HTIteratorFree(cursor);

  // Swap the records: `ht` now describes the big table, and `bigger`
  // describes the old one, which is then released.
  swap = *ht;
  *ht = *bigger;
  *bigger = swap;
  FreeHashTable(bigger, &HTNullFree);
}
Esempio n. 3
0
/*
 * Exercises HashFindOrInsert() by building a histogram of rand() output.
 *
 * One million keys in 0..29 (rand() % 30) are looked up or inserted with an
 * initial value of 0 and their data field is incremented; afterwards one
 * line per key is printed with the count found in the table, or 0 when the
 * key is absent.
 */
static void TestFindOrInsert() {
  const int draws = 1000000;
  const int modulus = 30;          /* keys fall in 0 .. modulus-1 */
  struct HashTable* counts;
  int n;

  counts = AllocateHashTable(4, 0);    /* same arguments as before: 4, 0 */

  /* Tally each random key. */
  for (n = 0; n < draws; n++) {
    HTItem* slot = HashFindOrInsert(counts, rand() % modulus, 0);
    slot->data++;
  }

  /* Report every possible key, present or not. */
  n = 0;
  while (n < modulus) {
    HTItem* slot = HashFind(counts, n);
    if (!slot) {
      printf("%3d: 0\n", n);
    } else {
      printf("%3d: %d\n", slot->key, slot->data);
    }
    n++;
  }

  FreeHashTable(counts);
}
Esempio n. 4
0
// Builds a hash table of the words found in the file `filename`.
//
// Returns NULL when: filename is NULL, the file cannot be read, the file is
// empty, the file contains a NUL byte or a byte above ASCII_UPPER_BOUND,
// the hash table cannot be allocated, or no words were inserted.  Otherwise
// returns the populated table; the caller owns it.
//
// The file buffer obtained from ReadFile() is released on every path that
// is reached after the read succeeded.
HashTable BuildWordHT(char *filename) {
  char *filecontent;
  HashTable tab;
  HWSize_t filelen, i;

  if (filename == NULL)
    return NULL;

  // Slurp in the file contents.
  filecontent = ReadFile(filename, &filelen);
  if (filecontent == NULL)
    return NULL;

  // An empty file is a failure too.  ReadFile() may still have handed back
  // a buffer in that case, so release it rather than leak it.
  if (filelen == 0) {
    free(filecontent);
    return NULL;
  }

  // Verify that the file contains only ASCII text.  We won't try to index
  // any files that contain non-ASCII text; unfortunately, this means we
  // aren't Unicode friendly.
  for (i = 0; i < filelen; i++) {
    if ((filecontent[i] == '\0') ||
        ((unsigned char) filecontent[i] > ASCII_UPPER_BOUND)) {
      free(filecontent);
      return NULL;
    }
  }

  // Allocate the table that LoopAndInsert() fills.  The table grows
  // dynamically, so a small initial bucket count is enough.
  tab = AllocateHashTable(64);
  if (tab == NULL) {
    // Out of memory: nothing was built, so only the buffer needs freeing.
    free(filecontent);
    return NULL;
  }

  // Loop through the file, splitting it into words and inserting a record
  // for each word.
  LoopAndInsert(tab, filecontent);

  // If we found no words, return NULL instead of a zero-sized hashtable.
  if (NumElementsInHashTable(tab) == 0) {
    FreeHashTable(tab, &WordHTFree);
    tab = NULL;
  }

  // Parsing is finished; the file buffer is no longer needed.
  free(filecontent);
  filecontent = NULL;
  return tab;
}
Esempio n. 5
0
/*
 * Exercises HashInsert()/HashFind(): inserts three month names with their
 * day counts, then asserts that "February" is found with the value 28.
 */
static void TestInsert() {
  struct HashTable* months = AllocateHashTable(1, 0);   /* same arguments: 1, 0 */
  HTItem* found;

  HashInsert(months, PTR_KEY(months, "January"), 31);
  HashInsert(months, PTR_KEY(months, "February"), 28);
  HashInsert(months, PTR_KEY(months, "March"), 31);

  /* The middle insertion must be retrievable with its value intact. */
  found = HashFind(months, PTR_KEY(months, "February"));
  assert(found);
  assert(found->data == 28);

  FreeHashTable(months);
}
Esempio n. 6
0
/*
 * duck.print(output)
 *
 * Fetches the record named "output" from gCurrentContext and prints it with
 * PrintValue().  duck_print_records holds a fresh hash table for the
 * duration of the print and is reset to NULL afterwards.  gLastExpression
 * is set to VAL_NIL.  Neither parameter is read.  Always returns 0.
 */
int DuckPrint(int argument_count, void* data)
{
    VALUE output = GetRecord("output", gCurrentContext);

    /* Table is live only while PrintValue() runs. */
    duck_print_records = CreateHashTable();
    PrintValue(output);
    FreeHashTable(duck_print_records);
    duck_print_records = NULL;

    /* The call leaves a nil value as the last expression. */
    gLastExpression.type = VAL_NIL;
    gLastExpression.data.primitive = 0;

    return 0;
}
Esempio n. 7
0
/*
 * Renders `value` into a newly allocated string.
 *
 * A 512-byte buffer is allocated and passed, together with its size, by
 * address to PrintValueString(), so that function may replace the buffer
 * and update the size.  duck_print_records holds a fresh hash table for
 * the duration of the call and is reset to NULL afterwards.
 *
 * Returns the string, which the caller must free(), or NULL if the initial
 * buffer could not be allocated.
 */
char* ToString(VALUE value)
{
    unsigned int size = 512;
    char* string = (char*)malloc(size * sizeof(char));

    /* Bail out before touching any global state if there is no buffer. */
    if (string == NULL)
        return NULL;

    /* Start from the empty string. */
    string[0] = '\0';

    duck_print_records = CreateHashTable();
    PrintValueString(&string, &size, value);
    FreeHashTable(duck_print_records);
    duck_print_records = NULL;

    return string;
}
Esempio n. 8
0
/*
 * Builds a name -> data-address index over an in-memory archive made of
 * 512-byte headers, each followed by its data rounded up to a multiple of
 * 512 bytes.
 *
 * For every header whose byte 156 is '0' or '\0', the name is taken from
 * header bytes 5..98, stopping at the first ' ' or '\0', with every '/'
 * replaced by '.'.  The value stored for that name is the address of the
 * data following the header.  The data size is the octal number at header
 * offset 124.
 *
 * Returns the index when a header starting with a zero byte is reached.
 * Returns NULL (nothing left allocated) when the archive ends without such
 * a header, when an entry's data would run past `length`, or when the list
 * cannot be allocated.
 */
gar_list* gar_index(void* gar, size_t length)
{
	char* cur = gar;	/* byte cursor; avoids arithmetic on void* */
	char name[100];
	size_t size;
	unsigned long i;
	gar_list* list = malloc(sizeof(gar_list));

	if (!list)
		return NULL;
	list->gar = gar;
	list->length = length;
	list->ht = AllocateHashTable(0, 1);

	while (length >= 512)
	{
		/* A header starting with a zero byte marks the end of the archive. */
		if (!*cur)
			return list;

		size = strtol(cur + 124, NULL, 8);
		if (cur[156] == '0' || cur[156] == '\0')
		{
			/* Copy the name without its first 5 bytes. */
			for (i = 5; i < 99; i ++)
			{
				char c = cur[i];
				if (c == ' ' || c == '\0')
					break;
				name[i - 5] = (c == '/') ? '.' : c;
			}
			/* Always terminate, including when no terminator was found
			 * within the copied range. */
			name[i - 5] = '\0';
			HashInsert(list->ht, PTR_KEY(list->ht, name), (ulong)(cur + 512));
		}

		/* Round the data size up to whole 512-byte blocks. */
		size = size ? ((size - 1) / 512 + 1) * 512 : 0;

		/* The header itself takes 512 bytes, so the data must fit in what
		 * remains after it.  Comparing against `length` alone would let
		 * `length` wrap around below and send the loop out of bounds. */
		if (length - 512 < size)
			break;

		cur += 512 + size;
		length -= 512 + size;
	}

	FreeHashTable(list->ht);
	free(list);
	return NULL;
}
Esempio n. 9
0
// Main program.
//
// Test driver: loads the index, runs every test case through RUN_TEST,
// releases the index, then reports the overall outcome on stdout.
// Returns 0 when cnt is still 0 at the end, 1 otherwise.
//
// Index, ptr and file are not declared in this function; presumably they
// are file-scope variables -- confirm against the rest of the file.
int main(int argc, char** argv) {
  // NOTE(review): no statement visible here modifies cnt; presumably the
  // RUN_TEST macro increments it when a test fails -- confirm.
  int cnt = 0;
	
  // Load and recreate an InvertedIndex from index file.
  InitializeHashTable(&Index);
  ptr = ReadFile(file, &Index);
  
  // Perform the tests.
  RUN_TEST(TestGETLINKS1, "GetLinks Test case 1");
  RUN_TEST(TestGETLINKS2, "GetLinks Test case 2");
  RUN_TEST(TestGETLINKS3, "GetLinks Test case 3");
  RUN_TEST(TestGETLINKS4, "GetLinks Test case 4");
  RUN_TEST(TestGETLINKS5, "GetLinks Test case 5");
  RUN_TEST(TestGETLINKS6, "GetLinks Test case 6");
  RUN_TEST(TestGETLINKS7, "GetLinks Test case 7");
  
  RUN_TEST(TestAND1, "And Test case 1");
  RUN_TEST(TestAND2, "And Test case 2");
  RUN_TEST(TestAND3, "And Test case 3");
  
  RUN_TEST(TestOR1, "Or Test case 1");
  RUN_TEST(TestOR2, "Or Test case 2");
  
  RUN_TEST(TestSORT1, "Sort Test case 1");
  
  RUN_TEST(TestDISPLAY1, "Display Test case 1");
  
  
  // Cleanup.
  // Both calls receive the pointer returned by ReadFile() above.
  CleanHashTable(ptr);
  FreeHashTable(ptr);
  
  // The exit status mirrors the printed verdict.
  if (!cnt) {
    printf("All passed!\n"); return 0;
  } else {
    printf("Some fails!\n"); return 1;
  }
}
Esempio n. 10
0
/*
 * Trace-driven simulation of a dedupe read cache.
 *
 * Arguments: <Cache Size> <Window Size> <Cache Algorithm (0, 1, 2)> <Trace File>.
 *
 * The trace is opened twice: fp1 is read record by record, fp is handed to
 * Check_Unique_CRID() when the LOOKAHEAD algorithm is selected.  Each record
 * bumps two counters in bloom_filter (indices from hash_djb2 and hash_sdbm
 * over the fingerprint bytes); the value passed to the heap as the chunk's
 * counter is derived from those two counters.
 *
 * Phase 1 (Initial_Flag == 0) fills the cache until cache_counter equals
 * cache_size; hits are not counted.  Phase 2 counts totalAccess and
 * Hit_Count; on a miss the heap minimum is popped, its two counters are
 * decremented, and the new chunk is inserted.
 *
 * Prints the hit ratio and elapsed CPU time.  Returns 0; exits with a
 * non-zero status on usage, allocation or file-open errors.
 */
int main (int argc, char *argv[])
{

	paper_rec_t DedupeRecord;
	dd_uint64_t Unique_CRID;	/* Unique CR_ID = (C_ID << 16) | CR_ID */

	long victim_index = 0, cache_size, window_size, bloom_filter_size;
	long i, j=0, temp_index;
	int Initial_Flag = 0, cache_algorithm;

	dd_uint8_t *sha1_value=NULL;
	int nLen = 0;
	long max_counter=0;
	HTItem *chunk_item, *item;
	long byte_len, temp, offset, ver, temp1; /* to read a trace file */

	unsigned long hash1, hash2;
	/* Heap Data structure variables */
	Cache_Memory Dataitem;
	std::vector<Cache_Memory>::iterator itr;

	unsigned long writeCounter = 0;
	unsigned long access_counter;
	long file_position;
	FILE *fp1, *fp;
	size_t keySize=0,iCnt;
	clock_t start = clock();
	time_t begin,end;
	time(&begin);
	if (argc < 5) {
		/*                      0            1            2            3                     4 */
		fprintf(stderr, "usage: %s <Cache Size> <Window Size> <Cache Algorithm (0, 1, 2)> <Trace File>\n", argv[0]);
		fprintf(stderr, "       - Cache Size: Dedupe read cache size in terms of # of data chunk (e.g. 500 chunks = 4MB (500*8KB))\n");
		fprintf(stderr, "       - Window Size: Future sliding window size in terms of TIMES of cache size.\n");
		fprintf(stderr, "       - Cache Algorithm: 0 (Belady MIN), 1 (Belady MIN with a future window), 2 (Lookahead read cache)\n");
		fprintf(stderr, "       - Trace File: Trace file name with path\n");
		exit(1);
	}

	cache_size = atol(argv[1]);
	assert(cache_size > 0);		/* cache size must be positive */
	window_size = atol(argv[2]);
	assert(window_size > 0);	/* window size must be positive */
	cache_algorithm = atoi(argv[3]);
	assert((cache_algorithm == 0)||(cache_algorithm == 1)||(cache_algorithm == 2)); /* there are only three selections */

	bloom_filter_size = cache_size*2; //No. of Hash functions for BF is 2
	/* The counters are incremented before they are ever assigned, so they
	 * must start at zero: calloc, not malloc. */
	bloom_filter = (long *)calloc((size_t)bloom_filter_size, sizeof(long));
	if (bloom_filter == NULL) {
		fprintf(stderr, "Out of memory allocating the bloom filter\n");
		exit(-1);
	}

	ht_cache = AllocateHashTable(SHA1_VALUE_LENGTH, 1);
	heap = newMinHeap((u32)cache_size);
	if((fp1 = fopen(argv[4], "rb")) == NULL){	//for reading data one by one
		DEBUG_INFO("File open error....1\n");
		exit (-1);
	}

	if((fp = fopen(argv[4], "rb")) == NULL){	//for searching its future reference distance
		DEBUG_INFO("File open error....2\n");
		exit (-1);
	}

	long c=0, d=0;
	u32 itemIndex;

	keySize = sizeof(DedupeRecord.fp_bytes);
	DEBUG_INFO("Record Size is: %d\n",keySize);
	while (1)
	{
		/* Stop on end of file, on a truncated record, and on a read error.
		 * Testing only feof() would loop forever after an I/O error,
		 * because an error does not set the end-of-file indicator. */
		if (fread(&DedupeRecord, sizeof(struct _paper_rec_t), 1, fp1) != 1)
			break;

		c++;
		if(c%1000 == 0){
			printf("Reading Chunks: %ld\n",c);
		}

		if (c % 10000 == 0) {
			printf("Cache hit ratio: %.3f = %lu / %lu \n",
			       (double) (Hit_Count * 100) / (double) totalAccess ,
			       Hit_Count,
			       totalAccess);
		}

		/* Position just past the current record; passed to Check_Unique_CRID(). */
		file_position = ftell(fp1);

		/* initially fill the cache. During this initialization process, we do not count the cache hit ratio. */
		if (Initial_Flag == 0) {
			chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

			//Update Bloom filter counters
			hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
			hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;

			max_counter = bloom_filter[hash1]++;
			if((bloom_filter[hash2]++) > max_counter)
				max_counter = bloom_filter[hash2];

			if(chunk_item) { //Cache Hit
				itemIndex = (u32)chunk_item->data;
				DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
				heapUpdate(heap,max_counter,itemIndex,&ht_cache);
			}
			else {
				heapInsert(heap,DedupeRecord.fp_bytes, max_counter,&ht_cache);
				cache_counter++;
			}

			if(cache_counter == cache_size) {
				DEBUG_INFO("\n#### Cache Initialization complete~!!####\n");
				Initial_Flag = 1;
				#ifdef DEBUG
				printf("Heap Size is: %d\n",cache_heap.size());
				#endif
			}
		}
		else { /* Once the cache is full of data initially, we start to measure the cache hit ratio from now. */

			totalAccess++;
			if((totalAccess % 100) == 0) {
				DEBUG_INFO("[CHECK] Current Access Number: %ld\n", totalAccess);
			}

			Unique_CRID = (DedupeRecord.cmc_id << 16) | DedupeRecord.creg_id;

			chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

			if(chunk_item) { //Cache Hit
				Hit_Count++;
				DEBUG_INFO("Cache Hit\n");

				//Update Bloom filter counters
				hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				max_counter = bloom_filter[hash1]++;
				if((bloom_filter[hash2]++) > max_counter)
					max_counter = bloom_filter[hash2];
				itemIndex = (ulong)chunk_item->data;
				DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
				assert(itemIndex>=0 && itemIndex<=cache_size);
				heapUpdate(heap,max_counter,itemIndex,&ht_cache);
			}
			else {
				/* Cache miss: evict the heap minimum to make room. */
				heapPopMin(heap,&sha1_value,&access_counter,&ht_cache);
				if(!sha1_value)
					ERROR("SHA1 Value in main is NULL\n");

				/* Decrement the counters of the evicted chunk.  The hash must
				 * cover the same number of bytes that was used when the chunk
				 * was inserted (keySize); sizeof(sha1_value) is only the size
				 * of the pointer.
				 * NOTE(review): assumes the buffer returned by heapPopMin()
				 * holds at least keySize bytes -- confirm against the heap
				 * implementation. */
				hash1 = hash_djb2(sha1_value,keySize)%bloom_filter_size;
				hash2 = hash_sdbm(sha1_value,keySize)%bloom_filter_size;
				bloom_filter[hash1]--;
				bloom_filter[hash2]--;

				free(sha1_value);
				sha1_value = NULL;

				//GP - Increment the BF counters for this chunk
				hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				max_counter = bloom_filter[hash1]++;
				if((bloom_filter[hash2]++) > max_counter)
					max_counter = bloom_filter[hash2];

				heapInsert(heap,DedupeRecord.fp_bytes,max_counter,&ht_cache);
				if(cache_algorithm == LOOKAHEAD){
					/* Check if any other chunks in the current CR will appear within the future window.
					   If we found one, we add such chunk(s) in the cache. */
					Check_Unique_CRID(fp, Unique_CRID, file_position, 0, cache_size, window_size*cache_size, bloom_filter_size);
				}
			}

		} //else

	} //while

	printf("\n###################################################################\n");
	printf("Cache hit ratio: %.3f = %lu / %lu \n", (double) (Hit_Count * 100) / (double) totalAccess , Hit_Count, totalAccess);
	printf("Cache size: %ld, window size: %ld\n", cache_size, window_size*cache_size); 
	printf("Dedupe trace: %s\n", argv[4]);
	printf("###################################################################\n");
	fclose(fp1);
	fclose(fp);

	FreeHashTable(ht_cache);
	deleteMinHeap(heap);
	free(bloom_filter);
	bloom_filter = NULL;
	time(&end);
	printf("###################################################################\n");
	printf("Total time taken is %f \n",((double)clock()-start)/CLOCKS_PER_SEC);
	printf("###################################################################\n");
	return 0;
}
Esempio n. 11
0
/*
 * Interactive menu loop for the hash table stored in the `table` variable.
 *
 * Reads one line at a time from stdin and dispatches on its first character:
 *   c  create a table (releasing any existing one first)
 *   l  wordSearch()
 *   f  uploadAndRunFile()
 *   p  PrintHash()
 *   r  removeThings()
 *   s  only checks that a table exists
 *   q  release everything and quit
 * Every command except c and q requires an existing table.
 *
 * Returns 0 on 'q' or end of input, 1 if the input buffer cannot be
 * allocated.  The table and the buffer are released on both exit paths.
 */
int main(int argc, char **argv)
{
	char * line = (char *)malloc(sizeof(char)*MAX_INPUT_LENGTH);

	/* fgets() must never be handed a NULL buffer. */
	if (line == NULL)
	{
		fprintf(stderr, "Out of memory\n");
		return 1;
	}

	printOptions();
	while (fgets(line, MAX_INPUT_LENGTH, stdin)!=NULL)
	{
		switch (line[0])
		{
			case 'c':
				/* Replace any existing table with a fresh one. */
				if (table != NULL)
				{
					FreeHashTable(table);
				}
				if ((table = createNewHashTable()) != NULL)
				{
					printf("\n\nSuccess. What would you like to do now?\n\n");
				}
				printOptions();
				break;
			case 'l':
				if (table == NULL)
				{
					printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
					break;
				}
				if (wordSearch(table) != NULL)
				{
					printf("\n\nSuccess. What would you like to do now?\n\n");
				}
				printOptions();
				break;
			case 'f':
				if (table == NULL)
				{
					printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
					break;
				}
				/* The result is only a success flag; it is never dereferenced. */
				if (uploadAndRunFile(table) != NULL)
				{
					printf("\n\nSuccess. What would you like to do now?\n\n");
					printOptions();
				}
				break;
			case 'p':
				if (table == NULL)
				{
					printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
					break;
				}
				printf("\n\nPrinting Hash Table\n\n");
				PrintHash(table);
				printf("\n\nSuccess. What would you like to do now?\n\n");
				printOptions();
				break;
			case 'r':
				if (table == NULL)
				{
					printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
					break;
				}
				removeThings();
				break;
			case 'q':
				/* 'q' may arrive before any table was created. */
				if (table != NULL)
				{
					FreeHashTable(table);
				}
				free(line);
				printf("\n\nGoodbye\n\n");
				return 0;
			case 's':
				if (table == NULL)
				{
					printf("You need to first instantiate a hashtable, use the 'c' option\n\n");
				}
				break;
			default:
				printf("%s\n", "That is an unrecognized entry, please try again\n\n");
				printOptions();
				break;
		}
	}

	/* End of input without 'q': release the table as well as the buffer. */
	if (table != NULL)
	{
		FreeHashTable(table);
	}
	free(line);
	return 0;
}
Esempio n. 12
0
/*
 * Prompts for a file path on stdin, then inserts every word of that file
 * into `table`.
 *
 * Words are the tokens obtained by splitting each line on the characters in
 * `delims`.  Before each insertion, if table->maxChainReached is set, the
 * table is replaced by the result of resizeHashTable() and the old table is
 * released.
 *
 * Returns NULL if memory cannot be allocated or the file cannot be opened
 * (in the latter case a message and the options menu are printed).
 * Otherwise returns a non-NULL pointer.  The stream is already closed at
 * that point, so the result may only be compared against NULL and must
 * never be used as a stream.
 */
FILE* uploadAndRunFile()
{
	FILE *fp;
	char * token;
	char delims[] =  ",.;:\"&!? -_\n\t\r@()#$%^*+={}[]|<>/~`";
	char * input = (char*)malloc(sizeof(char)*MAX_INPUT_LENGTH);
	char * line = (char*)malloc(sizeof(char)*MAX_INPUT_LENGTH);

	if (input == NULL || line == NULL)
	{
		free(input);
		free(line);
		return NULL;
	}

	printf("%s\n", "Please enter the path to the file\n\n");
	if (fgets(input, MAX_INPUT_LENGTH, stdin) == NULL)
	{
		/* No input at all: fall through to the invalid-path handling. */
		input[0] = '\0';
	}
	/* Cut the path at the newline kept by fgets(), if there is one. */
	input[strcspn(input, "\n")] = '\0';

	/* The file is only read, so do not demand write permission. */
	fp = fopen(input, "r");
	if (fp == NULL)
	{
		printf("%s\n", "That path was invalid, please try again\n\n");
		printOptions();
		free(input);
		free(line);
		return NULL;
	}

	/* Loop on the fgets() result: testing feof() beforehand would process
	 * the last line a second time. */
	while (fgets(line, MAX_INPUT_LENGTH, fp) != NULL)
	{
		/* One strtok() pass per line: initialise once with `line`, then
		 * advance exactly one token per iteration so no word is skipped
		 * and a NULL token is never inserted. */
		for (token = strtok(line, delims); token != NULL; token = strtok(NULL, delims))
		{
			if (table->maxChainReached)
			{
				HashTablePtr oldTable = table;
				table = resizeHashTable(table);
				FreeHashTable(oldTable);
			}
			HashInsert(table, createHashObject(token));
		}
	}

	fclose(fp);
	free(input);
	free(line);
	return fp;
}
Esempio n. 13
0
/*
 * Query front end.
 *
 * Usage: ./query [INDEX_FILE] [HTML_DIRECTORY]
 *
 * Validates both arguments, copies them into `file` and `dir_path`, loads
 * the index with ReadFile(), then reads query lines from stdin until end of
 * input.  Each line must consist only of alphabetic characters and
 * whitespace, contain at least one non-space character, and must not
 * contain two adjacent logical operators.  Valid lines go through
 * GetLinks(), Sort() (when final_list has two or more nodes) and Display().
 *
 * Returns 1 on argument or allocation errors, 0 otherwise.
 */
int main (int argc, char *argv[]) {
	
	// Check arguments
	
	// Check that there are two arguments passed.
	if (argc != 3) {
		printf("Please input exactly two arguments.\n");
		printf("Usage: ./query [INDEX_FILE] [HTML_DIRECTORY]\n");
		return 1;
	}
	
	// Check that the index file exists.
	if (!IsFile(argv[1])) {
		printf("Please input an existing [INDEX_FILE].\n");
		return 1;
	} 
	
	// Check that the html directory exists.  Both arguments are validated
	// before anything is allocated, so no early return can leak a copy.
	if (!IsDir(argv[2])) {
		printf("Please input an existing [HTML_DIRECTORY].\n");
		return 1;
	}
	
	// Both arguments are valid: copy them, checking each allocation before
	// writing through it.
	file = calloc(1, strlen(argv[1]) + 1);
	if (file == NULL) {
		fprintf(stderr, "Out of memory.\n");
		return 1;
	}
	strcpy(file, argv[1]);
	
	dir_path = calloc(1, strlen(argv[2]) + 1);
	if (dir_path == NULL) {
		fprintf(stderr, "Out of memory.\n");
		free(file);
		return 1;
	}
	strcpy(dir_path, argv[2]);

	
	// Declare variables.
	HashTable Index;
	HashTable *ptr;
	char *query;
	
	// Load and recreate an InvertedIndex from index file.
	InitializeHashTable(&Index);
	ptr = ReadFile(file, &Index);
	
	printf("Query:> ");
	
	// Receive user queries from input.  A fresh buffer is allocated for
	// every line; the loop ends when allocation or fgets() fails.
	while ((query = (char *)calloc(1, MAX)) && fgets(query, MAX, stdin)) {
		
		// Check query line.
		
		// If the query line is empty, ask for input again.
		if (strcmp(query, "\n") == 0) {
			printf("Please input words.\n\n");
			printf("Query:> ");
			free(query);
			continue;
		}
		
		// Measure the line once; the checks below do not modify it.
		size_t len = strlen(query);
		size_t i;
		
		// Check that only alphabetic characters and whitespace are passed.
		// <ctype.h> functions need an unsigned char value: a negative
		// plain char is undefined behavior.
		for (i = 0; i < len; i++) {
			unsigned char ch = (unsigned char)query[i];
			if (!isalpha(ch) && !isspace(ch)) {
				break;				
			}
		}
		if (i != len) {
			printf("Please input only ASCII characters, whitespace, or logical operators.\n\n");
			printf("Query:> ");
			free(query);
			continue;
		}
		
		// Check that at least one word is passed, ie the query is not just whitespace.
		for (i = 0; i < len; i++) {
			if (!isspace((unsigned char)query[i])) {
				break;
			}
		}
		if (i == len) {
			printf("Please input only ASCII characters, whitespace, or logical operators.\n\n");
			printf("Query:> ");
			free(query);
			continue;
		}
		
		// Check that no two successive logical operators are passed.
		if (strstr(query, "AND OR") || strstr(query, "AND AND") || strstr(query, "OR AND") || strstr(query, "OR OR")) {
			printf("Please input a valid query line.\n\n");
			printf("Query:> ");
			free(query);
			continue;
		}
		
		
		// Get the list of DocumentNodes containing the query.
		if (!GetLinks(query, ptr)) {
			printf("Please input a valid query line.\n\n");
			printf("Query:> ");
			
			// Cleanup.
			free(query);
			FreeList(0);
			FreeList(1);
			
			continue;
		}
		
		// Sort only if there are two are more documents in the list.
		if (final_list != NULL && final_list->next != NULL) {
			Sort(); // Sort by rank.
		}
		
		
		// Display results to stdout.
		if (!Display()) {
			printf("Error retrieving url from directory. Please check HTML_DIRECTORY.\n\n");
			// Cleanup.  `query` is released after the loop.
			FreeList(0);
			FreeList(1);
			break;
		}
		printf("\n\n");
		printf("Query:> ");
		
		// Cleanup.
		FreeList(1);
		free(query);
	}
	
	// Cleanup.  `query` is either NULL (allocation failed) or the buffer of
	// the iteration that ended the loop.
	free(query);
	CleanHashTable(ptr);
	FreeHashTable(ptr);
	free(file);
	free(dir_path);
	
	return 0;
}
Esempio n. 14
0
// our main function; here, we demonstrate how to use some
// of the hash table functions
int main(int argc, char **argv) {
  ExampleValuePtr evp;
  HashTable ht;
  HTIter iter;
  HTKeyValue kv, old_kv;
  HTKey_t i;

  // allocate a hash table with 10,000 initial buckets
  ht = AllocateHashTable(10000);
  Verify333(ht != NULL);

  // insert 20,000 elements (load factor = 2.0)
  for (i = 0; i < 20000; i++) {
    evp = (ExampleValuePtr) malloc(sizeof(ExampleValue));
    Verify333(evp != NULL);
    evp->num = i;

    // make sure HT has the right # of elements in it to start
    Verify333(NumElementsInHashTable(ht) == (HWSize_t) i);

    // insert a new element
    kv.key = FNVHashInt64((HTValue_t)i);
    kv.value = (HTValue_t)evp;
    Verify333(InsertHashTable(ht, kv, &old_kv) == 1);

    // make sure hash table has right # of elements post-insert
    Verify333(NumElementsInHashTable(ht) == (HWSize_t) (i+1));
  }

  // look up a few values
  Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 1);
  Verify333(kv.key == FNVHashInt64((HTValue_t)100));
  Verify333(((ExampleValuePtr) kv.value)->num == 100);

  Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)18583), &kv) == 1);
  Verify333(kv.key == FNVHashInt64((HTValue_t)18583));
  Verify333(((ExampleValuePtr) kv.value)->num == 18583);

  // make sure non-existent value cannot be found
  Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)20000), &kv) == 0);

  // delete a value
  Verify333(RemoveFromHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 1);
  Verify333(kv.key == FNVHashInt64((HTValue_t)100));
  Verify333(((ExampleValuePtr) kv.value)->num == 100);
  ExampleValueFree(kv.value);   // since we malloc'ed it, we must free it

  // make sure it's deleted
  Verify333(LookupHashTable(ht, FNVHashInt64((HTValue_t)100), &kv) == 0);
  Verify333(NumElementsInHashTable(ht) == (HWSize_t) 19999);

  // loop through using an iterator
  i = 0;
  iter = HashTableMakeIterator(ht);
  Verify333(iter != NULL);

  while (HTIteratorPastEnd(iter) == 0) {
    Verify333(HTIteratorGet(iter, &kv) == 1);
    Verify333(kv.key != FNVHashInt64((HTValue_t)100));   // we deleted it

    // advance the iterator
    HTIteratorNext(iter);
    i++;
  }
  Verify333(i == 19999);

  // free the iterator
  HTIteratorFree(iter);

  // free the hash table
  FreeHashTable(ht, &ExampleValueFree);
  return 0;
}
Esempio n. 15
0
/*
 * Crawler entry point.
 *
 * Arguments: seed URL, target directory, max depth.
 *
 * After validating the arguments it fetches and normalizes the seed page,
 * writes it to the directory, records its URL in the hash table, and then
 * pops pages from URLList until the list is empty, writing and crawling
 * each one.
 *
 * Returns 0 on success and 1 on any failure.  Once curl has been
 * initialised, every exit goes through the single cleanup section at the
 * bottom, so the seed page, the hash table (once it holds the seed URL)
 * and curl are released on error paths as well.
 */
int main(int argc, char* argv[])
{
	int status = 1;         // flipped to 0 only after the crawl completes
	int table_in_use = 0;   // set once AddToHashTable() has succeeded
	WebPage *page = NULL;
	struct stat st;

	// check command line arguments

	// Check that there are three input parameters.
	if (argc != 4) {
		printf("Please input three parameters: seed URL, directory, and max depth.\n");
		return 1;
	}

	// Check that the seed url has proper domain (old-www).
	if (strncmp(argv[1], URL_PREFIX, 15) != 0) {
		printf("The seed URL domain must be old-www.\n");
		return 1;
	}

	// Check that the directory already exists.
	if (stat(argv[2], &st) != 0 || !S_ISDIR(st.st_mode)) {
		printf("The directory %s cannot be found. Please enter an existing directory.\n", argv[2]);
		return 1;
	}

	// Check that the directory path does not have a '/' at the end for ease in writing filenames.
	if (argv[2][strlen(argv[2]) - 1] == '/') {
		printf("Please do not add '/' at the end of the directory path.\n");
		return 1;
	}

	// Check the third argument: it must be a non-empty run of digits.
	// An empty string would otherwise pass the loop and leave depth unset.
	if (argv[3][0] == '\0') {
		printf("Please input a valid number for the depth.\n");
		return 1;
	}
	for (size_t i = 0; argv[3][i] != '\0'; i++) {
		// isdigit() needs an unsigned char value.
		if (!isdigit((unsigned char)argv[3][i])) {
			printf("Please input a valid number for the depth.\n");
			return 1;
		}
	}

	sscanf(argv[3], "%d", &depth); // Store the argument as an integer.

	// Check that the depth specified does not exceed max depth.
	if (depth > MAX) {
		printf("Search depth cannot exceed MAX depth of 4. Please enter a valid depth.\n");
		return 1;
	}

	// init curl
	curl_global_init(CURL_GLOBAL_ALL);

	// setup seed page
	page = calloc(1, sizeof(WebPage));
	if (page == NULL) {
		fprintf(stderr, "Out of memory.\n");
		curl_global_cleanup();
		return 1;
	}
	page->url = (char *)malloc(strlen(argv[1]) + 1);
	MALLOC_CHECK(stderr, page->url); // Check that memory was allocated.
	strcpy(page->url, argv[1]); // Copy the seed url to page->url.

	// get seed webpage
	if (!GetWebPage(page)) {
		printf("The seed URL is invald. Please enter a valid seed URL.\n");
		goto cleanup;
	}

	// Normalize the seed url.
	if (!NormalizeURL(page->url)) {
		printf("Seed URL cannot be normalized.\n");
		goto cleanup;
	}

	// write seed file
	strcpy(path, argv[2]); // Let var path contain the directory path.
	WriteFile(page, path, pageID);

	// add seed page to hashtable
	if (!AddToHashTable(page->url)) {
		goto cleanup;
	}
	table_in_use = 1;

	// Initialize URLList.
	if (!InitList()) {
		goto cleanup;
	}

	// extract urls from seed page
	if (!CrawlPage(page)) {
		goto cleanup;
	}

	// while there are urls to crawl
	while (URLList.tail != NULL) {

		// get next webpage from list
		WebPage *next = PopList();

		// write page file
		pageID++;
		if (!WriteFile(next, argv[2], pageID)) { // Check that the WriteFile worked.
			FreeWebMemory(next);
			goto cleanup;
		}

		// extract urls from webpage and then cleanup.
		CrawlPage(next);
		FreeWebMemory(next);
	}
	status = 0;

cleanup:
	// Memory cleanup, shared by the success path and every failure path.
	if (table_in_use) {
		FreeHashTable(); // Free memory dynamically allocated to the hash table.
	}
	FreeWebMemory(page); // Free memory dynamically allocated to the Webpage variable.

	// cleanup curl
	curl_global_cleanup();

	return status;
}
Esempio n. 16
0
int
main(void)
{
	hashADT hashTable = NewHashTable();
	activityADT actividad1, actividad2, actividad3, actAux, actividad4;
	struct activityCDT _actividad1, _actividad2, _actividad3, _actAux, _actividad4;
	actInfo info1, info2, info3, info4;

	actividad1 = &_actividad1;		/*Para no reservarles memoria y bue...*/
	actividad2 = &_actividad2;	
	actividad3 = &_actividad3;
	actividad4 = &_actividad4;
	actAux = &_actAux;

	info1.ID = "A";
	actividad1->info = &info1;
	info2.ID = "B";
	actividad2->info = &info2;
	info3.ID = "C";
	actividad3->info = &info3;
	info4.ID = "D";
	actividad4->info = &info4;

	printf("Prueba para ver si se insertan.\n");
	printf("%s\n",InsertInTable(hashTable, actividad1)?"SI":"NO");
	printf("%s\n",InsertInTable(hashTable, actividad2)?"SI":"NO");
	printf("%s\n",InsertInTable(hashTable, actividad3)?"SI":"NO");
	printf("%s\n",InsertInTable(hashTable, actividad4)?"SI":"NO");

	printf("Prueba para ver si no se insertan, dado que ya estan insertadas.\n");
	printf("%s\n",InsertInTable(hashTable, actividad1)?"SI":"NO");
	printf("%s\n",InsertInTable(hashTable, actividad2)?"SI":"NO");
	printf("%s\n",InsertInTable(hashTable, actividad3)?"SI":"NO");
	printf("%s\n",InsertInTable(hashTable, actividad4)?"SI":"NO");


	printf("Prueba para ver si busca bien.\n");
	printf("%s\n", SearchInTable(hashTable, "A")->info->ID);
	printf("%s\n", SearchInTable(hashTable, "B")->info->ID);
	printf("%s\n", SearchInTable(hashTable, "C")->info->ID);
	printf("%s\n", SearchInTable(hashTable, "D")->info->ID);

	printf("Prueba para ver si se borran.\n");
	printf("%s\n",DeleteFromTable(hashTable, "A")?"SI":"NO");
	printf("%s\n",DeleteFromTable(hashTable, "C")?"SI":"NO");
	printf("%s\n",DeleteFromTable(hashTable, "C")?"SI":"NO");
	if((actAux = SearchInTable(hashTable, "A")) == NULL)
		printf("No ta!\n");
	if((actAux = SearchInTable(hashTable, "B")) != NULL)
		printf("%s\n", actAux->info->ID);
	if((actAux = SearchInTable(hashTable, "C")) == NULL)
		printf("No ta!\n");
	if((actAux = SearchInTable(hashTable, "D")) != NULL)
		printf("%s\n", actAux->info->ID);
	
	FreeHashTable(hashTable);
	
	printf(":D\n");

	return 0;
}
Esempio n. 17
0
/*
 * Releases a dictionary.
 *
 * Thin wrapper: `table` is passed unchanged to FreeHashTable() and nothing
 * else is done.  The caller's pointer is not reset here, so it must not be
 * used after this call if FreeHashTable() deallocates the table.
 */
void ClearDictionary(HASH_TABLE* table)
{
    FreeHashTable(table);
}
Esempio n. 18
0
/*
 * Indexer entry point.
 *
 * Arguments: page directory, index output file, and optionally a second
 * output file.
 *
 * Every file in the directory whose name consists only of digits is loaded;
 * each of its words is normalized and recorded in the WordsFound table
 * under the file's document id.  The table is then saved to argv[2].  When
 * a third argument is given, the saved index is read back and written to
 * argv[3].
 *
 * NOTE(review): the argument and directory error paths terminate with
 * exit(0), as they always have; the allocation-failure paths added here use
 * exit(1).  Confirm which status callers expect before unifying them.
 */
int main(int argc, char* argv[]) {
	//check argument number
	if (argc < 3 || argc > 4) {
		printf("too many or too little arguments, please try again");
		exit(0);
	}
	
	//check directory validity
	if (!IsDir(argv[1])) {
		printf("invalid directory, please try again");
		exit(0);
	}
	
	//Initialize variables and index
	char **filenames = NULL;
	int num_files = 0;
	HashTable *WordsFound = calloc(1, sizeof(HashTable));
	if (WordsFound == NULL) {
		printf("failed to allocate the index");
		exit(1);
	}
	num_files = GetFilenamesInDir(argv[1], &filenames);

	//check whether the folder has files
	if (num_files < 0) {
		printf("failed to get any filenames");
		exit(0);
	}

	//iterate through each file in the directory
	for (int i = 0; i < num_files; i++) {
		
		//check that the file is in the correct format (title is a number)
		int all_digits = 1;
		for (size_t c = 0; filenames[i][c] != '\0'; c++) {
			// isdigit() needs an unsigned char value.
			if (!isdigit((unsigned char)filenames[i][c])) {
				all_digits = 0;
				break;
			}
		}
		if (!all_digits) {
			// Skipped names are heap-allocated too; release them here.
			free(filenames[i]);
			continue;
		}

		//Build the path in a buffer sized for it; the directory is
		//user-supplied, so a fixed-size array could overflow.
		size_t path_len = strlen(argv[1]) + strlen(filenames[i]) + 1;
		char *file = malloc(path_len);
		if (file == NULL) {
			printf("failed to allocate a file path");
			exit(1);
		}
		snprintf(file, path_len, "%s%s", argv[1], filenames[i]);

		//Load the document
		char *doc = LoadDocument(file);
		int docId = GetDocumentId(filenames[i]);
		free(file);
		free(filenames[i]);

		//Nothing to index if the document could not be loaded.
		if (doc == NULL) {
			continue;
		}
		
		//Iterate through each word in the html file (doc)
		char *word;
		int pos = 0;
		while ((pos = GetNextWord(doc, pos, &word)) > 0) {
			NormalizeWord(word);
			if (InHashTable(word, WordsFound) == 0) {
				//New word: it is handed to the table and not freed here.
				AddToHashTable(word, WordsFound);
				UpdateHashTable(word, docId, WordsFound);
			}
			else {
				//Known word: only the posting is updated, so this copy
				//of the word can be released.
				UpdateHashTable(word, docId, WordsFound);
				free(word);
			}
		}
		free(doc);
	}	
	free(filenames);
	SaveIndexToFile(argv[2], WordsFound);				//Save the index to the file specified
	FreeHashTable(WordsFound);

	//only proceed if there was a third argument specified. If so, reload the index form the file you just created
	if (argc == 4) {
		HashTable *ReloadedIndex = ReadFile(argv[2]);
		SaveIndexToFile(argv[3], ReloadedIndex);
		FreeHashTable(ReloadedIndex);
	}
	return 0;
}
Esempio n. 19
0
/*
 * Indexer entry point.
 *
 * argv[1] is the page directory and argv[2] the index file to write.  When
 * more than those two arguments are given, argv[3] is an index file to read
 * back and argv[4] the file the re-read index is saved to.
 *
 * Every entry returned by GetFilenamesInDir() is prefixed with the page
 * directory (plain concatenation, no separator is inserted).  Entries whose
 * name part is not all digits, that are not regular files, that fail to load,
 * or whose document id is negative are skipped.
 *
 * Returns 0 on success and exits with -1 on argument, allocation, read or
 * save failures.
 */
int main (int argc, char **argv) {

	/* Check Arguments */
	if (!CheckArguments(argc, argv)) {
		exit(-1);
	}

	char *page_directory = argv[1];
	char *index_filename = argv[2];
	size_t dir_len = strlen(page_directory);

	// Initialize hashtable
	HashTable *index_hashtable = calloc(1, sizeof *index_hashtable);
	if (index_hashtable == NULL) {
		fprintf(stderr, "Could not allocate index hashtable. Exiting Now.\n");
		exit(-1);
	}

	/* Make array to hold filenames (just document numbers) and use GetFilenamesInDir to grab all names */
	char **filename_array = NULL;
	int number_of_files = GetFilenamesInDir(page_directory, &filename_array);
	if (number_of_files < 0) {
		fprintf(stderr, "Could not get filenames in page directory. Exiting Now.\n");
		exit(-1);
	}

	/* Add page_directory to the front of the filenames */
	for (int i = 0; i < number_of_files; i++) {
		char *bare_name = filename_array[i];
		// Room for directory + name + terminating NUL
		size_t len = dir_len + strlen(bare_name) + 1;
		char *full_path = calloc(len, sizeof(char));
		if (full_path == NULL) {
			fprintf(stderr, "Could not allocate memory for a file path. Exiting Now.\n");
			exit(-1);
		}
		strcpy(full_path, page_directory);
		strcat(full_path, bare_name);
		free(bare_name);

		filename_array[i] = full_path;
	}

	/* Populate the index data structure from the words on each doc
	 * Then Save to an index file
	 */
	for (int i = 0; i < number_of_files; i++) {
		char *file_name = filename_array[i];

		/* Check that the filename (the part after the directory) is all digits */
		int all_digits = 1;
		for (const char *p = file_name + dir_len; *p != '\0'; p++) {
			// isdigit() is only defined for unsigned char values and EOF
			if (!isdigit((unsigned char)*p)) {
				all_digits = 0;
				break;
			}
		}
		if (!all_digits) {
			// Reported once per file rather than once per offending character
			fprintf(stderr, "This file %s contains something other than a digit \n", file_name);
			continue;
		}

		// Check that each file in the filename array is a good file
		if (!IsFile(file_name)) {
			fprintf(stderr, "not file\n");
			continue;
		}

		// Get contents of file into a string
		char *document = LoadDocument(file_name);
		if (document == NULL) {
			continue;
		}

		// Get DocumentID of file (check if bad)
		int document_id = GetDocumentId(file_name, page_directory);
		if (document_id < 0) {
			fprintf(stderr, "Error when converting document id char to integer\n");
			// The loaded document must not leak when the file is skipped
			free(document);
			continue;
		}

		// Walk the document word by word and add each normalized word to the index
		int pos = 0;
		char *word_buffer;
		while ((pos = GetNextWord(document, pos, &word_buffer)) > 0) {
			NormalizeWord(word_buffer);
			UpdateIndex(word_buffer, document_id, index_hashtable);
			free(word_buffer);
		}
		// free the string containing the html
		free(document);
	}

	/* Save to index file, and check that it actually went well */
	if (!SaveIndexToFile(index_hashtable, index_filename)) {
		fprintf(stderr, "Could not save index hashtable to file\n");
		exit(-1);
	}

	for (int i = 0; i < number_of_files; i++) {
		free(filename_array[i]);
	}
	free(filename_array);
	FreeHashTable(index_hashtable);

	/* Nothing more to do when only the directory and index file were given */
	if (argc == 3) {
		return 0;
	}

	/* The read-back step needs both argv[3] and argv[4]; argv[argc] is NULL,
	 * so anything short of five arguments must not reach the calls below.
	 * NOTE(review): CheckArguments() may already reject this case -- confirm.
	 */
	if (argc < 5) {
		fprintf(stderr, "Both an index file to read and a new index file to write are required.\n");
		exit(-1);
	}

	/* Read index file into data structures and save to new index file */
	char *read_index_filename = argv[3];
	char *new_index_filename = argv[4];

	HashTable *read_index = ReadFile(read_index_filename);
	if (read_index == NULL) {
		fprintf(stderr, "Error when reading index file into data structures.\n");
		exit(-1);
	}
	// Save index data structures into new file
	if (!SaveIndexToFile(read_index, new_index_filename)) {
		fprintf(stderr, "Could not save read index file into new index file\n");
		exit(-1);
	}

	FreeHashTable(read_index);

	return 0;
}
Esempio n. 20
0
// Frees the word hash table `table` by calling FreeHashTable() with
// WordHTFree as its second argument.
//
// NOTE(review): WordHTFree is presumably the callback FreeHashTable()
// invokes on each stored value -- confirm against its definition.
void FreeWordHT(HashTable table) {
  FreeHashTable(table, &WordHTFree);
}