Example #1
0
/* Smoke test for the hash-table API: insert three month/value pairs,
 * look one back up, and verify the stored value before freeing the table. */
static void TestInsert() {
  struct HashTable* table;
  HTItem* entry;

  table = AllocateHashTable(1, 0);   /* 1-byte values; 0: keys are not copied */

  HashInsert(table, PTR_KEY(table, "January"), 31);
  entry = HashInsert(table, PTR_KEY(table, "February"), 28);
  entry = HashInsert(table, PTR_KEY(table, "March"), 31);

  entry = HashFind(table, PTR_KEY(table, "February"));
  assert(entry);                /* the key must be present */
  assert(entry->data == 28);    /* and must carry the inserted value */

  FreeHashTable(table);
}
Example #2
0
/*
 * Look up an archive entry by name.
 *
 * Returns a pointer to the entry's data, or NULL when the name is not in
 * the index.  If `size` is non-NULL, the entry's size is parsed (octal
 * ASCII, tar-style) from the header's size field and stored there.  The
 * indexed pointer refers to the data just past the 512-byte header, and
 * the size field lives at header offset 124, hence the 388-byte step
 * back (512 - 124 = 388).
 */
void* gar_get(gar_list* list, size_t* size, const char* name)
{
	HTItem* item = HashFind(list->ht, PTR_KEY(list->ht, name));
	if (item)
	{
		if (size)
			/* BUGFIX: arithmetic on void* is a GCC extension, not
			 * standard C — step back through a char* instead. */
			*size = strtol((char*)item->data - 388, NULL, 8);
		return (void*)item->data;
	}
	return NULL;
}
Example #3
0
/*
 * gar_index - build a name -> data-pointer index over an in-memory archive
 * made of 512-byte tar-style headers followed by size-padded data blocks.
 *
 * On success returns a freshly allocated gar_list whose hash table maps
 * each regular-file entry name to the address of its data (header + 512).
 * Returns NULL (after freeing everything it allocated) when the archive
 * is truncated: an entry's rounded-up size runs past `length`, or the
 * buffer ends without the zeroed terminating header.
 */
gar_list* gar_index(void* gar, size_t length)
{
	gar_list* list = malloc(sizeof(gar_list));	/* NOTE(review): malloc result unchecked — TODO confirm OOM policy */
	size_t size;
	char name[100];
	unsigned long i;
	name[99] = '\0';	/* hard stop in case the copy loop below never writes a terminator */
	list->gar = gar;
	list->length = length;
	list->ht = AllocateHashTable(0, 1);	/* presumably value size 0 / copy keys — verify against hash-table API */
	/* Walk one 512-byte header per iteration; data blocks are skipped
	 * explicitly at the bottom of the loop body.
	 * NOTE(review): arithmetic on void* (gar += ...) is a GCC extension. */
	for (; length >= 512; gar += 512, length -= 512)
		if (!*(char*)gar)
			/* A header whose first byte is 0 marks end-of-archive: done. */
			return list;
		else
		{
			/* Entry size: octal ASCII at header offset 124 (as in tar). */
			size = strtol(gar + 124, NULL, 8);
			/* Offset 156 is the type flag; '0' or NUL means regular file. */
			if (((char*)gar)[156] == '0' || ((char*)gar)[156] == '\0')
			{
				/* Copy header bytes 5..98 into name[0..93].  Starting at
				 * offset 5 presumably skips a fixed prefix of the name
				 * field — TODO confirm against the archive format.
				 * NOTE(review): if no ' '/NUL terminator appears before
				 * i == 99, name[94..98] stay uninitialized (name[99] is
				 * the only guaranteed NUL). */
				for (i = 5; i < 99; i ++)
					switch ((name[i - 5] = ((char*)gar)[i]))
					{
						case '/':
							/* Flatten paths: '/' becomes '.' in the key. */
							name[i - 5] = '.';
							break;
						case ' ':
							name[i - 5] = '\0';
							/* fallthrough: space also ends the name */
						case '\0':
							goto copied;
					}
				copied:
				/* Map the name to the start of the entry's data. */
				HashInsert(list->ht, PTR_KEY(list->ht, name), (ulong)gar + 512);
			}
			/* Round the data size up to whole 512-byte blocks. */
			size = size ? ((size - 1) / 512 + 1) * 512 : 0;
			if (length < size)
				break;	/* truncated entry: fall through to the failure path */
			gar += size;
			length -= size;
		}
	/* Truncated or unterminated archive: undo allocations and fail. */
	FreeHashTable(list->ht);
	free(list);
	return NULL;
}
int main (int argc, char *argv[])
{

	paper_rec_t DedupeRecord;
	dd_uint64_t Unique_CRID;	/* Unique CR_ID = (C_ID << 16) | CR_ID */

	long victim_index = 0, cache_size, window_size, bloom_filter_size;
	long i, j=0, temp_index;
	int Initial_Flag = 0, cache_algorithm;

	dd_uint8_t *sha1_value=NULL;
	int nLen = 0;
	long max_counter=0;
	HTItem *chunk_item, *item;
	long byte_len, temp, offset, ver, temp1; /* to read a trace file */

	unsigned long hash1, hash2;
	/* Heap Data structure variables */
	Cache_Memory Dataitem;
	std::vector<Cache_Memory>::iterator itr;

	unsigned long writeCounter = 0;
	unsigned long access_counter;
	long file_position;
	FILE *fp1, *fp;
	size_t keySize=0,iCnt;
	clock_t start = clock();
	time_t begin,end;
	time(&begin);
	if (argc < 5) {
	    /*                      0            1            2			3                     4 		*/
		fprintf(stderr, "usage: %s <Cache Size> <Window Size> <Cache Algorithm (0, 1, 2)> <Trace File>\n", argv[0]);
		fprintf(stderr, "       - Cache Size: Dedupe read cache size in terms of # of data chunk (e.g. 500 chunks = 4MB (500*8KB))\n");
		fprintf(stderr, "       - Window Size: Future sliding window size in terms of TIMES of cache size.\n");
		fprintf(stderr, "       - Cache Algorithm: 0 (Belady MIN), 1 (Belady MIN with a future window), 2 (Lookahead read cache)\n");
		fprintf(stderr, "       - Trace File: Trace file name with path\n");
		exit(1);
	  }
 
	cache_size = atol(argv[1]);
	assert(cache_size > 0);		/* cache size must be positive */
	window_size = atol(argv[2]);
	assert(window_size > 0);	/* window size must be positive */
	cache_algorithm = atoi(argv[3]);
	assert((cache_algorithm == 0)||(cache_algorithm == 1)||(cache_algorithm == 2)); /* there are only three selections */

	bloom_filter_size = cache_size*2; //No. of Hash functions for BF is 2
	bloom_filter = (long *)malloc(sizeof(long)*bloom_filter_size);

	ht_cache = AllocateHashTable(SHA1_VALUE_LENGTH, 1);
	heap = newMinHeap((u32)cache_size);
	if((fp1 = fopen(argv[4], "rb")) == NULL){	//for reading data one by one
		DEBUG_INFO("File open error....1\n");
		exit (-1);
	}

	if((fp = fopen(argv[4], "rb")) == NULL){	//for searching its future reference distance
		DEBUG_INFO("File open error....2\n");
		exit (-1);
	}

	long c=0, d=0;
	u32 itemIndex;

	keySize = sizeof(DedupeRecord.fp_bytes);
	DEBUG_INFO("Record Size is: %d\n",keySize);
	while (1)
	{
		fread(&DedupeRecord, sizeof(struct _paper_rec_t),1, fp1);
		/*if(DedupeRecord.fp_bytes[0] == 0)
		  DedupeRecord.fp_bytes[0] = '0';*/
		/*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt)
		  printf("%c",DedupeRecord.fp_bytes[iCnt]);*/
		//DEBUG_INFO("Reading chunks : %ld\n", c++);
		c++;
                if(c%1000 == 0){
                        printf("Reading Chunks: %ld\n",c);
                }

                if (c % 10000 == 0) {
                    printf("Cache hit ratio: %.3f = %lu / %lu \n",
                           (double) (Hit_Count * 100) / (double) totalAccess ,
                           Hit_Count,
                           totalAccess);
                }

		if(feof(fp1)) 
			break;

		file_position = ftell(fp1);

		/* initially fill the cache. During this initialization process, we do not count the cache hit ratio. */
		if (Initial_Flag == 0) {
			// Temporally store this current access chunk  with its future reference distance in the cache 

			chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

			//Update Bloom filter counters
                        hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
                        hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;

                        max_counter = bloom_filter[hash1]++;
                        if((bloom_filter[hash2]++) > max_counter)
				max_counter = bloom_filter[hash2];

			if(chunk_item) { //Cache Hit
			  itemIndex = (u32)chunk_item->data;
			   DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
			   heapUpdate(heap,max_counter,itemIndex,&ht_cache);
			}
			else {
			    heapInsert(heap,DedupeRecord.fp_bytes, max_counter,&ht_cache);
			  //Sandeep - Insert into Heap and Heapify
			    cache_counter++;
			}

			if(cache_counter == cache_size) {
				DEBUG_INFO("\n#### Cache Initialization complete~!!####\n");	
				Initial_Flag = 1; 
				//Sandeep - Construct Heap and Heapify
				//fnBuildHeap(cache_heap);
				#ifdef DEBUG
				printf("Heap Size is: %d\n",cache_heap.size());
				/*PrintHashTable(ht_cache,-1,2);
				  fnPrintHeap(cache_heap);*/
                               #endif

			}
		} 
		else { /* Once the cache is full of data initially, we start to measure the cache hit ratio from now. */

			totalAccess++;
			if((totalAccess % 100) == 0) {
				DEBUG_INFO("[CHECK] Current Access Number: %ld\n", totalAccess);
			}

			Unique_CRID = (DedupeRecord.cmc_id << 16) | DedupeRecord.creg_id;

                        chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

                        if(chunk_item) { //Cache Hit
				Hit_Count++;
				DEBUG_INFO("Cache Hit\n");

                                //Update Bloom filter counters
                                hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
                                hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				//DEBUG_INFO("### Returned hash values are %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
                	        max_counter = bloom_filter[hash1]++;
        	                if((bloom_filter[hash2]++) > max_counter)
	                                max_counter = bloom_filter[hash2];
				itemIndex = (ulong)chunk_item->data;
				DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
				assert(itemIndex>=0 && itemIndex<=cache_size);
				heapUpdate(heap,max_counter,itemIndex,&ht_cache);
                                //Sandeep - Update heap counter val for this chunk with max_counter
				//fnUpdateHeap(cache_heap, Read_Cache[(ulong)chunk_item->data],max_counter);

                        }
			else {
			        heapPopMin(heap,&sha1_value,&access_counter,&ht_cache);
				if(!sha1_value)
				  ERROR("SHA1 Value in main is NULL\n");
				/*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt)
				  printf("%c",sha1_value[iCnt]);*/
                                //Update Bloom filter counters
                                hash1 = hash_djb2(sha1_value,sizeof(sha1_value))%bloom_filter_size;
                                hash2 = hash_sdbm(sha1_value,sizeof(sha1_value))%bloom_filter_size;
				//DEBUG_INFO("### In Main before decrement %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
				//Decrement BF counters
				bloom_filter[hash1]--;
                                bloom_filter[hash2]--;

				free(sha1_value);
				
				//GP - Increment the BF counters for this chunk
                                hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
                                hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				//DEBUG_INFO("### Returned hash values are in main cache_miss %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
	                        max_counter = bloom_filter[hash1]++;
        	                if((bloom_filter[hash2]++) > max_counter)
                	                max_counter = bloom_filter[hash2];
								
				heapInsert(heap,DedupeRecord.fp_bytes,max_counter,&ht_cache);
				 if(cache_algorithm == LOOKAHEAD){
				   /* Check if any other chunks in the current CR will appear within the future window.
				   If we found one, we add such chunk(s) in the cache. */
				   Check_Unique_CRID(fp, Unique_CRID, file_position, 0, cache_size, window_size*cache_size, bloom_filter_size);
				 }
			}

                       
		} //else

	} //while

	printf("\n###################################################################\n");
	printf("Cache hit ratio: %.3f = %lu / %lu \n", (double) (Hit_Count * 100) / (double) totalAccess , Hit_Count, totalAccess);
	printf("Cache size: %ld, window size: %ld\n", cache_size, window_size*cache_size); 
	printf("Dedupe trace: %s\n", argv[4]);
	printf("###################################################################\n");
	fclose(fp1);
	fclose(fp);

	FreeHashTable(ht_cache);
	deleteMinHeap(heap);
	time(&end);
	printf("###################################################################\n");
	printf("Total time taken is %f \n",((double)clock()-start)/CLOCKS_PER_SEC);
	printf("###################################################################\n");
	return 0;
}
/*
 * Check_Unique_CRID - look ahead in the trace for chunks belonging to the
 * same container (CR) as the current access and prefetch/refresh them.
 *
 * pFile:               second handle onto the trace file (look-ahead only)
 * Current_Unique_CRID: (cmc_id << 16) | creg_id of the current access
 * position:            byte offset of the record after the current one
 * distance, cache_size: accepted for interface compatibility; unused here
 * window_size:         sliding-window length in records
 * bloom_filter_size:   number of counters in the global counting BF
 *
 * Maintains the global SlidingWindow list: fills it on the first call,
 * then slides it by one record per call.  For every windowed record in
 * the same CR, bumps its Bloom-filter counters and either refreshes its
 * heap priority (if cached) or replaces the heap minimum (if its count
 * beats the minimum).
 */
void Check_Unique_CRID(FILE *pFile, dd_uint64_t Current_Unique_CRID, long position, long distance, long cache_size, long window_size, long bloom_filter_size)
{
  dd_uint64_t Temp_Unique_CRID;
  paper_rec_t Temp_DedupeTrace;
  long max_counter = 0, temp_index = 0;
  dd_uint8_t *sha1_value = NULL;
  HTItem *chunk_item;
  unsigned long hash1, hash2;
  unsigned long access_counter;
  size_t keySize = sizeof(Temp_DedupeTrace.fp_bytes);
  bool flag = true;
  std::list<paper_rec_t>::iterator itr;

  if (SlidingWindow.size() == 0) {
    /* First call: fill the window with the next `window_size` records. */
    fseek(pFile, position, SEEK_SET);
    while (1) {
      /* BUGFIX: test the fread result directly instead of feof() after. */
      if (fread(&Temp_DedupeTrace, sizeof(struct _paper_rec_t), 1, pFile) != 1)
        break;
      SlidingWindow.push_back(Temp_DedupeTrace);
      if (SlidingWindow.size() >= (size_t)window_size)  /* cast: avoid signed/unsigned compare */
        break;
    }
  }
  else if (SlidingWindow.size() == (size_t)window_size) {
    /* Slide forward: drop the oldest record and append the next one. */
    SlidingWindow.pop_front();
    /* NOTE(review): this seeks window_size BYTES past `position`, not
     * window_size records — looks like it should be
     * position + window_size * sizeof(struct _paper_rec_t); confirm. */
    fseek(pFile, position + window_size, SEEK_SET);
    /* BUGFIX: only push the record if it was actually read; the old code
     * pushed a stale/garbage record at EOF. */
    if (fread(&Temp_DedupeTrace, sizeof(struct _paper_rec_t), 1, pFile) == 1)
      SlidingWindow.push_back(Temp_DedupeTrace);
  }

  for (itr = SlidingWindow.begin(); itr != SlidingWindow.end(); itr++) {
    Temp_Unique_CRID = ((*itr).cmc_id << 16) | (*itr).creg_id;

    /* Only chunks from the current container are of interest. */
    if (Temp_Unique_CRID != Current_Unique_CRID)
      continue;

    DEBUG_INFO("[Found~!!] A chunk in current access CR will appeare within a future window.\n");

    chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,(*itr).fp_bytes));

    /* Bump both BF counters for this chunk (done in both branches below
     * in the original, so it is hoisted here); max_counter is the larger
     * updated value.  BUGFIX: use post-increment values consistently —
     * the old code compared pre-increment values. */
    hash1 = hash_djb2((*itr).fp_bytes, keySize) % bloom_filter_size;
    hash2 = hash_sdbm((*itr).fp_bytes, keySize) % bloom_filter_size;
    max_counter = ++bloom_filter[hash1];
    if (++bloom_filter[hash2] > max_counter)
      max_counter = bloom_filter[hash2];

    if (chunk_item) { /* already cached: refresh its heap priority */
      DEBUG_INFO("Cache Hit - New\n");
      temp_index = (u32)chunk_item->data;
      heapUpdate(heap, max_counter, temp_index, &ht_cache);
    }
    else {
      /* Replace the heap minimum only if this chunk's count beats it. */
      flag = checkMin(heap, max_counter);
      if (flag) {
        heapPopMin(heap, &sha1_value, &access_counter, &ht_cache);
        if (!sha1_value)
          ERROR("SHA1 Value in Check Unique CR is NULL\n");
        /* BUGFIX: decrement over the full key (keySize bytes);
         * sizeof(sha1_value) is only the size of the pointer, so the
         * wrong counters were decremented. */
        hash1 = hash_djb2(sha1_value, keySize) % bloom_filter_size;
        hash2 = hash_sdbm(sha1_value, keySize) % bloom_filter_size;
        bloom_filter[hash1]--;
        bloom_filter[hash2]--;
        free(sha1_value);
        heapInsert(heap, (*itr).fp_bytes, max_counter, &ht_cache);
      }
    }
  }
}