Example #1
void *map_get(struct _map *map, const char *key)
{
    if (!map || !key) /* guard against NULL inputs, as map_add does. */
        return NULL;

    unsigned long hash = hash_sdbm((unsigned char *)key, map->size);
    struct _map_entry *entry = &map->buckets[hash];

    for (; entry; entry = entry->next) { /* linear search. */
        if (entry->hash == hash)
            break;
    }
    if (!entry) /* We haven't found the key. */
        return NULL;

    return entry->value;
}
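Examples #1 and #3 rely on a struct _map / struct _map_entry pair and a two-argument hash_sdbm(key, size) that are defined elsewhere in that project. The following is a minimal sketch of what those declarations could look like, assuming hash_sdbm computes the classic sdbm hash and reduces it modulo the bucket count; the field names and signature are illustrative, not taken from the original source.

#include <stddef.h>

struct _map_entry {
    unsigned long hash;          /* reduced hash (bucket index) of the key */
    void *value;                 /* user payload; NULL while the slot is unused */
    struct _map_entry *next;     /* collision chain */
};

struct _map {
    size_t size;                 /* number of buckets */
    struct _map_entry *buckets;  /* array of size head entries */
};

/* sdbm string hash, reduced to a bucket index (assumed signature). */
unsigned long hash_sdbm(unsigned char *str, size_t size)
{
    unsigned long hash = 0;
    int c;

    while ((c = *str++))
        hash = c + (hash << 6) + (hash << 16) - hash;

    return hash % size;
}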
Example #2
#include <stdio.h>
#include <stdlib.h>
/* jhash_t and the hash_djb2/hash_sdbm/hash_lose prototypes come from the
 * project's own hash header, which is not shown here. */

int main(int argc, char *argv[]) {
	jhash_t hash;
	char *str;

	if (argc < 2) {
		fprintf(stderr, "No string input\n");
		exit(1);
	}

	str = argv[1];
	printf("Input: %s\n", str);
	hash = hash_djb2(str);
	printf("djb2 Hash: %lu\n", (unsigned long)hash);
	hash = hash_sdbm(str);
	printf("sdbm Hash: %lu\n", (unsigned long)hash);
	hash = hash_lose(str);
	printf("lose Hash: %lu\n", (unsigned long)hash);
	return 0;
}
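Example #2 above uses string-only variants of the hash functions (they take no length argument). For reference, the classic public-domain implementations of djb2, sdbm and the K&R-style "lose lose" hash are sketched below; the jhash_t typedef is an assumption, since the project's header is not shown.

typedef unsigned long jhash_t;   /* assumed; replace with the project's real typedef */

/* djb2 by Dan Bernstein: hash = hash * 33 + c, starting at 5381. */
jhash_t hash_djb2(const char *str)
{
    jhash_t hash = 5381;
    int c;

    while ((c = (unsigned char)*str++))
        hash = ((hash << 5) + hash) + c;

    return hash;
}

/* sdbm: hash = hash * 65599 + c, as used in the sdbm database library. */
jhash_t hash_sdbm(const char *str)
{
    jhash_t hash = 0;
    int c;

    while ((c = (unsigned char)*str++))
        hash = c + (hash << 6) + (hash << 16) - hash;

    return hash;
}

/* "lose lose": the naive sum-of-characters hash from the first edition of K&R. */
jhash_t hash_lose(const char *str)
{
    jhash_t hash = 0;
    int c;

    while ((c = (unsigned char)*str++))
        hash += c;

    return hash;
}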
Example #3
void map_add(struct _map *map, const char *key, void *value)
{
    if (!map || !key || !value)
        return;

    struct _map_entry *entry;

    unsigned long hash = hash_sdbm((unsigned char *)key, map->size);
    struct _map_entry *last_entry = &map->buckets[hash];
    if (!map->buckets[hash].value) { /* bucket is empty. */
        entry = &map->buckets[hash];
    } else {
        for (; last_entry->next != NULL; last_entry = last_entry->next)
            ; /* walk to the end of the collision chain. */
        entry = malloc(sizeof(struct _map_entry));
        if (!entry) /* allocation failed; leave the map unchanged. */
            return;
    }
    if (last_entry != entry)
        last_entry->next = entry;
    entry->value = value;
    entry->hash = hash;
    entry->next = NULL;
}
Example #4
/* helper function to invoke the correct hash method */
static unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len)
{
	unsigned int hash;

	switch (px->lbprm.algo & BE_LB_HASH_FUNC) {
	case BE_LB_HFCN_DJB2:
		hash = hash_djb2(key, len);
		break;
	case BE_LB_HFCN_WT6:
		hash = hash_wt6(key, len);
		break;
	case BE_LB_HFCN_CRC32:
		hash = hash_crc32(key, len);
		break;
	case BE_LB_HFCN_SDBM:
		/* this is the default hash function */
	default:
		hash = hash_sdbm(key, len);
		break;
	}

	return hash;
}
Example #5
/* =============================================================================
 * hashSegment
 * -- For hashtable
 * =============================================================================
 */
static ulong_t
hashSegment (const void* keyPtr)
{
    return (ulong_t)hash_sdbm((char*)keyPtr); /* can be any "good" hash function */
}
Example #6
int main (int argc, char *argv[])
{

	paper_rec_t DedupeRecord;
	dd_uint64_t Unique_CRID;	/* Unique CR_ID = (C_ID << 16) | CR_ID */

	long victim_index = 0, cache_size, window_size, bloom_filter_size;
	long i, j=0, temp_index;
	int Initial_Flag = 0, cache_algorithm;

	dd_uint8_t *sha1_value=NULL;
	int nLen = 0;
	long max_counter=0;
	HTItem *chunk_item, *item;
	long byte_len, temp, offset, ver, temp1; /* to read a trace file */

	unsigned long hash1, hash2;
	/* Heap Data structure variables */
	Cache_Memory Dataitem;
	std::vector<Cache_Memory>::iterator itr;

	unsigned long writeCounter = 0;
	unsigned long access_counter;
	long file_position;
	FILE *fp1, *fp;
	size_t keySize=0,iCnt;
	clock_t start = clock();
	time_t begin,end;
	time(&begin);
	if (argc < 5) {
	    /*                      0            1            2			3                     4 		*/
		fprintf(stderr, "usage: %s <Cache Size> <Window Size> <Cache Algorithm (0, 1, 2)> <Trace File>\n", argv[0]);
		fprintf(stderr, "       - Cache Size: Dedupe read cache size in terms of # of data chunk (e.g. 500 chunks = 4MB (500*8KB))\n");
		fprintf(stderr, "       - Window Size: Future sliding window size in terms of TIMES of cache size.\n");
		fprintf(stderr, "       - Cache Algorithm: 0 (Belady MIN), 1 (Belady MIN with a future window), 2 (Lookahead read cache)\n");
		fprintf(stderr, "       - Trace File: Trace file name with path\n");
		exit(1);
	  }
 
	cache_size = atol(argv[1]);
	assert(cache_size > 0);		/* cache size must be positive */
	window_size = atol(argv[2]);
	assert(window_size > 0);	/* window size must be positive */
	cache_algorithm = atoi(argv[3]);
	assert((cache_algorithm == 0)||(cache_algorithm == 1)||(cache_algorithm == 2)); /* there are only three selections */

	bloom_filter_size = cache_size*2; //No. of Hash functions for BF is 2
	bloom_filter = (long *)calloc(bloom_filter_size, sizeof(long)); /* counters must start at zero */

	ht_cache = AllocateHashTable(SHA1_VALUE_LENGTH, 1);
	heap = newMinHeap((u32)cache_size);
	if((fp1 = fopen(argv[4], "rb")) == NULL){	//for reading data one by one
		DEBUG_INFO("File open error....1\n");
		exit (-1);
	}

	if((fp = fopen(argv[4], "rb")) == NULL){	//for searching its future reference distance
		DEBUG_INFO("File open error....2\n");
		exit (-1);
	}

	long c=0, d=0;
	u32 itemIndex;

	keySize = sizeof(DedupeRecord.fp_bytes);
	DEBUG_INFO("Record Size is: %d\n",keySize);
	while (1)
	{
		if (fread(&DedupeRecord, sizeof(struct _paper_rec_t), 1, fp1) != 1)
			break; /* stop at end of file or on a short read. */
		/*if(DedupeRecord.fp_bytes[0] == 0)
		  DedupeRecord.fp_bytes[0] = '0';*/
		/*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt)
		  printf("%c",DedupeRecord.fp_bytes[iCnt]);*/
		//DEBUG_INFO("Reading chunks : %ld\n", c++);
		c++;
		if (c % 1000 == 0) {
			printf("Reading Chunks: %ld\n", c);
		}

		if (c % 10000 == 0 && totalAccess > 0) {
			printf("Cache hit ratio: %.3f = %lu / %lu \n",
			       (double)(Hit_Count * 100) / (double)totalAccess,
			       Hit_Count, totalAccess);
		}

		if(feof(fp1)) 
			break;

		file_position = ftell(fp1);

		/* initially fill the cache. During this initialization process, we do not count the cache hit ratio. */
		if (Initial_Flag == 0) {
			// Temporarily store the currently accessed chunk, with its future reference distance, in the cache

			chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

			//Update Bloom filter counters
			hash1 = hash_djb2(DedupeRecord.fp_bytes, keySize) % bloom_filter_size;
			hash2 = hash_sdbm(DedupeRecord.fp_bytes, keySize) % bloom_filter_size;

			/* increment both counters and keep the larger of the updated values */
			bloom_filter[hash1]++;
			bloom_filter[hash2]++;
			max_counter = (bloom_filter[hash1] > bloom_filter[hash2]) ?
				bloom_filter[hash1] : bloom_filter[hash2];

			if(chunk_item) { //Cache Hit
			  itemIndex = (u32)chunk_item->data;
			   DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
			   heapUpdate(heap,max_counter,itemIndex,&ht_cache);
			}
			else {
			    heapInsert(heap,DedupeRecord.fp_bytes, max_counter,&ht_cache);
			  //Sandeep - Insert into Heap and Heapify
			    cache_counter++;
			}

			if(cache_counter == cache_size) {
				DEBUG_INFO("\n#### Cache Initialization complete~!!####\n");	
				Initial_Flag = 1; 
				//Sandeep - Construct Heap and Heapify
				//fnBuildHeap(cache_heap);
				#ifdef DEBUG
				printf("Heap Size is: %d\n",cache_heap.size());
				/*PrintHashTable(ht_cache,-1,2);
				  fnPrintHeap(cache_heap);*/
				#endif

			}
		} 
		else { /* Once the cache is full of data initially, we start to measure the cache hit ratio from now. */

			totalAccess++;
			if((totalAccess % 100) == 0) {
				DEBUG_INFO("[CHECK] Current Access Number: %ld\n", totalAccess);
			}

			Unique_CRID = (DedupeRecord.cmc_id << 16) | DedupeRecord.creg_id;

                        chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

                        if(chunk_item) { //Cache Hit
				Hit_Count++;
				DEBUG_INFO("Cache Hit\n");

				//Update Bloom filter counters
				hash1 = hash_djb2(DedupeRecord.fp_bytes, keySize) % bloom_filter_size;
				hash2 = hash_sdbm(DedupeRecord.fp_bytes, keySize) % bloom_filter_size;

				/* keep the larger of the two updated counters */
				bloom_filter[hash1]++;
				bloom_filter[hash2]++;
				max_counter = (bloom_filter[hash1] > bloom_filter[hash2]) ?
					bloom_filter[hash1] : bloom_filter[hash2];
				itemIndex = (ulong)chunk_item->data;
				DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
				assert(itemIndex>=0 && itemIndex<=cache_size);
				heapUpdate(heap,max_counter,itemIndex,&ht_cache);
                                //Sandeep - Update heap counter val for this chunk with max_counter
				//fnUpdateHeap(cache_heap, Read_Cache[(ulong)chunk_item->data],max_counter);

                        }
			else {
			        heapPopMin(heap,&sha1_value,&access_counter,&ht_cache);
				if(!sha1_value)
				  ERROR("SHA1 Value in main is NULL\n");
				/*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt)
				  printf("%c",sha1_value[iCnt]);*/
				//Update Bloom filter counters for the evicted chunk
				hash1 = hash_djb2(sha1_value, keySize) % bloom_filter_size;
				hash2 = hash_sdbm(sha1_value, keySize) % bloom_filter_size;
				//Decrement BF counters
				bloom_filter[hash1]--;
				bloom_filter[hash2]--;

				free(sha1_value);
				
				//GP - Increment the BF counters for this chunk
				hash1 = hash_djb2(DedupeRecord.fp_bytes, keySize) % bloom_filter_size;
				hash2 = hash_sdbm(DedupeRecord.fp_bytes, keySize) % bloom_filter_size;

				/* keep the larger of the two updated counters */
				bloom_filter[hash1]++;
				bloom_filter[hash2]++;
				max_counter = (bloom_filter[hash1] > bloom_filter[hash2]) ?
					bloom_filter[hash1] : bloom_filter[hash2];

				heapInsert(heap,DedupeRecord.fp_bytes,max_counter,&ht_cache);
				 if(cache_algorithm == LOOKAHEAD){
				   /* Check if any other chunks in the current CR will appear within the future window.
				   If we found one, we add such chunk(s) in the cache. */
				   Check_Unique_CRID(fp, Unique_CRID, file_position, 0, cache_size, window_size*cache_size, bloom_filter_size);
				 }
			}

                       
		} //else

	} //while

	printf("\n###################################################################\n");
	printf("Cache hit ratio: %.3f = %lu / %lu \n", (double) (Hit_Count * 100) / (double) totalAccess , Hit_Count, totalAccess);
	printf("Cache size: %ld, window size: %ld\n", cache_size, window_size*cache_size); 
	printf("Dedupe trace: %s\n", argv[4]);
	printf("###################################################################\n");
	fclose(fp1);
	fclose(fp);

	FreeHashTable(ht_cache);
	deleteMinHeap(heap);
	time(&end);
	printf("###################################################################\n");
	printf("Total time taken is %f \n",((double)clock()-start)/CLOCKS_PER_SEC);
	printf("###################################################################\n");
	return 0;
}
Example #7
void Check_Unique_CRID(FILE *pFile, dd_uint64_t Current_Unique_CRID, long position, long distance, long cache_size, long window_size, long bloom_filter_size)
{
	
	
  long i, victim_index;
  dd_uint64_t Temp_Unique_CRID;
  paper_rec_t Temp_DedupeTrace;
  long window_counter=0;
  long max_counter=0, temp_index=0;
  dd_uint8_t *sha1_value=NULL;
  HTItem *chunk_item, *item;
  unsigned long hash1, hash2;
  unsigned long access_counter;
  size_t keySize = sizeof(Temp_DedupeTrace.fp_bytes),iCnt;
  bool flag=true;
  std::list<paper_rec_t>::iterator itr;
  /* Heap Data structure variables */
  /* If the sliding window is empty, Check_Unique_CRID is being called for the
   * first time: fill the window with records up to the window size. */
  if(SlidingWindow.size() == 0)
    {
	    
      fseek(pFile, position, SEEK_SET);
      while (1) 
	{  
	  fread(&Temp_DedupeTrace, sizeof(struct _paper_rec_t),1, pFile);
	  /*if(Temp_DedupeTrace.fp_bytes[0] == 0)
	    Temp_DedupeTrace.fp_bytes[0] = '0';*/
	  if(feof(pFile)) 
	    break;
	  SlidingWindow.push_back(Temp_DedupeTrace);
		
	  if(SlidingWindow.size() >= window_size) {
	    break;
	  }
	}
    }
  else if(SlidingWindow.size() == window_size){
    /* Remove one old record and insert the latest record */
    SlidingWindow.pop_front();
    fseek(pFile, position + window_size, SEEK_SET);
    fread(&Temp_DedupeTrace, sizeof(struct _paper_rec_t),1, pFile);
    SlidingWindow.push_back(Temp_DedupeTrace);
  }
  for(itr = SlidingWindow.begin();itr!=SlidingWindow.end();itr++){
    Temp_Unique_CRID = ((*itr).cmc_id << 16) | (*itr).creg_id;

    /* if any data chunk in the current CR appears within the future window */
    if(Temp_Unique_CRID == Current_Unique_CRID) {

      DEBUG_INFO("[Found~!!] A chunk in current access CR will appeare within a future window.\n");

      chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,(*itr).fp_bytes));

      if(chunk_item) { //Cache Hit
	DEBUG_INFO("Cache Hit - New\n");

	//Update Bloom filter counters
	hash1 = hash_djb2((*itr).fp_bytes,keySize)%bloom_filter_size;
	hash2 = hash_sdbm((*itr).fp_bytes,keySize)%bloom_filter_size;
	/* DEBUG_INFO("### Returned hash values are %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);*/
	/* keep the larger of the two updated counters */
	bloom_filter[hash1]++;
	bloom_filter[hash2]++;
	max_counter = (bloom_filter[hash1] > bloom_filter[hash2]) ?
		bloom_filter[hash1] : bloom_filter[hash2];
	temp_index = (u32)chunk_item->data;
	/* DEBUG_INFO("Index its updating is %ld:\n",temp_index);*/
	heapUpdate(heap, max_counter, temp_index,&ht_cache);
				
      }
      else {
	//Sandeep - Choose victim from heap and strcpy Sha1 Value of the victim chunk to "sha1_value" variable
	/*sha1_value = fnDeleteItemHeap(cache_heap);*/
				  
	//GP - Increment the BF counters for this chunk
	hash1 = hash_djb2((*itr).fp_bytes,keySize)%bloom_filter_size;
	hash2 = hash_sdbm((*itr).fp_bytes,keySize)%bloom_filter_size;
	//DEBUG_INFO("### Returned hash values are before insert cache miss %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
	/* keep the larger of the two updated counters */
	bloom_filter[hash1]++;
	bloom_filter[hash2]++;
	max_counter = (bloom_filter[hash1] > bloom_filter[hash2]) ?
		bloom_filter[hash1] : bloom_filter[hash2];
	flag = checkMin(heap,max_counter);
	/* Replace the heap minimum only if its counter is smaller than the new
	 * chunk's counter; otherwise skip the insertion. */
	if(flag){
	  heapPopMin(heap,&sha1_value,&access_counter,&ht_cache);
	  if(!sha1_value)
	    ERROR("SHA1 Value in Check Unique CR is NULL\n");
	  /*for(iCnt = 0;iCnt<sizeof((*itr).fp_bytes);++iCnt)
	    printf("%c",sha1_value[iCnt]);*/
	  //Update Bloom filter counters
	  hash1 = hash_djb2(sha1_value, keySize) % bloom_filter_size;
	  hash2 = hash_sdbm(sha1_value, keySize) % bloom_filter_size;
	  //DEBUG_INFO("### Before Decrementing values are %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
	  //Decrement BF counters
	  bloom_filter[hash1]--;
	  bloom_filter[hash2]--;
	  free(sha1_value);
	  //Sandeep - Insert chunk into Heap with max_counter
	  heapInsert(heap,(*itr).fp_bytes,max_counter,&ht_cache);
	}
      }

    } 
  }
	  
}
Example #8
/* =============================================================================
 * hashSegment
 * -- For hashtable
 * =============================================================================
 */
static unsigned long
hashSegment (const void* keyPtr)
{
    return hash_sdbm((char*)keyPtr); /* can be any "good" hash function */
}