Exemple #1
0
int main(int argc, char const *argv[]) {
	/* Prueba constructores */
	/* Para numeros random */
	time_t t;
	srand((unsigned) time(&t));

	struct MinHeap* test = newMinHeap();
	int ran = (rand() % 200);

	struct MinHeap* testSize = newMinHeapSize(ran);
	/* Probamos si newHeap construye bien */
	int i; 
	for (i = 0; i < test->arrayLength; i++) {
		assert(test->array[i]->data == NULL);
	}
	/* Probamos si newHeapSize construye bien */
	for (i = 0; i < ran; i++) {
		assert(testSize->array[i]->data == NULL);
	}
	
	int a[] = {1, 2, 3, 4};
	arrayToMinHeap(a, 4);

}
int main (int argc, char *argv[])
{

	paper_rec_t DedupeRecord;
	dd_uint64_t Unique_CRID;	/* Unique CR_ID = (C_ID << 16) | CR_ID */

	long victim_index = 0, cache_size, window_size, bloom_filter_size;
	long i, j=0, temp_index;
	int Initial_Flag = 0, cache_algorithm;

	dd_uint8_t *sha1_value=NULL;
	int nLen = 0;
	long max_counter=0;
	HTItem *chunk_item, *item;
	long byte_len, temp, offset, ver, temp1; /* to read a trace file */

	unsigned long hash1, hash2;
	/* Heap Data structure variables */
	Cache_Memory Dataitem;
	std::vector<Cache_Memory>::iterator itr;

	unsigned long writeCounter = 0;
	unsigned long access_counter;
	long file_position;
	FILE *fp1, *fp;
	size_t keySize=0,iCnt;
	clock_t start = clock();
	time_t begin,end;
	time(&begin);
	if (argc < 5) {
	    /*                      0            1            2			3                     4 		*/
		fprintf(stderr, "usage: %s <Cache Size> <Window Size> <Cache Algorithm (0, 1, 2)> <Trace File>\n", argv[0]);
		fprintf(stderr, "       - Cache Size: Dedupe read cache size in terms of # of data chunk (e.g. 500 chunks = 4MB (500*8KB))\n");
		fprintf(stderr, "       - Window Size: Future sliding window size in terms of TIMES of cache size.\n");
		fprintf(stderr, "       - Cache Algorithm: 0 (Belady MIN), 1 (Belady MIN with a future window), 2 (Lookahead read cache)\n");
		fprintf(stderr, "       - Trace File: Trace file name with path\n");
		exit(1);
	  }
 
	cache_size = atol(argv[1]);
	assert(cache_size > 0);		/* cache size must be positive */
	window_size = atol(argv[2]);
	assert(window_size > 0);	/* window size must be positive */
	cache_algorithm = atoi(argv[3]);
	assert((cache_algorithm == 0)||(cache_algorithm == 1)||(cache_algorithm == 2)); /* there are only three selections */

	bloom_filter_size = cache_size*2; //No. of Hash functions for BF is 2
	bloom_filter = (long *)malloc(sizeof(long)*bloom_filter_size);

	ht_cache = AllocateHashTable(SHA1_VALUE_LENGTH, 1);
	heap = newMinHeap((u32)cache_size);
	if((fp1 = fopen(argv[4], "rb")) == NULL){	//for reading data one by one
		DEBUG_INFO("File open error....1\n");
		exit (-1);
	}

	if((fp = fopen(argv[4], "rb")) == NULL){	//for searching its future reference distance
		DEBUG_INFO("File open error....2\n");
		exit (-1);
	}

	long c=0, d=0;
	u32 itemIndex;

	keySize = sizeof(DedupeRecord.fp_bytes);
	DEBUG_INFO("Record Size is: %d\n",keySize);
	while (1)
	{
		fread(&DedupeRecord, sizeof(struct _paper_rec_t),1, fp1);
		/*if(DedupeRecord.fp_bytes[0] == 0)
		  DedupeRecord.fp_bytes[0] = '0';*/
		/*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt)
		  printf("%c",DedupeRecord.fp_bytes[iCnt]);*/
		//DEBUG_INFO("Reading chunks : %ld\n", c++);
		c++;
                if(c%1000 == 0){
                        printf("Reading Chunks: %ld\n",c);
                }

                if (c % 10000 == 0) {
                    printf("Cache hit ratio: %.3f = %lu / %lu \n",
                           (double) (Hit_Count * 100) / (double) totalAccess ,
                           Hit_Count,
                           totalAccess);
                }

		if(feof(fp1)) 
			break;

		file_position = ftell(fp1);

		/* initially fill the cache. During this initialization process, we do not count the cache hit ratio. */
		if (Initial_Flag == 0) {
			// Temporally store this current access chunk  with its future reference distance in the cache 

			chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

			//Update Bloom filter counters
                        hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
                        hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;

                        max_counter = bloom_filter[hash1]++;
                        if((bloom_filter[hash2]++) > max_counter)
				max_counter = bloom_filter[hash2];

			if(chunk_item) { //Cache Hit
			  itemIndex = (u32)chunk_item->data;
			   DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
			   heapUpdate(heap,max_counter,itemIndex,&ht_cache);
			}
			else {
			    heapInsert(heap,DedupeRecord.fp_bytes, max_counter,&ht_cache);
			  //Sandeep - Insert into Heap and Heapify
			    cache_counter++;
			}

			if(cache_counter == cache_size) {
				DEBUG_INFO("\n#### Cache Initialization complete~!!####\n");	
				Initial_Flag = 1; 
				//Sandeep - Construct Heap and Heapify
				//fnBuildHeap(cache_heap);
				#ifdef DEBUG
				printf("Heap Size is: %d\n",cache_heap.size());
				/*PrintHashTable(ht_cache,-1,2);
				  fnPrintHeap(cache_heap);*/
                               #endif

			}
		} 
		else { /* Once the cache is full of data initially, we start to measure the cache hit ratio from now. */

			totalAccess++;
			if((totalAccess % 100) == 0) {
				DEBUG_INFO("[CHECK] Current Access Number: %ld\n", totalAccess);
			}

			Unique_CRID = (DedupeRecord.cmc_id << 16) | DedupeRecord.creg_id;

                        chunk_item = HashFind(ht_cache, PTR_KEY(ht_cache,DedupeRecord.fp_bytes));

                        if(chunk_item) { //Cache Hit
				Hit_Count++;
				DEBUG_INFO("Cache Hit\n");

                                //Update Bloom filter counters
                                hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
                                hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				//DEBUG_INFO("### Returned hash values are %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
                	        max_counter = bloom_filter[hash1]++;
        	                if((bloom_filter[hash2]++) > max_counter)
	                                max_counter = bloom_filter[hash2];
				itemIndex = (ulong)chunk_item->data;
				DEBUG_INFO("Index its updating is %ld:\n",itemIndex);
				assert(itemIndex>=0 && itemIndex<=cache_size);
				heapUpdate(heap,max_counter,itemIndex,&ht_cache);
                                //Sandeep - Update heap counter val for this chunk with max_counter
				//fnUpdateHeap(cache_heap, Read_Cache[(ulong)chunk_item->data],max_counter);

                        }
			else {
			        heapPopMin(heap,&sha1_value,&access_counter,&ht_cache);
				if(!sha1_value)
				  ERROR("SHA1 Value in main is NULL\n");
				/*for(iCnt = 0;iCnt<sizeof(DedupeRecord.fp_bytes);++iCnt)
				  printf("%c",sha1_value[iCnt]);*/
                                //Update Bloom filter counters
                                hash1 = hash_djb2(sha1_value,sizeof(sha1_value))%bloom_filter_size;
                                hash2 = hash_sdbm(sha1_value,sizeof(sha1_value))%bloom_filter_size;
				//DEBUG_INFO("### In Main before decrement %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
				//Decrement BF counters
				bloom_filter[hash1]--;
                                bloom_filter[hash2]--;

				free(sha1_value);
				
				//GP - Increment the BF counters for this chunk
                                hash1 = hash_djb2(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
                                hash2 = hash_sdbm(DedupeRecord.fp_bytes,keySize)%bloom_filter_size;
				//DEBUG_INFO("### Returned hash values are in main cache_miss %ld and %ld\n",bloom_filter[hash1],bloom_filter[hash2]);
	                        max_counter = bloom_filter[hash1]++;
        	                if((bloom_filter[hash2]++) > max_counter)
                	                max_counter = bloom_filter[hash2];
								
				heapInsert(heap,DedupeRecord.fp_bytes,max_counter,&ht_cache);
				 if(cache_algorithm == LOOKAHEAD){
				   /* Check if any other chunks in the current CR will appear within the future window.
				   If we found one, we add such chunk(s) in the cache. */
				   Check_Unique_CRID(fp, Unique_CRID, file_position, 0, cache_size, window_size*cache_size, bloom_filter_size);
				 }
			}

                       
		} //else

	} //while

	printf("\n###################################################################\n");
	printf("Cache hit ratio: %.3f = %lu / %lu \n", (double) (Hit_Count * 100) / (double) totalAccess , Hit_Count, totalAccess);
	printf("Cache size: %ld, window size: %ld\n", cache_size, window_size*cache_size); 
	printf("Dedupe trace: %s\n", argv[4]);
	printf("###################################################################\n");
	fclose(fp1);
	fclose(fp);

	FreeHashTable(ht_cache);
	deleteMinHeap(heap);
	time(&end);
	printf("###################################################################\n");
	printf("Total time taken is %f \n",((double)clock()-start)/CLOCKS_PER_SEC);
	printf("###################################################################\n");
	return 0;
}
Exemple #3
0
void exampleMinHeap()
{
	// Use pooling for efficiency, if you don't want to use pooling
	// then comment out this line.
	pool_minheap(32);
	
	MinHeap* H = newMinHeap(31);
	
	minheap_add(H, 99, "99");
	minheap_add(H, 45, "45");
	minheap_add(H, 57, "57");
	minheap_add(H, 12, "12");
	minheap_add(H, 87, "87");
	minheap_add(H, 42, "42");
	minheap_add(H, 67, "67");
	
	minheap_display(H, 2, &toString);

	printf("Pop: '%s'\n", (char*)minheap_popMin(H));

	minheap_display(H, 2, &toString);

	printf("Pop: '%s'\n", (char*)minheap_popMin(H));

	minheap_display(H, 2, &toString);

	printf("Update 45 to 91\n");
	minheap_update(H, 45, 91);
	minheap_set(H, 91, "91");

	minheap_display(H, 2, &toString);

	printf("Update 91 to 1\n");
	minheap_update(H, 91, 1);
	minheap_set(H, 1, "1");
	
	minheap_display(H, 2, &toString);

	printf("Add 50\n");
	minheap_add(H, 50, "50");
	
	minheap_display(H, 2, &toString);

	printf("Pop: '%s'\n", (char*)minheap_popMin(H));

	minheap_display(H, 2, &toString);

	printf("Pop: '%s'\n", (char*)minheap_popMin(H));
	printf("Pop: '%s'\n", (char*)minheap_popMin(H));
	printf("Pop: '%s'\n", (char*)minheap_popMin(H));
	printf("Pop: '%s'\n", (char*)minheap_popMin(H));

	minheap_display(H, 2, &toString);

	minheap_clear(H);

	printf("\n");
	// Build a much bigger heap
	int total = 21;
	int keys[] = {0, 23, 3, 6, 41, 17, 21, 8, 9, 68, 2, 1, 34, 29, 38, 11, 15, 16, 45, 65, 39};
	char* items[] = {"0", "23", "3", "6", "41", "17", "21", "8", "9", "68", "2", "1", "34", "29", "38", "11", "15", "16", "45", "65", "39"};

	while (--total >= 0)
		minheap_add(H, keys[total], items[total]);

	minheap_display(H, 2, &toString);
	
	printf("Popping.. ");
	while (!minheap_isEmpty(H))
		printf("%s ", (char*)minheap_popMin(H));
	printf("\n");

	minheap_free(H);

	
	// If you're not using pooling this can be commented out. This will
	// free all pooled nodes from memory. Always call this at the end 
	// of using any List.
	unpool_minheap();
}