/**
 * Allocates and initializes an empty block.
 *
 * Calls initialize_rabin_polynomial_defaults(), then builds a block whose
 * head and tail both point at a fresh polynomial obtained from
 * gen_new_polynomial(NULL,0,0,0), whose counters are all zero and whose
 * sliding-window buffer (rabin_sliding_window_size bytes) is zero-filled.
 *
 * @return the new block, or NULL if any allocation fails. On failure
 *         everything allocated by this function is released first.
 */
struct rab_block_info *init_empty_block() {

    initialize_rabin_polynomial_defaults();

    struct rab_block_info *block = malloc(sizeof *block);
    if (block == NULL) {
        fprintf(stderr,"Could not allocate rabin polynomial block, no memory left!\n");
        return NULL;
    }

    /*
     * The window buffer is allocated before the head polynomial so that each
     * failure path below only has to release memory this function obtained
     * itself with malloc/calloc. calloc also provides the zero fill.
     */
    block->current_window_data = calloc(rabin_sliding_window_size, sizeof(char));
    if (block->current_window_data == NULL) {
        fprintf(stderr,"Could not allocate buffer for sliding window data!\n");
        free(block);
        return NULL;
    }

    block->head = gen_new_polynomial(NULL,0,0,0);
    if (block->head == NULL) {
        /* Couldn't allocate memory: do not leak the partially built block. */
        free(block->current_window_data);
        free(block);
        return NULL;
    }

    block->tail = block->head;
    block->cur_roll_checksum = 0;
    block->total_bytes_read = 0;
    block->window_pos = 0;
    block->current_poly_finished = 0;

    return block;
}
/**
 * Initializes the algorithm with the provided parameters.
 *
 * Stores the four arguments in the corresponding rabin_polynomial_*
 * settings and then runs initialize_rabin_polynomial_defaults().
 *
 * @param prime              value stored in rabin_polynomial_prime
 * @param max_size           value stored in rabin_polynomial_max_block_size
 * @param min_size           value stored in rabin_polynomial_min_block_size
 * @param average_block_size value stored in rabin_polynomial_average_block_size
 * @return whatever initialize_rabin_polynomial_defaults() returns
 */
int initialize_rabin_polynomial(uint64_t prime, unsigned max_size, unsigned int min_size, unsigned int average_block_size) {

    /* Block size limits first, then the prime; the assignments are independent. */
    rabin_polynomial_average_block_size = average_block_size;
    rabin_polynomial_min_block_size = min_size;
    rabin_polynomial_max_block_size = max_size;
    rabin_polynomial_prime = prime;

    int status = initialize_rabin_polynomial_defaults();
    return status;
}
/**
 * Reads a stream to its end and returns the polynomial list built from it.
 *
 * The stream is consumed in chunks of RAB_FILE_READ_BUF_SIZE bytes; each
 * chunk is handed to read_rabin_block() together with the block returned by
 * the previous call (NULL for the first chunk).
 *
 * @param file_to_read stream to read from; it is not closed here.
 * @return the head of the final block, or NULL when file_to_read is NULL,
 *         the read buffer cannot be allocated, or no block was produced
 *         (for example when the stream yields no data at all). A read error
 *         ends the loop the same way as end-of-file does.
 */
struct rabin_polynomial *get_file_rabin_polys(FILE *file_to_read) {

    initialize_rabin_polynomial_defaults();

    if (file_to_read == NULL) {
        return NULL;
    }

    char *file_data = malloc(RAB_FILE_READ_BUF_SIZE);
    if (file_data == NULL) {
        fprintf(stderr,"Could not allocate buffer for reading input file to rabin polynomial.\n");
        return NULL;
    }

    struct rab_block_info *block = NULL;
    size_t bytes_read; /* fread returns size_t, never a negative value */

    while ((bytes_read = fread(file_data, 1, RAB_FILE_READ_BUF_SIZE, file_to_read)) != 0) {
        block = read_rabin_block(file_data, bytes_read, block);
    }

    free(file_data);

    /* Nothing was read (or the last read_rabin_block call returned NULL):
     * there is no block to take a head from. */
    if (block == NULL) {
        return NULL;
    }

    struct rabin_polynomial *head = block->head;
    /* NOTE(review): init_empty_block() gives a block a separately allocated
     * current_window_data buffer; if this block came from there, that buffer
     * is not released by the free() below - confirm ownership. */
    free(block);
    return head;
}
/* Example #4 */
/* 0 */
/**
 * Writes the concatenation of first and second into dst, always
 * NUL-terminating.
 *
 * @return 0 on success, -1 if the result does not fit in dst_size bytes.
 */
static int join_checked(char *dst, size_t dst_size, const char *first, const char *second) {
	int len = snprintf(dst, dst_size, "%s%s", first, second);
	return (len < 0 || (size_t)len >= dst_size) ? -1 : 0;
}

/**
 * Allocates a zero-filled array of count elements of the given size and
 * terminates the program if the allocation fails.
 */
static void *alloc_zeroed_or_exit(size_t count, size_t size) {
	void *mem = calloc(count, size);
	/* A zero-sized request may legitimately return NULL. */
	if (mem == NULL && count != 0 && size != 0) {
		fprintf(stderr, "Could not allocate memory for analysis state\n");
		exit(EXIT_FAILURE);
	}
	return mem;
}

/**
 * Runs "mkdir -p <path>" through system().
 *
 * NOTE(review): path is interpolated into a shell command unquoted, so shell
 * metacharacters in a user-supplied -z value are interpreted by the shell.
 *
 * @return the value returned by system(), or -1 if the command does not fit.
 */
static int make_directory_tree(const char *path) {
	char mkcmd[200];
	int len = snprintf(mkcmd, sizeof mkcmd, "mkdir -p %s", path);
	if (len < 0 || (size_t)len >= sizeof mkcmd)
		return -1;
	return system(mkcmd);
}

/**
 * Program entry point.
 *
 * Options (value attached directly to the flag, e.g. -p/some/path):
 *   -f          process a folder (mutually exclusive with -d)
 *   -d          process a disk device (mutually exclusive with -f)
 *   -p<path>    path of the folder or device
 *   -b<value>   passed to str_split(..., FIXEDBLOCKS)
 *   -r<value>   passed to str_split(..., RABIN)
 *   -z<path>    base path used for the duplicates databases
 *   -h          calls help()
 *
 * After parsing, per-size counters and database handles are allocated, old
 * databases are removed, a new one is initialised per size, the folder or
 * device is processed, and the statistics are printed to stderr.
 *
 * @return 0 after a completed run.
 */
int main (int argc, char *argv[]){

	//directory or disk device
	int devicetype=-1;
	//path to the device; starts empty so the "missing -p" test below is well defined
	char devicepath[100]="";

	//path of databases folder
	int dbfolder=0;
	char dbfolderpath[100]="";


	while ((argc > 1) && (argv[1][0] == '-'))
	{
		switch (argv[1][1])
		{
			case 'f':
			//Test if -d is not being used also
			//NOTE(review): on conflict only usage() is called here; parsing then continues
			if(devicetype!=DEVICE)
				devicetype=FOLDER;
			else{
			   printf("Cannot use both -f and -d\n");
			   usage();
			}
			break;
			case 'd':
			//test if -f is not being used also
			if(devicetype!=FOLDER)
				devicetype=DEVICE;
			else{
			    printf("Cannot use both -f and -d\n\n");
			    usage();
			}
			break;
			case 'p':
				//bounded copy: the argument comes straight from the command line
				if(join_checked(devicepath,sizeof devicepath,&argv[1][2],"")!=0){
					printf("value given to -p is too long (max %d characters)\n\n",(int)(sizeof devicepath)-1);
					usage();
					exit(EXIT_FAILURE);
				}
			break;
			case 'b':
				str_split(&argv[1][2],FIXEDBLOCKS);
				break;
			case 'r':
				str_split(&argv[1][2],RABIN);
				break;
			case 'z':
				dbfolder=1;
				if(join_checked(dbfolderpath,sizeof dbfolderpath,&argv[1][2],"")!=0){
					printf("value given to -z is too long (max %d characters)\n\n",(int)(sizeof dbfolderpath)-1);
					usage();
					exit(EXIT_FAILURE);
				}
				break;
			case 'h':
				help();
				break;
			default:
				printf("Wrong Argument: %s\n", argv[1]);
				usage();
				exit(0);
				break;
			}

			++argv;
			--argc;
	}


	//test if iotype is defined
	if(devicetype!=FOLDER && devicetype!=DEVICE){
		printf("missing -f or -d\n\n");
		usage();
		exit(0);
	}
	//test if the path is defined
	if(strlen(devicepath)==0){
		printf("missing -p<value>\n\n");
		usage();
		exit(0);
	}
	if(nr_sizes_proc_rabin==0 && nr_sizes_proc==0){
		printf("missing -b<value> or -r<value>\n\n");
		usage();
		exit(0);
	}


	//number of block sizes being analysed (fixed sizes first, then rabin sizes)
	int n_sizes=nr_sizes_proc+nr_sizes_proc_rabin;

	//Initialize analysis variables; every counter array starts zeroed
	total_blocks=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(uint64_t));
	//identical chunks (that could be eliminated)
	eq=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(uint64_t));

	incomplete_blocks=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(uint64_t));
	incomplete_space=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(uint64_t));

	//distinct chunks
	dif=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(uint64_t));
	//distinct chunks with duplicates
	distinctdup=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(uint64_t));
	//duplicated disk space
	space=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(uint64_t));

	dbporiginal=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(DB**));
	envporiginal=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(DB_ENV**));

	dbprinter=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(DB**));
	envprinter=alloc_zeroed_or_exit((size_t)n_sizes,sizeof(DB_ENV**));


	int aux=0;
	for(aux=0;aux<n_sizes;aux++){

		dbporiginal[aux]=alloc_zeroed_or_exit(1,sizeof(DB *));
		envporiginal[aux]=alloc_zeroed_or_exit(1,sizeof(DB_ENV *));
		dbprinter[aux]=alloc_zeroed_or_exit(1,sizeof(DB *));
		envprinter[aux]=alloc_zeroed_or_exit(1,sizeof(DB_ENV *));

		char printdbpath[100];
		char duplicatedbpath[100];
		char sizeid[20];

		if(aux<nr_sizes_proc){
			snprintf(sizeid,sizeof sizeid,"fixed_%d",sizes_proc[aux]);
		}else{
			snprintf(sizeid,sizeof sizeid,"rabin_%d",sizes_proc_rabin[aux-nr_sizes_proc]);
		}

		//if a folder was specified for databases it replaces the default duplicates base path
		const char *duplicatebase=DUPLICATEDB;
		if(dbfolder==1)
			duplicatebase=dbfolderpath;

		if(join_checked(printdbpath,sizeof printdbpath,PRINTDB,sizeid)!=0 ||
		   join_checked(duplicatedbpath,sizeof duplicatedbpath,duplicatebase,sizeid)!=0){
			fprintf(stderr,"database path for %s is too long\n",sizeid);
			exit(EXIT_FAILURE);
		}

		//both results are checked; the run continues on failure as before
		int mkprint=make_directory_tree(printdbpath);
		int mkduplicate=make_directory_tree(duplicatedbpath);
		if(mkprint<0 || mkduplicate<0)
	    	perror("Error creating folders for databases\n");


		printf("Removing old databases\n");
		//remove databases if exist
		remove_db(duplicatedbpath,dbporiginal[aux],envporiginal[aux]);
		remove_db(printdbpath,dbprinter[aux],envprinter[aux]);

		printf("Initing new database\n");
		init_db(duplicatedbpath,dbporiginal[aux],envporiginal[aux]);

	}


	//initialize rabin block
	initialize_rabin_polynomial_defaults();
	cur_block=alloc_zeroed_or_exit((size_t)nr_sizes_proc_rabin,sizeof(struct rab_block_info *));
	int auxc=0;
	for(auxc=0;auxc<nr_sizes_proc_rabin;auxc++){
		cur_block[auxc]=NULL;
	}


	//check if it is a folder or device and start processing
	if(devicetype==FOLDER){
		printf("start processing folder %s\n",devicepath);
		search_dir(devicepath);
	}
	else{
		printf("start processing device %s\n",devicepath);
		extract_blocks(devicepath);
	}

	for(aux=0;aux<n_sizes;aux++){

		if(aux>=nr_sizes_proc)
			fprintf(stderr,"\n\n\nRabin Results for %d\n",sizes_proc_rabin[aux-nr_sizes_proc]);
		else
			fprintf(stderr,"\n\n\nFixed Size Results for %d\n",sizes_proc[aux]);

		fprintf(stderr,"files scanned %llu\n",(unsigned long long int)nfiles);
		fprintf(stderr,"space scanned %llu Bytes (including incomplete blocks)\n",(unsigned long long int)total_space);
		fprintf(stderr,"Complete and Incomplete Block statistics:\ntotal blocks scanned %llu\n",(unsigned long long int)total_blocks[aux]);
		//blocks without any duplicate are the distinct block minus the distinct blocks with duplicates
		uint64_t zerodups=dif[aux]-distinctdup[aux];
		fprintf(stderr,"blocks without duplicates %llu\n",(unsigned long long int)zerodups);
		fprintf(stderr,"distinct blocks with duplicates %llu\n",(unsigned long long int)distinctdup[aux]);
		fprintf(stderr,"duplicate blocks %llu\n",(unsigned long long int)eq[aux]);
		fprintf(stderr,"space saved %llu Bytes\n",(unsigned long long int)space[aux]);

		fprintf(stderr,"Incomplete Block statistics:\nincomplete blocks %llu\n",(unsigned long long int)incomplete_blocks[aux]);
		fprintf(stderr,"incomplete blocks space %llu Bytes\n",(unsigned long long int)incomplete_space[aux]);



		close_db(dbporiginal[aux],envporiginal[aux]);
		//TODO this is not removed to keep the database for dedisgen-utils
	}

	//free memory

	free(incomplete_space);
	free(incomplete_blocks);
	free(total_blocks);
	free(eq);
	free(dif);
	free(distinctdup);
	free(space);

	free(dbporiginal);
	free(envporiginal);
	free(dbprinter);
	free(envprinter);

return 0;

}