/** * Allocates an empty block */ struct rab_block_info *init_empty_block() { initialize_rabin_polynomial_defaults(); struct rab_block_info *block=malloc(sizeof(struct rab_block_info)); if(block == NULL) { fprintf(stderr,"Could not allocate rabin polynomial block, no memory left!\n"); return NULL; } block->head=gen_new_polynomial(NULL,0,0,0); if(block->head == NULL) return NULL; //Couldn't allocate memory block->tail=block->head; block->cur_roll_checksum=0; block->total_bytes_read=0; block->window_pos=0; block->current_poly_finished=0; block->current_window_data=malloc(sizeof(char)*rabin_sliding_window_size); if(block->current_window_data == NULL) { fprintf(stderr,"Could not allocate buffer for sliding window data!\n"); free(block); return NULL; } int i; for(i=0;i<rabin_sliding_window_size;i++) { block->current_window_data[i]=0; } return block; }
/**
 * Initializes the algorithm with the provided parameters.
 *
 * Copies the four arguments into the matching rabin_polynomial_* global
 * settings and then hands over to initialize_rabin_polynomial_defaults().
 *
 * @param prime              value stored in rabin_polynomial_prime
 * @param max_size           value stored in rabin_polynomial_max_block_size
 * @param min_size           value stored in rabin_polynomial_min_block_size
 * @param average_block_size value stored in rabin_polynomial_average_block_size
 * @return the result of initialize_rabin_polynomial_defaults()
 */
int initialize_rabin_polynomial(uint64_t prime, unsigned max_size, unsigned int min_size, unsigned int average_block_size) {
    /* Block size settings. */
    rabin_polynomial_average_block_size = average_block_size;
    rabin_polynomial_min_block_size = min_size;
    rabin_polynomial_max_block_size = max_size;

    /* Prime used by the polynomial. */
    rabin_polynomial_prime = prime;

    int status = initialize_rabin_polynomial_defaults();
    return status;
}
/**
 * Reads a stream to its end and returns the rabin polynomial list built
 * from its contents.
 *
 * The stream is consumed in chunks of RAB_FILE_READ_BUF_SIZE bytes, each of
 * which is passed to read_rabin_block() together with the block returned by
 * the previous call (NULL for the first chunk).
 *
 * @param file_to_read stream to read from; it is not closed here
 * @return the head of the polynomial list held by the final block, or NULL
 *         if the read buffer could not be allocated, if no data could be
 *         read (e.g. an empty file), or if no block was produced.
 */
struct rabin_polynomial *get_file_rabin_polys(FILE *file_to_read) {
    initialize_rabin_polynomial_defaults();

    char *file_data = malloc(RAB_FILE_READ_BUF_SIZE);
    if (file_data == NULL) {
        fprintf(stderr,"Could not allocate buffer for reading input file to rabin polynomial.\n");
        return NULL;
    }

    struct rab_block_info *block = NULL;
    size_t bytes_read;
    while ((bytes_read = fread(file_data, 1, RAB_FILE_READ_BUF_SIZE, file_to_read)) != 0) {
        block = read_rabin_block(file_data, bytes_read, block);
    }
    free(file_data);

    /* Nothing was read (or no block was produced): there is no list to return. */
    if (block == NULL) {
        return NULL;
    }

    /*
     * Only the block wrapper is released; the polynomial list is handed to
     * the caller.
     * NOTE(review): block->current_window_data is not released here, which
     * looks like a leak if the block came from init_empty_block() - confirm
     * against read_rabin_block().
     */
    struct rabin_polynomial *head = block->head;
    free(block);
    return head;
}
int main (int argc, char *argv[]){ //directory or disk device int devicetype=-1; //path to the device char devicepath[100]; //path of databases folder int dbfolder=0; char dbfolderpath[100]; while ((argc > 1) && (argv[1][0] == '-')) { switch (argv[1][1]) { case 'f': //Test if -d is not being used also if(devicetype!=DEVICE) devicetype=FOLDER; else{ printf("Cannot use both -f and -d\n"); usage(); } break; case 'd': //test if -f is not being used also if(devicetype!=FOLDER) devicetype=DEVICE; else{ printf("Cannot use both -f and -d\n\n"); usage(); } break; case 'p': strcpy(devicepath,&argv[1][2]); break; case 'b': str_split(&argv[1][2],FIXEDBLOCKS); break; case 'r': str_split(&argv[1][2],RABIN); break; case 'z': dbfolder=1; strcpy(dbfolderpath,&argv[1][2]); break; case 'h': help(); break; default: printf("Wrong Argument: %s\n", argv[1]); usage(); exit(0); break; } ++argv; --argc; } //test if iotype is defined if(devicetype!=FOLDER && devicetype!=DEVICE){ printf("missing -f or -d\n\n"); usage(); exit(0); } //test if testype is defined if(strlen(devicepath)==0){ printf("missing -p<value>\n\n"); usage(); exit(0); } if(nr_sizes_proc_rabin==0 && nr_sizes_proc==0){ printf("missing -b<value> or -r<value>\n\n"); usage(); exit(0); } //Initialize variables total_blocks=malloc(sizeof(uint64_t)*(nr_sizes_proc+nr_sizes_proc_rabin)); eq=malloc(sizeof(uint64_t)*(nr_sizes_proc+nr_sizes_proc_rabin)); incomplete_blocks=malloc(sizeof(uint64_t)*(nr_sizes_proc+nr_sizes_proc_rabin)); incomplete_space=malloc(sizeof(uint64_t)*(nr_sizes_proc+nr_sizes_proc_rabin)); dif=malloc(sizeof(uint64_t)*(nr_sizes_proc+nr_sizes_proc_rabin)); distinctdup=malloc(sizeof(uint64_t)*(nr_sizes_proc+nr_sizes_proc_rabin)); //zeroed_blocks=malloc(sizeof(uint64_t)*nr_sizes_proc); space=malloc(sizeof(uint64_t)*(nr_sizes_proc+nr_sizes_proc_rabin)); dbporiginal=malloc(sizeof(DB**)*(nr_sizes_proc+nr_sizes_proc_rabin)); envporiginal=malloc(sizeof(DB_ENV**)*(nr_sizes_proc+nr_sizes_proc_rabin)); 
dbprinter=malloc(sizeof(DB**)*(nr_sizes_proc+nr_sizes_proc_rabin)); envprinter=malloc(sizeof(DB_ENV**)*(nr_sizes_proc+nr_sizes_proc_rabin)); int aux=0; for(aux=0;aux<(nr_sizes_proc+nr_sizes_proc_rabin);aux++){ dbporiginal[aux]=malloc(sizeof(DB *)); envporiginal[aux]=malloc(sizeof(DB_ENV *)); dbprinter[aux]=malloc(sizeof(DB *)); envprinter[aux]=malloc(sizeof(DB_ENV *)); char printdbpath[100]; char duplicatedbpath[100]; char sizeid[20]; if(aux<nr_sizes_proc){ sprintf(sizeid,"fixed_%d",sizes_proc[aux]); }else{ sprintf(sizeid,"rabin_%d",sizes_proc_rabin[aux-nr_sizes_proc]); } //if a folder were specified for databases if(dbfolder==1){ strcpy(printdbpath,PRINTDB); strcat(printdbpath,sizeid); strcpy(duplicatedbpath,dbfolderpath); strcat(duplicatedbpath,sizeid); } else{ strcpy(printdbpath,PRINTDB); strcat(printdbpath,sizeid); strcpy(duplicatedbpath,DUPLICATEDB); strcat(duplicatedbpath,sizeid); } char mkcmd[200]; sprintf(mkcmd, "mkdir -p %s", printdbpath); int ress = system(mkcmd); sprintf(mkcmd, "mkdir -p %s", duplicatedbpath); ress=system(mkcmd); if(ress<0) perror("Error creating folders for databases\n"); printf("Removing old databases\n"); //remove databases if exist remove_db(duplicatedbpath,dbporiginal[aux],envporiginal[aux]); remove_db(printdbpath,dbprinter[aux],envprinter[aux]); printf("Initing new database\n"); init_db(duplicatedbpath,dbporiginal[aux],envporiginal[aux]); } //initialize analyzis variables bzero(incomplete_blocks,(nr_sizes_proc+nr_sizes_proc_rabin)*(sizeof(uint64_t))); bzero(incomplete_space,(nr_sizes_proc+nr_sizes_proc_rabin)*(sizeof(uint64_t))); //initialize analyzis variables bzero(total_blocks,(nr_sizes_proc+nr_sizes_proc_rabin)*(sizeof(uint64_t))); //identical chunks (that could be eliminated) bzero(eq,(nr_sizes_proc+nr_sizes_proc_rabin)*(sizeof(uint64_t))); //distinct chunks bzero(dif,(nr_sizes_proc+nr_sizes_proc_rabin)*(sizeof(uint64_t))); //distinct chunks with duplicates 
bzero(distinctdup,(nr_sizes_proc+nr_sizes_proc_rabin)*(sizeof(uint64_t))); //chunks that were appended with zeros due to their size //bzero(zeroed_blocks,nr_sizes_proc*(sizeof(uint64_t))); //duplicated disk space bzero(space,(nr_sizes_proc+nr_sizes_proc_rabin)*(sizeof(uint64_t))); //initialize rabin block initialize_rabin_polynomial_defaults(); cur_block=malloc(sizeof(struct rab_block_info *)*nr_sizes_proc_rabin); int auxc=0; for(auxc=0;auxc<nr_sizes_proc_rabin;auxc++){ cur_block[auxc]=NULL; } //check if it is a folder or device and start processing if(devicetype==FOLDER){ printf("start processing folder %s\n",devicepath); search_dir(devicepath); } else{ printf("start processing device %s\n",devicepath); extract_blocks(devicepath); } for(aux=0;aux<(nr_sizes_proc+nr_sizes_proc_rabin);aux++){ if(aux>=nr_sizes_proc) fprintf(stderr,"\n\n\nRabin Results for %d\n",sizes_proc_rabin[aux-nr_sizes_proc]); else fprintf(stderr,"\n\n\nFixed Size Results for %d\n",sizes_proc[aux]); fprintf(stderr,"files scanned %llu\n",(unsigned long long int)nfiles); fprintf(stderr,"space scanned %llu Bytes (including incomplete blocks)\n",(unsigned long long int)total_space); fprintf(stderr,"Complete and Incomplete Block statistics:\ntotal blocks scanned %llu\n",(unsigned long long int)total_blocks[aux]); //fprintf(stderr,"total blocks with zeros appended %llu\n",(unsigned long long int)zeroed_blocks[aux]); //blocks without any duplicate are the distinct block minus the distinct blocks with duplicates uint64_t zerodups=dif[aux]-distinctdup[aux]; fprintf(stderr,"blocks without duplicates %llu\n",(unsigned long long int)zerodups); fprintf(stderr,"distinct blocks with duplicates %llu\n",(unsigned long long int)distinctdup[aux]); fprintf(stderr,"duplicate blocks %llu\n",(unsigned long long int)eq[aux]); fprintf(stderr,"space saved %llu Bytes\n",(unsigned long long int)space[aux]); fprintf(stderr,"Incomplete Block statistics:\nincomplete blocks %llu\n",(unsigned long long 
int)incomplete_blocks[aux]); fprintf(stderr,"incomplete blocks space %llu Bytes\n",(unsigned long long int)incomplete_space[aux]); close_db(dbporiginal[aux],envporiginal[aux]); //TODO this is not removed to keep the database for dedisgen-utils //remove_db(duplicatedbpath,dbporiginal,envporiginal); } //free memory free(incomplete_space); free(incomplete_blocks); free(total_blocks); free(eq); free(dif); free(distinctdup); //free(zeroed_blocks); free(space); free(dbporiginal); free(envporiginal); free(dbprinter); free(envprinter); return 0; }