/*
 * Release the resources owned by a Rabin object: the fingerprint list hanging
 * off self->block, the block structure itself, and the reference held on
 * self->callback.
 *
 * Always returns 0.
 *
 * NOTE(review): the object's own storage is not released here (no tp_free
 * call). If this function is installed as the type's tp_dealloc, confirm
 * where the instance memory is freed.
 */
static int Rabin_dealloc(Rabin* self) {
  if (self->block != NULL) {
    free_rabin_fingerprint_list(self->block->head);
    free(self->block);
    /* Do not leave a dangling pointer behind, matching Rabin_clear(). */
    self->block = NULL;
  }

  /* Py_CLEAR drops the reference (if any) and nulls the field in one step. */
  Py_CLEAR(self->callback);
  return 0;
}
/*
 * Drop any fingerprint state currently attached to the object.
 *
 * When self->block is set, its fingerprint list and the block itself are
 * freed and the field is reset to NULL; otherwise nothing is touched.
 * Returns a new reference to None in both cases.
 */
static PyObject* Rabin_clear(Rabin* self) {
  /* Nothing attached: nothing to release. */
  if (self->block == NULL) {
    Py_RETURN_NONE;
  }

  free_rabin_fingerprint_list(self->block->head);
  free(self->block);
  self->block = NULL;

  Py_RETURN_NONE;
}
void rabinChunking(){ // CONSTANTS: (ESTIMATES FOR SAM) // RABIN WINDOW - 31 BYTES // RABIN AVG BLOCK - 8KB (8192 bytes) // RABIN TEMP MIN - 4KB (4096 bytes) // RABIN TEMP MAX - 16KB (16384 bytes) // [x] make chunk directory // [x] read files // [x] rabin algorithm // [x] chunk // [x] save chunks in a (hidden) folder w/in containers // [x] SHA-1 function to compute for chunk IDs // [x] determine primary fingerprint // HOW TO GET MINIMUM HASH?!?!? -- get minimum value. durr. // [x] primary/representative IDs must contain pointers to bin // [x] bin contains chunk ID, chunk sizes, and chunk addresses (.txt file, maybe?) // (one bin per file!) // [x] determine secondary fingerprint // [x] dump into disk // [x] piggyback small files char concat[300],**chunknames; struct files *temp; FILE *fin,*fout; int i; temp = root; rabin_polynomial_min_block_size = 4096; rabin_polynomial_max_block_size = 16384; rabin_polynomial_average_block_size = 8192; // loop through each file to get fingerprint for(i=0; i<file_count; i++){ // create chunk storage directory if it does not exist yet if(chunk_storage_exists == 0){ if(PLATFORM == 0){ strcpy(concat,"md .\\"); //////////edit later file_agent_dir_system } else{ strcpy(concat,"mkdir ./"); } strcat(concat,chunk_storage_dir); system(concat); memset(concat, 0, sizeof concat); } // read a particular file printf("file read: %s\n",temp->filename); fin = fopen(temp->filename, "rb+"); // made a code change here!!! initially r+ :) if(fin == NULL) { fprintf(stderr, "Cannot read file %s!\n",temp->filename); } // rabin algorithm + chunking struct rabin_polynomial *head = get_file_rabin_polys(fin); fclose(fin); free_rabin_fingerprint_list(head); // prints bin in stdout fin = fopen(temp->filename, "rb"); strcpy(concat,file_agent_dir); strcat(concat,chunk_storage_dir); strcat(concat,"/"); // printf("concat: %s\n",concat); chunknames = print_rabin_poly_list_to_file(fout,head,concat,fin); // [] modify to return array of chunknames per 1 file!!! 
, number of chunks == sizeof / length printf("MINDEX = %d\nMIN HASH = %s\nchunkcount = %d\n",mindex,minimum_sha,chunkcount); // EXTREME BINNING // piggyback small files; no need to bin int cc = chunkcount + 1; if(cc <= 2){ char holder[100]; strcpy(holder,chunknames[0]); if(cc == 2){ strcat(holder,"_"); strcat(holder,chunknames[1]); } strcpy(temp->chunkID,holder); } // normal files; must bin else{ FILE *bin; int k = 0; strcpy(temp->chunkID,minimum_sha); strcat(concat,"__bin__"); // strcat(concat,temp->soloname); // strcat(concat,"__"); strcat(concat,temp->chunkID); strcat(concat,".bin"); // printf("concat: %s\n",concat); bin = fopen(concat, "w"); fprintf(bin,"%s\n",temp->filename); for(k=0;k<cc;k++){ if(strcmp(temp->chunkID,chunknames[k])){ // if not primary fprintf(bin,"%s\n",chunknames[k]); //might add chunk size later? (but it's pretty useless though) //printf("%d: %s\n",k,chunknames[k]); //might add chunk size later? (but it's pretty useless though) // printf("hoeheoheohoheoheohoho %d\n",cc); } } fclose(bin); } // progress to next file temp = temp->ptr; memset(concat, 0, sizeof concat); } }