Exemplo n.º 1
0
/*
 * Release the resources held by a Rabin object: the fingerprint list hanging
 * off self->block, the block itself, and the reference to self->callback.
 * Always returns 0.
 *
 * NOTE(review): if this is installed as the type's tp_dealloc slot, CPython
 * expects a void function that also calls tp_free on the object - confirm
 * against the type definition, which is not visible here.
 */
static int
Rabin_dealloc(Rabin* self)
{
  if (self->block) {
    /* The list must be released first; it is reached through the block. */
    free_rabin_fingerprint_list(self->block->head);
    free(self->block);
  }
  Py_XDECREF(self->callback);
  return 0;
}
Exemplo n.º 2
0
/*
 * Drop any fingerprint data currently held by the Rabin object and reset
 * self->block to NULL. Does nothing to the block when none is present.
 * Returns a new reference to None.
 */
static PyObject*
Rabin_clear(Rabin* self)
{
  PyObject* none = Py_None;

  if (self->block) {
    /* Free the list before the block that owns its head pointer. */
    free_rabin_fingerprint_list(self->block->head);
    free(self->block);
    self->block = NULL;
  }

  Py_INCREF(none);
  return none;
}
Exemplo n.º 3
0
/*
 * Chunk every file in the global `root` list with Rabin fingerprinting and
 * record the resulting chunk IDs (extreme binning).
 *
 * Block-size settings applied before chunking:
 *   minimum block 4096 bytes, average block 8192 bytes, maximum block 16384 bytes.
 *
 * For each of the `file_count` files:
 *   1. create the chunk storage directory when `chunk_storage_exists` is 0;
 *   2. compute the file's Rabin polynomial list;
 *   3. hand the list to print_rabin_poly_list_to_file(), which returns the
 *      chunk names for that file;
 *   4. small files (at most two chunks) are "piggybacked": their chunk names
 *      are joined with '_' and stored directly in temp->chunkID;
 *   5. larger files get temp->chunkID = minimum_sha and a
 *      "__bin__<chunkID>.bin" file listing the file name followed by every
 *      chunk name that differs from the primary ID.
 *
 * Files that cannot be opened, or whose paths do not fit the local buffers,
 * are reported on stderr and skipped instead of crashing.
 */
void rabinChunking(){

	char concat[300],**chunknames;
	struct files *temp;
	FILE *fin;
	/* NOTE(review): fout was passed uninitialized in the original; it is
	 * unclear from this file how print_rabin_poly_list_to_file uses it.
	 * NULL at least makes the value well-defined - confirm against callee. */
	FILE *fout = NULL;
	int i;

	rabin_polynomial_min_block_size = 4096;
	rabin_polynomial_max_block_size = 16384;
	rabin_polynomial_average_block_size = 8192;

	// loop through each file to get fingerprint; the list pointer is advanced
	// in the for-header so that `continue` still moves on to the next file,
	// and a short list stops the loop instead of dereferencing NULL
	for(i=0, temp=root; i<file_count && temp != NULL; i++, temp=temp->ptr){
		struct rabin_polynomial *head;
		size_t used;
		int written;
		int cc;

		// create chunk storage directory if it does not exist yet
		if(chunk_storage_exists == 0){
			written = snprintf(concat, sizeof concat, "%s%s",
			                   (PLATFORM == 0) ? "md .\\" : "mkdir ./",
			                   chunk_storage_dir);
			if(written < 0 || (size_t)written >= sizeof concat){
				fprintf(stderr, "Chunk storage directory name is too long!\n");
			}
			else{
				system(concat);
			}
			memset(concat, 0, sizeof concat);
		}

		// read a particular file
		printf("file read: %s\n",temp->filename);
		fin = fopen(temp->filename, "rb+");
		if(fin == NULL) {
			fprintf(stderr, "Cannot read file %s!\n",temp->filename);
			continue; // nothing to fingerprint; do not touch a NULL stream
		}

		// rabin algorithm + chunking
		head = get_file_rabin_polys(fin);
		fclose(fin);

		// directory prefix handed to the chunk writer
		written = snprintf(concat, sizeof concat, "%s%s/", file_agent_dir, chunk_storage_dir);
		if(written < 0 || (size_t)written >= sizeof concat){
			fprintf(stderr, "Chunk storage path is too long for file %s!\n",temp->filename);
			free_rabin_fingerprint_list(head);
			memset(concat, 0, sizeof concat);
			continue;
		}

		// reopen the file so the chunk writer can read its contents
		fin = fopen(temp->filename, "rb");
		if(fin == NULL) {
			fprintf(stderr, "Cannot read file %s!\n",temp->filename);
			free_rabin_fingerprint_list(head);
			memset(concat, 0, sizeof concat);
			continue;
		}

		// NOTE(review): fin is not closed in this function, as in the original,
		// because it is not visible whether print_rabin_poly_list_to_file
		// closes it. If it does not, add fclose(fin) right after this call.
		chunknames = print_rabin_poly_list_to_file(fout,head,concat,fin);

		// the polynomial list is only released once its last user is done
		// (the original freed it before this call, a use-after-free)
		free_rabin_fingerprint_list(head);

		if(chunknames == NULL){
			fprintf(stderr, "No chunk names produced for file %s!\n",temp->filename);
			memset(concat, 0, sizeof concat);
			continue;
		}

		printf("MINDEX = %d\nMIN HASH = %s\nchunkcount = %d\n",mindex,minimum_sha,chunkcount);

		// EXTREME BINNING

		// piggyback small files; no need to bin
		cc = chunkcount + 1;
		if(cc <= 2){
			char holder[100];

			if(cc == 2){
				written = snprintf(holder, sizeof holder, "%s_%s", chunknames[0], chunknames[1]);
			}
			else{
				written = snprintf(holder, sizeof holder, "%s", chunknames[0]);
			}

			if(written < 0 || (size_t)written >= sizeof holder){
				fprintf(stderr, "Chunk ID is too long for file %s!\n",temp->filename);
			}
			else{
				strcpy(temp->chunkID,holder);
			}
		}
		// normal files; must bin
		else{
			FILE *bin;
			int k;

			strcpy(temp->chunkID,minimum_sha);

			// append the bin file name to whatever prefix concat holds now
			used = strlen(concat);
			written = snprintf(concat + used, sizeof concat - used, "__bin__%s.bin", temp->chunkID);
			if(written < 0 || (size_t)written >= sizeof concat - used){
				fprintf(stderr, "Bin path is too long for file %s!\n",temp->filename);
			}
			else{
				bin = fopen(concat, "w");
				if(bin == NULL){
					fprintf(stderr, "Cannot create bin file %s!\n",concat);
				}
				else{
					fprintf(bin,"%s\n",temp->filename);
					for(k=0;k<cc;k++){
						// list every chunk except the primary one
						if(strcmp(temp->chunkID,chunknames[k]) != 0){
							fprintf(bin,"%s\n",chunknames[k]);
						}
					}
					fclose(bin);
				}
			}
		}

		// NOTE(review): chunknames is not freed here because its ownership is
		// not visible from this function - confirm who releases it.

		memset(concat, 0, sizeof concat);
	}
}