示例#1
0
/*********************
 *                   *
 *       APIs        *
 *                   *
 *********************/
/*
 * Matrix-based decode of the erased data devices.
 *
 * k, m        - number of data and coding devices.
 * w           - word size; only 8, 16 and 32 are accepted.
 * matrix      - coding matrix handed to jerasure_make_decoding_matrix().
 * row_k_ones  - non-zero when the first coding row is all ones, i.e. coding
 *               device 0 is plain parity and can rebuild a single data device.
 * erasures    - list of erased device ids, converted to a per-device flag
 *               array by jerasure_erasures_to_erased().
 * data_ptrs, coding_ptrs, size - device buffers and their size in bytes.
 *
 * Returns -1 when w is unsupported, when the erasure list cannot be
 * converted, when a temporary allocation fails or when the decoding matrix
 * cannot be built.
 */
int jerasure_matrix_decode_data(int k, int m, int w, int *matrix, int row_k_ones, int *erasures,
        char **data_ptrs, char **coding_ptrs, int size)
{
    int i, edd, lastdrive;
    int *tmpids;
    int *erased, *decoding_matrix, *dm_ids;

    /* Reject word sizes other than 8, 16 and 32. */
    if (w != 8 && w != 16 && w != 32) return -1;

    /* erased[i] is non-zero when device i is erased (data devices are 0..k-1,
       erased[k] is coding device 0). */
    erased = jerasure_erasures_to_erased(k, m, erasures);
    if (erased == NULL) return -1;

    /* Count the erased data devices (edd) and remember the last one. */

    lastdrive = k;

    edd = 0;
    for (i = 0; i < k; i++) {
        if (erased[i]) {
            edd++;
            lastdrive = i;
        }
    }

    /* You only need to create the decoding matrix in the following cases:

       1. edd > 0 and row_k_ones is false.
       2. edd > 0 and row_k_ones is true and coding device 0 has been erased.
       3. edd > 1

       We're going to use lastdrive to denote when to stop decoding data.
       At this point in the code, it is equal to the last erased data device.
       However, if we can't use the parity row to decode it (i.e. row_k_ones=0
       or erased[k] = 1), we're going to set it to k so that the decoding
       pass will decode all data.
       */

    if (!row_k_ones || erased[k]) lastdrive = k;

    dm_ids = NULL;
    decoding_matrix = NULL;

    /* Same three cases as listed above, folded into one condition. */
    if (edd > 1 || (edd > 0 && (!row_k_ones || erased[k]))) {
        dm_ids = talloc(int, k);
        if (dm_ids == NULL) {
            free(erased);
            return -1;
        }

        decoding_matrix = talloc(int, k*k);
        if (decoding_matrix == NULL) {
            free(erased);
            free(dm_ids);
            return -1;
        }

        /* On failure release everything allocated so far before bailing out. */
        if (jerasure_make_decoding_matrix(k, m, w, matrix, erased, decoding_matrix, dm_ids) < 0) {
            free(erased);
            free(dm_ids);
            free(decoding_matrix);
            return -1;
        }
    }
/*
 * Decode a product-matrix MSR code without a separate output buffer.
 *
 * The message matrix M (blocks S1, S2 and, when d > 2k-2, T and Z) is rebuilt
 * in a private buffer from the surviving devices in `input`; the first k-1
 * sub-packets of every erased device are then re-encoded from M directly into
 * input[j].
 *
 * input      - n per-device buffers, each holding (d-k+1) sub-packets of
 *              input_size/(d-k+1) bytes.
 * input_size - size in bytes of one device buffer.
 * erasures   - list of erased device ids terminated by -1.
 * info       - code parameters (info->req.{n,d,k,w}) plus the generator
 *              matrix, its bitmatrix and the sub-bitmatrices used here.
 *
 * Returns 1 on success and -1 on failure (invalid parameters, allocation
 * failure, fewer than k surviving devices, or a failing Jerasure call).
 * NOTE(review): the checks on jerasure_schedule_decode_lazy() and
 * jerasure_invert_matrix() assume the stock Jerasure convention of returning
 * a negative value on failure - confirm against the bundled Jerasure copy.
 */
int decode_MSR_product_matrix_no_output(char **input, size_t input_size, int* erasures, struct coding_info *info)
{
	int i, j, c1, c2, tdone, inv;
	int ret = -1;
	int n = info->req.n;
	int d = info->req.d;
	int k = info->req.k;
	int w = info->req.w;
	int subpacket_size;
	int num_of_long;
	long *src_pos, *des_pos;
	char **ptrs;
	/* Everything released at `complete:` starts out NULL so that any early
	   exit can run the same cleanup code safely. */
	int *vector_A = NULL;
	int **decode_schedule = NULL;
	int **inv_schedule = NULL;
	int *bitmatrix_inv = NULL;
	int *erased = NULL;
	int *bitmatrix_temp = NULL;
	char *data_transformed = NULL;
	int *pseudo_erasures = NULL;
	char **M_ptrs = NULL;
	char **data_ptrs = NULL;
	char **coding_ptrs = NULL;
	int *remaining = NULL;
	char *buffer1 = NULL;
	int *buffer1_int = NULL;
	char *buffer2 = NULL;

	/* d-k+1 is used as a divisor and as the row width of M_ptrs, and the
	   pointer set-up below touches rows 0..2k-3, so d >= 2k-2 is required. */
	if(k<1||d<k||d<2*k-2)
		return(-1);

	subpacket_size = input_size/(d-k+1);
	num_of_long = subpacket_size/sizeof(long);

	bitmatrix_temp = malloc(sizeof(int)*(k-1)*(k-1)*w*w*4);
	// buffer for the transformed data, i.e. matrix M. We regenerate the erased
	// data from M using the encoding matrix.
	data_transformed = malloc(input_size*k);
	// in order to recompute M, we view it as a systematic MDS code, sometimes
	// (n+k,k), sometimes (n+d-k+1,d-k+1), thus we over-allocate
	pseudo_erasures = malloc(sizeof(int)*(n*2));
	// pointer matrix to the elements of M
	M_ptrs = malloc(sizeof(void*)*d*(d-k+1));
	data_ptrs = malloc(sizeof(void*)*n);
	coding_ptrs = malloc(sizeof(void*)*n);
	// devices that are not erased
	remaining = malloc(sizeof(int)*k);
	// need subpacket_size*k >= (max(4,k-1)+k-1)*sizeof(int), hence the factor of 2
	buffer1 = malloc(subpacket_size*k*(k-1)*2);
	buffer2 = malloc(subpacket_size*k*(k-1));

	if(bitmatrix_temp==NULL||data_transformed==NULL||pseudo_erasures==NULL||data_ptrs==NULL
		||coding_ptrs==NULL||buffer1==NULL||buffer2==NULL||M_ptrs==NULL||remaining==NULL){
		printf("Can not allocate memory\n");
		goto complete;
	}
	buffer1_int = (int*)buffer1;                  // alternative view of buffer1

	// set up pointers for matrix M
	// first k-1 rows, only have S1
	for(i=0,c2=0;i<k-1;i++){
		c1 = i*(d-k+1);
		for(j=i;j<k-1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		for(j=k-1;j<d-k+1;j++)
			M_ptrs[c1+j] = NULL;
		for(j=0;j<i;j++)
			M_ptrs[c1+j] = M_ptrs[j*(d-k+1)+i]; // symmetric matrix: (i,j) and (j,i) share one pointer
	}
	// next k-1 rows
	for(i=0;i<k-1;i++){
		c1 = (k-1+i)*(d-k+1);
		for(j=i;j<d-k+1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		for(j=0;j<i;j++)
			M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i];
	}
	// next 1 and (d-2k+1) rows, may not exist
	if(d>2*k-2)
	{
		c1 = (2*k-2)*(d-k+1);
		for(j=k-1;j<d-k+1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		// the bound is k-1; the original relied on the value of i left over
		// from the previous loop, which is the same number
		for(j=0;j<k-1;j++)
			M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+k-1];
		for(i=2*k-1;i<d;i++){
			c1 = i*(d-k+1);
			for(j=0;j<k;j++)
				M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i-k+1];
			for(j=k;j<d-k+1;j++)
				M_ptrs[c1+j] = NULL;
		}
	}

	// first decode the last d-2k+1 columns of T and Z: view it as an (n+k,k) MDS code. Strictly speaking this
	// might not be a real (n+k,k) MDS code, but it hardly matters.
	// before decoding, prepare the pseudo-erasure location array
	if(d>2*k-2){ // only when d>2k-2, T and Z exist
		for(i=0;i<k;i++)
			pseudo_erasures[i] = i;
		for(i=0;i<n;i++){
			if(erasures[i]==-1){
				pseudo_erasures[i+k] = -1;
				break;
			}
			pseudo_erasures[i+k] = erasures[i] + k;
		}

		// make the decoding schedule: this saves the trouble of generating the schedule manually,
		// at the expense of more computation
		decode_schedule = jerasure_generate_decoding_schedule(k, n, w, info->subbitmatrix_array[0], pseudo_erasures, 1);
		if(decode_schedule==NULL)
			goto complete;

		for(i=0;i<d-2*k+1;i++){
			for(j=0;j<k;j++)
				data_ptrs[j] = M_ptrs[i+k+(d-k+1)*(k-1+j)];
			for(j=0;j<n;j++)
				coding_ptrs[j] = input[j]+subpacket_size*(k+i);
			ptrs = set_up_ptrs_for_scheduled_decoding(k, n, pseudo_erasures, data_ptrs,coding_ptrs);
			if (ptrs == NULL){
				printf("Can not allocate memory\n");
				goto complete;
			}
			// assume packetsize = ALIGNMENT
			for (tdone = 0; tdone < subpacket_size; tdone += ALIGNMENT*w) {
				jerasure_do_scheduled_operations(ptrs, decode_schedule, ALIGNMENT);
				for (c1 = 0; c1 < k+n; c1++) ptrs[c1] += (ALIGNMENT*w);
			}
			free(ptrs);
		}
		// next decode the first column of T and Z: we view this as an (n+d-k+1,d-k+1) erasure code
		// first set up the pseudo-erasure location vector
		for(i=0;i<k;i++)
			pseudo_erasures[i] = i; // in the first column of Z the last d-2k+1 elements are known, the first k are not
		for(i=0;i<n;i++)
		{
			if(erasures[i]==-1){
				pseudo_erasures[i+k] = -1;
				break;
			}
			pseudo_erasures[i+k] = erasures[i]+d-k+1;
		}
		for(j=0;j<d-k+1;j++)
			data_ptrs[j] = M_ptrs[(d-k+1)*(k-1+j)+k-1];
		for(j=0;j<n;j++)
			coding_ptrs[j] = input[j]+subpacket_size*(k-1);
		if(jerasure_schedule_decode_lazy(d-k+1,n,w,
					info->subbitmatrix_array[1],pseudo_erasures,data_ptrs,
					coding_ptrs,subpacket_size,ALIGNMENT,0)<0)
			goto complete;
	}

	// now the hard part: decode S1 and S2. The algorithm used here is slightly different from that in the paper:
	// instead of right-multiplying by \Phi_{DC}', we only right-multiply by the sub-matrix of \Phi_{DC}' without
	// its last row

	// set up the remaining-device array to facilitate decoding
	erased = jerasure_erasures_to_erased(k, n-k, erasures);
	if(erased==NULL)
		goto complete;
	for(i=0,c1=0;i<n&&c1<k;i++){
		if(erased[i]==0){
			remaining[c1] = i;
			c1++;
		}
	}
	// all k entries of remaining[] are read below; with fewer survivors the
	// tail of the array would be uninitialized
	if(c1<k)
		goto complete;

	// compute C_{DC}-\Delta_{DC}*T'
	for(i=0;i<k-1;i++){ // has k-1 columns
		for(j=0;j<d-2*k+2;j++)
			data_ptrs[j] = M_ptrs[(2*k-2+j)*(d-k+1)+i];
		for(j=0;j<k;j++)
			coding_ptrs[j] = buffer1+(j*(k-1)+i)*subpacket_size;
		for(j=0;j<k;j++){
			jerasure_bitmatrix_dotprod(d-2*k+2, w, info->subbitmatrix_array[2]+remaining[j]*(d-2*k+2)*w*w, NULL, j+d-2*k+2,
					data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
			src_pos = (long*)(input[remaining[j]]+i*subpacket_size);
			des_pos	= (long*)(coding_ptrs[j]);
			for(c2=0;c2<num_of_long;c2++)
				des_pos[c2] ^= src_pos[c2];
		}
	}

	// right multiply by \Phi_{DC}': this will be P.
	// result is in buffer2
	for(j=0;j<k;j++){ // j-th row
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer1+(j*(k-1)+i)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = buffer2 +(j*(k-1)+i)*subpacket_size;
		for(i=0;i<k-1;i++){
			jerasure_bitmatrix_dotprod(k-1, w, info->subbitmatrix_array[3]+remaining[i]*(k-1)*w*w, NULL, i+k-1,
					data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
		}
	}
	// now solve for the off-diagonal terms
	for(i=0;i<k-1;i++){
		for(j=i+1;j<k-1;j++){
			// solve for S1 tilde off-diagonal
			// here we directly use the fact that Lambda = [0 1 2 3 ....];
			int** temp_schedule;
			inv = galois_single_divide(1,remaining[i]^remaining[j],w);
			buffer1_int[0] = buffer1_int[1] = inv;
			data_ptrs[0] = buffer2+(i*(k-1)+j)*subpacket_size;
			data_ptrs[1] = buffer2+(j*(k-1)+i)*subpacket_size;
			coding_ptrs[0] = M_ptrs[i*(d-k+1)+j];
			jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp);
			temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp);
			if(temp_schedule==NULL) // check before the schedule is used
				goto complete;
			jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT);
			jerasure_free_schedule(temp_schedule);
			temp_schedule = NULL;

			// solve for S2 tilde off-diagonal
			buffer1_int[0] = galois_single_multiply(remaining[j],inv,w);
			buffer1_int[1] = galois_single_multiply(remaining[i],inv,w);
			coding_ptrs[0] = M_ptrs[(i+k-1)*(d-k+1)+j];
			jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp);
			temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp);
			if(temp_schedule==NULL)
				goto complete;
			jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT);
			jerasure_free_schedule(temp_schedule);
			temp_schedule = NULL;
		}
	}

	// compute the A vector: A*\Phi_{DC1} = \Phi_{DC2}; per the construction \Phi_{DC1} is expected to be full rank
	// we first reuse buffer1 here to form the \Phi_{DC1} matrix and then compute its inverse
	for(i=0;i<k-1;i++){
		memcpy(buffer1_int+(k-1)*(k-1+i),(void*)(info->matrix+remaining[i]*d+k-1),(k-1)*sizeof(int));
	}

	if(jerasure_invert_matrix(buffer1_int+(k-1)*(k-1),buffer1_int,k-1,w)<0)
		goto complete;
	vector_A = jerasure_matrix_multiply(info->matrix+remaining[k-1]*d+k-1,buffer1_int,1,k-1,k-1,k-1,w);
	if(vector_A==NULL)
		goto complete;

	for(i=0;i<k-1;i++){
		buffer1_int[i+(k-1)*(k-1)] = galois_single_multiply(vector_A[i],remaining[k-1],w);
		buffer1_int[i+k*(k-1)] = vector_A[i];
	}

	// solve for the two diagonal entries (S1 tilde and S2 tilde) of column i as a 2-erasure decode
	for(i=0;i<k-1;i++){
		memset(buffer1_int+(k-1)*(k+1),0,sizeof(int)*2*(k-1));
		buffer1_int[(k-1)*(k+1)+i] = remaining[i];
		buffer1_int[(k-1)*(k+2)+i] = 1;

		pseudo_erasures[0] = i;
		pseudo_erasures[1] = k-1+i;
		pseudo_erasures[2] = -1;
		for(j=0;j<2*k-2;j++)
			data_ptrs[j] = M_ptrs[i+j*(d-k+1)];
		coding_ptrs[0] = buffer2+((k-1)*(k-1)+i)*subpacket_size;
		coding_ptrs[1] = buffer2+(i*(k-1)+i)*subpacket_size;

		jerasure_matrix_to_bitmatrix_noallocate(2*k-2,2,w,buffer1_int+(k-1)*(k-1),bitmatrix_temp);
		if(jerasure_schedule_decode_lazy(2*k-2,2,w,bitmatrix_temp,pseudo_erasures,data_ptrs,coding_ptrs,subpacket_size,ALIGNMENT,0)<0)
			goto complete;
	}

	// now we have both \tilde{S_1} and \tilde{S_2} in M_ptrs, need to recover S1 and S2 from them
	// this is done by multiplying \tilde{S_1} left and right by inv(\Phi_{DC1}), which sits at the start of buffer1
	bitmatrix_inv = jerasure_matrix_to_bitmatrix(k-1,k-1,w,buffer1_int);
	if(bitmatrix_inv==NULL)
		goto complete;
	inv_schedule = jerasure_smart_bitmatrix_to_schedule(k-1, k-1, w, bitmatrix_inv);
	if(inv_schedule==NULL)
		goto complete;

	// right-multiply for S1
	for(i=0;i<k-1;i++){
		for(j=0;j<k-1;j++)
			data_ptrs[j] = M_ptrs[i*(d-k+1)+j];
		for(j=0;j<k-1;j++)
			coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size;
		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// left-multiply for S1
	for(j=0;j<k-1;j++){
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = M_ptrs[i*(d-k+1)+j];

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// right-multiply for S2
	for(i=0;i<k-1;i++){
		for(j=0;j<k-1;j++)
			data_ptrs[j] = M_ptrs[(i+k-1)*(d-k+1)+j];
		for(j=0;j<k-1;j++)
			coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size;

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// left-multiply for S2
	for(j=0;j<k-1;j++){
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = M_ptrs[(i+k-1)*(d-k+1)+j];

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// having S1, S2 and T, we can now fill the first k-1 columns of the erased devices
	for(i=0;i<k-1;i++){
		for(j=0;j<d;j++)
			data_ptrs[j] = M_ptrs[j*(d-k+1)+i];
		for(j=0;j<n;j++)
			coding_ptrs[j] = input[j]+i*subpacket_size;
		for(j=0;j<n;j++){
			if(erased[j]==1)
				jerasure_bitmatrix_encode(d,1,w,info->bitmatrix+(j*d*w*w),data_ptrs,coding_ptrs+j,subpacket_size,ALIGNMENT);
		}
	}

	ret = 1;

	// clean up: single exit path shared by success and every failure
complete:
	// the schedules are only released when they were actually created
	if(decode_schedule!=NULL)
		jerasure_free_schedule(decode_schedule);
	if(inv_schedule!=NULL)
		jerasure_free_schedule(inv_schedule);
	free(bitmatrix_inv);
	free(data_ptrs);
	free(coding_ptrs);
	free(erased);
	free(pseudo_erasures);
	free(data_transformed);
	free(M_ptrs);
	free(buffer1);
	free(buffer2);
	free(remaining);
	free(bitmatrix_temp);
	free(vector_A);
	return(ret);
}