/********************* * * * APIs * * * *********************/ int jerasure_matrix_decode_data(int k, int m, int w, int *matrix, int row_k_ones, int *erasures, char **data_ptrs, char **coding_ptrs, int size) { int i, edd, lastdrive; int *tmpids; int *erased, *decoding_matrix, *dm_ids; if (w != 8 && w != 16 && w != 32) return -1; erased = jerasure_erasures_to_erased(k, m, erasures); if (erased == NULL) return -1; /* Find the number of data drives failed */ lastdrive = k; edd = 0; for (i = 0; i < k; i++) { if (erased[i]) { edd++; lastdrive = i; } } /* You only need to create the decoding matrix in the following cases: 1. edd > 0 and row_k_ones is false. 2. edd > 0 and row_k_ones is true and coding device 0 has been erased. 3. edd > 1 We're going to use lastdrive to denote when to stop decoding data. At this point in the code, it is equal to the last erased data device. However, if we can't use the parity row to decode it (i.e. row_k_ones=0 or erased[k] = 1, we're going to set it to k so that the decoding pass will decode all data. */ if (!row_k_ones || erased[k]) lastdrive = k; dm_ids = NULL; decoding_matrix = NULL; if (edd > 1 || (edd > 0 && (!row_k_ones || erased[k]))) { dm_ids = talloc(int, k); if (dm_ids == NULL) { free(erased); return -1; } decoding_matrix = talloc(int, k*k); if (decoding_matrix == NULL) { free(erased); free(dm_ids); return -1; } if (jerasure_make_decoding_matrix(k, m, w, matrix, erased, decoding_matrix, dm_ids) < 0) { free(erased); free(dm_ids); free(decoding_matrix); return -1; } }
int decode_MSR_product_matrix_no_output(char **input, size_t input_size, int* erasures, struct coding_info *info) { clock_t clk, tclk; int i, j,c1,c2,tdone,inv; int n = info->req.n; int d = info->req.d; int k = info->req.k; int w = info->req.w; int subpacket_size = input_size/(d-k+1); int num_of_long = subpacket_size/sizeof(long); long *src_pos,*des_pos; int *vector_A=NULL; char **ptrs; int **decode_schedule=NULL; int *erased = NULL; int *bitmatrix_temp = malloc(sizeof(int)*(k-1)*(k-1)*w*w*4); char *data_transformed = malloc(input_size*k); // this is the buffer for tranformed data, i.e., matrix M. The output is the systematic part of // codingmatrix*M, and // we will regenerate the erased data from M using the encoding matrix, which will be written to *output. int *pseudo_erasures = malloc(sizeof(int)*(n*2)); // in order to recompute M, we view it as a systematic MDS code, // sometimes (n+k,k), sometimes (n+d-k+1,d-k+1), thus we over allocate char **M_ptrs = malloc(sizeof(void*)*d*(d-k+1)); // this is the pointer matrix to elements in M. char **data_ptrs = malloc(sizeof(void*)*n); char **coding_ptrs = malloc(sizeof(void*)*n); int* remaining = malloc(sizeof(int)*k); // not erased devices char *buffer1 = malloc(subpacket_size*k*(k-1)*2); // need subpacketsize*k>=(max(4,k-1)+k-1)*sizeof(int), thus put factor of 2 to guarentee it int *buffer1_int = (int*)buffer1; // alternative pointer for buffer1 char *buffer2 = malloc(subpacket_size*k*(k-1)); if(data_transformed==NULL||pseudo_erasures==NULL||data_ptrs==NULL ||coding_ptrs==NULL||buffer1==NULL||buffer2==NULL||M_ptrs==NULL||remaining==NULL){ printf("Can not allocate memory\n"); jerasure_free_schedule(decode_schedule); if(data_ptrs!=NULL)free(data_ptrs); if(coding_ptrs!=NULL)free(coding_ptrs); if(pseudo_erasures!=NULL)free(pseudo_erasures); if(data_transformed!=NULL)free(data_transformed); if(M_ptrs!=NULL)free(M_ptrs); if(buffer1!=NULL)free(buffer1); if(buffer2!=NULL)free(buffer2); if(remaining!=NULL)free(remaining); return(-1); } //set up pointers for matrix M // first k-1 rows, only have S1 for(i=0,c2=0;i<k-1;i++){ c1 = i*(d-k+1); for(j=i;j<k-1;j++,c2++) M_ptrs[c1+j] = data_transformed + subpacket_size*c2; for(j=k-1;j<d-k+1;j++) M_ptrs[c1+j] = NULL; for(j=0;j<i;j++) M_ptrs[c1+j] = M_ptrs[j*(d-k+1)+i]; // symmetric matrix, thus there is (i,j) and (j,i) point to the same pointer. } // next k-1 rows for(i=0;i<k-1;i++){ c1 = (k-1+i)*(d-k+1); for(j=i;j<d-k+1;j++,c2++) M_ptrs[c1+j] = data_transformed + subpacket_size*c2; for(j=0;j<i;j++) M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i]; } // next 1 and (d-2k+1) rows, may not exist if(d>2*k-2) { c1 = (2*k-2)*(d-k+1); for(j=k-1;j<d-k+1;j++,c2++) M_ptrs[c1+j] = data_transformed + subpacket_size*c2; for(j=0;j<i;j++) M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+k-1]; for(i=2*k-1;i<d;i++){ c1 = i*(d-k+1); for(j=0;j<k;j++) M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i-k+1]; for(j=k;j<d-k+1;j++) M_ptrs[c1+j] = NULL; } } // first decode the last d-2k+1 columns of T and Z: view it as an (n+k,k) MDS code. Note strictly speaking this might // not be a real (n+k,k) MDS code, but it hardly matters. // before decoding operation, prepare for the pseudoerasure location array if(d>2*k-2){ // only when d>2k-2, T and Z exist for(i=0;i<k;i++) pseudo_erasures[i] = i; for(i=0;i<n;i++){ if(erasures[i]==-1){ pseudo_erasures[i+k] = -1; break; } pseudo_erasures[i+k] = erasures[i] + k; } // make the decoding schedule: we can save the trouble of manually generate this schedule, at the expense of // more computation decode_schedule = jerasure_generate_decoding_schedule(k, n, w, info->subbitmatrix_array[0], pseudo_erasures, 1); for(i=0;i<d-2*k+1;i++){ for(j=0;j<k;j++) data_ptrs[j] = M_ptrs[i+k+(d-k+1)*(k-1+j)]; for(j=0;j<n;j++) coding_ptrs[j] = input[j]+subpacket_size*(k+i); ptrs = set_up_ptrs_for_scheduled_decoding(k, n, pseudo_erasures, data_ptrs,coding_ptrs); if (ptrs == NULL){ printf("Can not allocate memory\n"); goto complete; } // assume packetsize = ALIGNMENT for (tdone = 0; tdone < subpacket_size; tdone += ALIGNMENT*w) { jerasure_do_scheduled_operations(ptrs, decode_schedule, ALIGNMENT); for (c1 = 0; c1 < k+n; c1++) ptrs[c1] += (ALIGNMENT*w); } free(ptrs); } //next decode the first column of T and Z: we view this as an (n+d-k+1,d-k+1) erasure codes // first setup the pseudo erasure location vector for(i=0;i<k;i++) pseudo_erasures[i] = i; // in the first columne of the Z matrix, the last d-2k+1 elements are known, but the first k elements are not for(i=0;i<n;i++) { if(erasures[i]==-1){ pseudo_erasures[i+k] = -1; break; } pseudo_erasures[i+k] = erasures[i]+d-k+1; } for(j=0;j<d-k+1;j++) data_ptrs[j] = M_ptrs[(d-k+1)*(k-1+j)+k-1]; for(j=0;j<n;j++) coding_ptrs[j] = input[j]+subpacket_size*(k-1); jerasure_schedule_decode_lazy(d-k+1,n,w, info->subbitmatrix_array[1],pseudo_erasures,data_ptrs, coding_ptrs,subpacket_size,ALIGNMENT,0); } //clk = clock(); // now this is the hard part: to decode S1 and S2. The algorithm used here is slightly different from that in the paper: // instead of right multiply \Phi_{DC}', we only right multiply the sub-matrix of \Phi_{DC}' without of the last row // setting up remaining device array to facilitate decoding erased = jerasure_erasures_to_erased(k, n-k, erasures); for(i=0,c1=0;i<n&&c1<k;i++){ if(erased[i]==0){ remaining[c1] = i; c1++; } } //compute C_{DC}-\Delta_{DC}*T' for(i=0;i<k-1;i++){ // has k-1 columns for(j=0;j<d-2*k+2;j++) data_ptrs[j] = M_ptrs[(2*k-2+j)*(d-k+1)+i]; for(j=0;j<k;j++) coding_ptrs[j] = buffer1+(j*(k-1)+i)*subpacket_size; for(j=0;j<k;j++){ jerasure_bitmatrix_dotprod(d-2*k+2, w, info->subbitmatrix_array[2]+remaining[j]*(d-2*k+2)*w*w, NULL, j+d-2*k+2, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); src_pos = (long*)(input[remaining[j]]+i*subpacket_size); des_pos = (long*)(coding_ptrs[j]); for(c2=0;c2<num_of_long;c2++) des_pos[c2] ^= src_pos[c2]; } } // right multiply \Phi_{DC}': this will be P. // result is in buffer2 for(j=0;j<k;j++){ //j-th row for(i=0;i<k-1;i++) data_ptrs[i] = buffer1+(j*(k-1)+i)*subpacket_size; for(i=0;i<k-1;i++) coding_ptrs[i] = buffer2 +(j*(k-1)+i)*subpacket_size; for(i=0;i<k-1;i++){ jerasure_bitmatrix_dotprod(k-1, w, info->subbitmatrix_array[3]+remaining[i]*(k-1)*w*w, NULL, i+k-1, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } } // now solve for the off-diagonal terms for(i=0;i<k-1;i++){ for(j=i+1;j<k-1;j++){ // solve for S1 tilde off-diagonal // here we directly use the fact that Lambda = [0 1 2 3 ....]; int** temp_schedule; inv = galois_single_divide(1,remaining[i]^remaining[j],w); buffer1_int[0] = buffer1_int[1] = inv; data_ptrs[0] = buffer2+(i*(k-1)+j)*subpacket_size; data_ptrs[1] = buffer2+(j*(k-1)+i)*subpacket_size; coding_ptrs[0] = M_ptrs[i*(d-k+1)+j]; jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp); temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp); jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT); if(temp_schedule!=NULL){ jerasure_free_schedule(temp_schedule); temp_schedule = NULL; } // solve for S2 tilde off-diagonal buffer1_int[0] = galois_single_multiply(remaining[j],inv,w); buffer1_int[1] = galois_single_multiply(remaining[i],inv,w); coding_ptrs[0] = M_ptrs[(i+k-1)*(d-k+1)+j]; jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp); temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp); jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT); if(temp_schedule!=NULL){ jerasure_free_schedule(temp_schedule); temp_schedule = NULL; } } } //tclk = clock()-clk; //printf("~S1 and ~S2 off-diagonal decoded %.3e clocks \n", (double)tclk); //clk = clock(); // compute the A vector: A*\Phi_{DC1} = \Phi_{DC2}, note this is always possible because \Phi_{DC1} is alway full rank by construction // we first reuse buffer1 here to form \Phi_{DC1} matrix and then the compute its inverse for(i=0;i<k-1;i++){ memcpy(buffer1_int+(k-1)*(k-1+i),(void*)(info->matrix+remaining[i]*d+k-1),(k-1)*sizeof(int)); } jerasure_invert_matrix(buffer1_int+(k-1)*(k-1),buffer1_int,k-1,w); vector_A = jerasure_matrix_multiply(info->matrix+remaining[k-1]*d+k-1,buffer1_int,1,k-1,k-1,k-1,w); if(vector_A==NULL) goto complete; for(i=0;i<k-1;i++){ buffer1_int[i+(k-1)*(k-1)] = galois_single_multiply(vector_A[i],remaining[k-1],w); buffer1_int[i+k*(k-1)] = vector_A[i]; } for(i=0;i<k-1;i++){ memset(buffer1_int+(k-1)*(k+1),0,sizeof(int)*2*(k-1)); buffer1_int[(k-1)*(k+1)+i] = remaining[i]; buffer1_int[(k-1)*(k+2)+i] = 1; pseudo_erasures[0] = i; pseudo_erasures[1] = k-1+i; pseudo_erasures[2] = -1; for(j=0;j<2*k-2;j++) data_ptrs[j] = M_ptrs[i+j*(d-k+1)]; coding_ptrs[0] = buffer2+((k-1)*(k-1)+i)*subpacket_size; coding_ptrs[1] = buffer2+(i*(k-1)+i)*subpacket_size; jerasure_matrix_to_bitmatrix_noallocate(2*k-2,2,w,buffer1_int+(k-1)*(k-1),bitmatrix_temp); jerasure_schedule_decode_lazy(2*k-2,2,w,bitmatrix_temp,pseudo_erasures,data_ptrs,coding_ptrs,subpacket_size,ALIGNMENT,0); } //tclk = clock()-clk; //printf("~S1 and ~S2 decoded %.3e clocks \n", (double)tclk); // now we have both \tilde{S_1} and \tilde{S_2} in M_ptrs, need to recover S1 and S2 from them // this is done by multiply \tilde{S_1} left and right by inv(\Phi_{DC1}). // right-multiply for S1 int* bitmatrix_inv = jerasure_matrix_to_bitmatrix(k-1,k-1,w,buffer1_int); int** inv_schedule = jerasure_smart_bitmatrix_to_schedule(k-1, k-1, w, bitmatrix_inv); // right-multiply for S1 for(i=0;i<k-1;i++){ for(j=0;j<k-1;j++) data_ptrs[j] = M_ptrs[i*(d-k+1)+j]; for(j=0;j<k-1;j++) coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // left-multiply for S1 for(j=0;j<k-1;j++){ for(i=0;i<k-1;i++) data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size; for(i=0;i<k-1;i++) coding_ptrs[i] = M_ptrs[i*(d-k+1)+j]; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // right-multiply for S2 for(i=0;i<k-1;i++){ for(j=0;j<k-1;j++) data_ptrs[j] = M_ptrs[(i+k-1)*(d-k+1)+j]; for(j=0;j<k-1;j++) coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // left-multiply for S2 for(j=0;j<k-1;j++){ for(i=0;i<k-1;i++) data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size; //for(i=j;i<k-1;i++) for(i=0;i<k-1;i++) coding_ptrs[i] = M_ptrs[(i+k-1)*(d-k+1)+j]; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // having S1,S2,T, now can also fill the first k-1 column of the output for(i=0;i<k-1;i++){ for(j=0;j<d;j++) data_ptrs[j] = M_ptrs[j*(d-k+1)+i]; for(j=0;j<n;j++) coding_ptrs[j] = input[j]+i*subpacket_size; for(j=0;j<n;j++){ if(erased[j]==1) jerasure_bitmatrix_encode(d,1,w,info->bitmatrix+(j*d*w*w),data_ptrs,coding_ptrs+j,subpacket_size,ALIGNMENT); } } // clean up complete: if(decode_schedule) jerasure_free_schedule(decode_schedule); if(inv_schedule) jerasure_free_schedule(inv_schedule); free(data_ptrs); free(coding_ptrs); if(erased)free(erased); free(pseudo_erasures); free(data_transformed); free(M_ptrs); free(buffer1); free(buffer2); free(remaining); free(bitmatrix_temp); if(vector_A!=NULL)free(vector_A); return(1); }