/**
 * Encode an Erlang binary into k data blocks followed by m coding blocks
 * using a Liberation bit-matrix schedule.
 *
 * Whole data blocks are emitted as sub-binaries of dataBin (no copy). The
 * remaining tail of the input, any data blocks needed to reach k, and the m
 * coding blocks live in one freshly allocated binary. That binary is offset
 * so its blocks share the 16-byte misalignment of the input data pointer.
 *
 * @param dataBin  Erlang term expected to be a binary.
 * @return k + m block terms (data blocks first, then coding blocks).
 *         An empty vector is returned when dataBin is not a binary or when
 *         the coding matrix, schedule or scratch binary cannot be created.
 *         NOTE(review): the original code had no failure path at all;
 *         confirm callers treat an empty result as an error.
 */
vector<ERL_NIF_TERM> LiberationCoding::doEncode(ERL_NIF_TERM dataBin) {
    vector<ERL_NIF_TERM> blockList;

    // Inspect the input first: nothing has been allocated yet, so a bad
    // term can be rejected without any cleanup.
    ErlNifBinary data;
    if (!enif_inspect_binary(env, dataBin, &data)) {
        return blockList;
    }

    int *bitmatrix = liberation_coding_bitmatrix(k, w);
    if (bitmatrix == NULL) {
        return blockList;
    }
    int **smart = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
    if (smart == NULL) {
        free(bitmatrix);
        return blockList;
    }

    char* dataBlocks[k];
    char* codeBlocks[m];

    size_t dataSize = data.size;
    size_t blockSize = roundTo((roundTo(dataSize, k*w) / (k*w)), 16) * w;

    // Point at every complete block that already sits inside the input.
    // The blockSize > 0 guard matters for an empty input: without it
    // `remain >= blockSize` is always true and the loop runs off the end
    // of dataBlocks.
    size_t offset = 0;
    size_t remain = dataSize;
    int filled = 0;
    while (blockSize > 0 && remain >= blockSize) {
        dataBlocks[filled] = (char*)data.data + offset;
        offset += blockSize;
        remain -= blockSize;
        filled++;
    }

    // Scratch binary for the input tail, missing data blocks and all coding
    // blocks; 16 spare bytes leave room for the alignment shift.
    ErlNifBinary tmp;
    if (!enif_alloc_binary((k + m - filled) * blockSize + 16, &tmp)) {
        jerasure_free_schedule(smart);
        free(bitmatrix);
        return blockList;
    }
    size_t align = (((size_t)data.data & 0x0f) - ((size_t)tmp.data & 0x0f) + 16) & 0x0f;
    char* alignedHead = (char*)tmp.data + align;

    // Copy the tail of the input, then zero the rest of the data region so
    // the padding is deterministic and no uninitialised heap bytes end up in
    // the returned blocks or in the parity computed from them.
    if (remain > 0) {
        memcpy(alignedHead, data.data + filled * blockSize, remain);
    }
    size_t dataRegion = (size_t)(k - filled) * blockSize;
    if (dataRegion > remain) {
        memset(alignedHead + remain, 0, dataRegion - remain);
    }

    offset = 0;
    for (int i = filled; i < k + m; ++i, offset += blockSize) {
        if (i < k) {
            dataBlocks[i] = alignedHead + offset;
        } else {
            codeBlocks[i - k] = alignedHead + offset;
        }
    }

    jerasure_schedule_encode(k, m, w, smart, dataBlocks, codeBlocks, blockSize, blockSize / w);

    blockList.reserve(k + m);
    for (int i = 0; i < filled; ++i) {
        blockList.push_back(enif_make_sub_binary(env, dataBin, i * blockSize, blockSize));
    }

    // enif_make_binary hands ownership of tmp to the returned term.
    ERL_NIF_TERM tmpBin = enif_make_binary(env, &tmp);
    offset = 0;
    for (int i = filled; i < k + m; ++i, offset += blockSize) {
        blockList.push_back(enif_make_sub_binary(env, tmpBin, offset + align, blockSize));
    }

    jerasure_free_schedule(smart);
    free(bitmatrix);
    return blockList;
}
int rain_encode_noalloc (struct rain_encoding_s *encoding, uint8_t **data, uint8_t **parity) { assert(encoding != NULL); // Prepare the jerasure structures int *bit_matrix=NULL, *matrix=NULL, **schedule=NULL; if (encoding->algo == JALG_liberation) { matrix = NULL; bit_matrix = liber8tion_coding_bitmatrix(encoding->k); schedule = jerasure_smart_bitmatrix_to_schedule(encoding->k, encoding->m, encoding->w, bit_matrix); } else if (encoding->algo == JALG_crs) { matrix = cauchy_good_general_coding_matrix( encoding->k, encoding->m, encoding->w); bit_matrix = jerasure_matrix_to_bitmatrix( encoding->k, encoding->m, encoding->w, matrix); schedule = jerasure_smart_bitmatrix_to_schedule( encoding->k, encoding->m, encoding->w, bit_matrix); } // Compute now ... damned, no return code to check jerasure_schedule_encode(encoding->k, encoding->m, encoding->w, schedule, (char**) data, (char**) parity, encoding->block_size, encoding->packet_size); if (schedule) jerasure_free_schedule(schedule); if (bit_matrix) free(bit_matrix); if (matrix) free(matrix); return 1; }
int decode_MSR_product_matrix_no_output(char **input, size_t input_size, int* erasures, struct coding_info *info) { clock_t clk, tclk; int i, j,c1,c2,tdone,inv; int n = info->req.n; int d = info->req.d; int k = info->req.k; int w = info->req.w; int subpacket_size = input_size/(d-k+1); int num_of_long = subpacket_size/sizeof(long); long *src_pos,*des_pos; int *vector_A=NULL; char **ptrs; int **decode_schedule=NULL; int *erased = NULL; int *bitmatrix_temp = malloc(sizeof(int)*(k-1)*(k-1)*w*w*4); char *data_transformed = malloc(input_size*k); // this is the buffer for tranformed data, i.e., matrix M. The output is the systematic part of // codingmatrix*M, and // we will regenerate the erased data from M using the encoding matrix, which will be written to *output. int *pseudo_erasures = malloc(sizeof(int)*(n*2)); // in order to recompute M, we view it as a systematic MDS code, // sometimes (n+k,k), sometimes (n+d-k+1,d-k+1), thus we over allocate char **M_ptrs = malloc(sizeof(void*)*d*(d-k+1)); // this is the pointer matrix to elements in M. 
char **data_ptrs = malloc(sizeof(void*)*n); char **coding_ptrs = malloc(sizeof(void*)*n); int* remaining = malloc(sizeof(int)*k); // not erased devices char *buffer1 = malloc(subpacket_size*k*(k-1)*2); // need subpacketsize*k>=(max(4,k-1)+k-1)*sizeof(int), thus put factor of 2 to guarentee it int *buffer1_int = (int*)buffer1; // alternative pointer for buffer1 char *buffer2 = malloc(subpacket_size*k*(k-1)); if(data_transformed==NULL||pseudo_erasures==NULL||data_ptrs==NULL ||coding_ptrs==NULL||buffer1==NULL||buffer2==NULL||M_ptrs==NULL||remaining==NULL){ printf("Can not allocate memory\n"); jerasure_free_schedule(decode_schedule); if(data_ptrs!=NULL)free(data_ptrs); if(coding_ptrs!=NULL)free(coding_ptrs); if(pseudo_erasures!=NULL)free(pseudo_erasures); if(data_transformed!=NULL)free(data_transformed); if(M_ptrs!=NULL)free(M_ptrs); if(buffer1!=NULL)free(buffer1); if(buffer2!=NULL)free(buffer2); if(remaining!=NULL)free(remaining); return(-1); } //set up pointers for matrix M // first k-1 rows, only have S1 for(i=0,c2=0;i<k-1;i++){ c1 = i*(d-k+1); for(j=i;j<k-1;j++,c2++) M_ptrs[c1+j] = data_transformed + subpacket_size*c2; for(j=k-1;j<d-k+1;j++) M_ptrs[c1+j] = NULL; for(j=0;j<i;j++) M_ptrs[c1+j] = M_ptrs[j*(d-k+1)+i]; // symmetric matrix, thus there is (i,j) and (j,i) point to the same pointer. } // next k-1 rows for(i=0;i<k-1;i++){ c1 = (k-1+i)*(d-k+1); for(j=i;j<d-k+1;j++,c2++) M_ptrs[c1+j] = data_transformed + subpacket_size*c2; for(j=0;j<i;j++) M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i]; } // next 1 and (d-2k+1) rows, may not exist if(d>2*k-2) { c1 = (2*k-2)*(d-k+1); for(j=k-1;j<d-k+1;j++,c2++) M_ptrs[c1+j] = data_transformed + subpacket_size*c2; for(j=0;j<i;j++) M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+k-1]; for(i=2*k-1;i<d;i++){ c1 = i*(d-k+1); for(j=0;j<k;j++) M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i-k+1]; for(j=k;j<d-k+1;j++) M_ptrs[c1+j] = NULL; } } // first decode the last d-2k+1 columns of T and Z: view it as an (n+k,k) MDS code. 
Note strictly speaking this might // not be a real (n+k,k) MDS code, but it hardly matters. // before decoding operation, prepare for the pseudoerasure location array if(d>2*k-2){ // only when d>2k-2, T and Z exist for(i=0;i<k;i++) pseudo_erasures[i] = i; for(i=0;i<n;i++){ if(erasures[i]==-1){ pseudo_erasures[i+k] = -1; break; } pseudo_erasures[i+k] = erasures[i] + k; } // make the decoding schedule: we can save the trouble of manually generate this schedule, at the expense of // more computation decode_schedule = jerasure_generate_decoding_schedule(k, n, w, info->subbitmatrix_array[0], pseudo_erasures, 1); for(i=0;i<d-2*k+1;i++){ for(j=0;j<k;j++) data_ptrs[j] = M_ptrs[i+k+(d-k+1)*(k-1+j)]; for(j=0;j<n;j++) coding_ptrs[j] = input[j]+subpacket_size*(k+i); ptrs = set_up_ptrs_for_scheduled_decoding(k, n, pseudo_erasures, data_ptrs,coding_ptrs); if (ptrs == NULL){ printf("Can not allocate memory\n"); goto complete; } // assume packetsize = ALIGNMENT for (tdone = 0; tdone < subpacket_size; tdone += ALIGNMENT*w) { jerasure_do_scheduled_operations(ptrs, decode_schedule, ALIGNMENT); for (c1 = 0; c1 < k+n; c1++) ptrs[c1] += (ALIGNMENT*w); } free(ptrs); } //next decode the first column of T and Z: we view this as an (n+d-k+1,d-k+1) erasure codes // first setup the pseudo erasure location vector for(i=0;i<k;i++) pseudo_erasures[i] = i; // in the first columne of the Z matrix, the last d-2k+1 elements are known, but the first k elements are not for(i=0;i<n;i++) { if(erasures[i]==-1){ pseudo_erasures[i+k] = -1; break; } pseudo_erasures[i+k] = erasures[i]+d-k+1; } for(j=0;j<d-k+1;j++) data_ptrs[j] = M_ptrs[(d-k+1)*(k-1+j)+k-1]; for(j=0;j<n;j++) coding_ptrs[j] = input[j]+subpacket_size*(k-1); jerasure_schedule_decode_lazy(d-k+1,n,w, info->subbitmatrix_array[1],pseudo_erasures,data_ptrs, coding_ptrs,subpacket_size,ALIGNMENT,0); } //clk = clock(); // now this is the hard part: to decode S1 and S2. 
The algorithm used here is slightly different from that in the paper: // instead of right multiply \Phi_{DC}', we only right multiply the sub-matrix of \Phi_{DC}' without of the last row // setting up remaining device array to facilitate decoding erased = jerasure_erasures_to_erased(k, n-k, erasures); for(i=0,c1=0;i<n&&c1<k;i++){ if(erased[i]==0){ remaining[c1] = i; c1++; } } //compute C_{DC}-\Delta_{DC}*T' for(i=0;i<k-1;i++){ // has k-1 columns for(j=0;j<d-2*k+2;j++) data_ptrs[j] = M_ptrs[(2*k-2+j)*(d-k+1)+i]; for(j=0;j<k;j++) coding_ptrs[j] = buffer1+(j*(k-1)+i)*subpacket_size; for(j=0;j<k;j++){ jerasure_bitmatrix_dotprod(d-2*k+2, w, info->subbitmatrix_array[2]+remaining[j]*(d-2*k+2)*w*w, NULL, j+d-2*k+2, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); src_pos = (long*)(input[remaining[j]]+i*subpacket_size); des_pos = (long*)(coding_ptrs[j]); for(c2=0;c2<num_of_long;c2++) des_pos[c2] ^= src_pos[c2]; } } // right multiply \Phi_{DC}': this will be P. // result is in buffer2 for(j=0;j<k;j++){ //j-th row for(i=0;i<k-1;i++) data_ptrs[i] = buffer1+(j*(k-1)+i)*subpacket_size; for(i=0;i<k-1;i++) coding_ptrs[i] = buffer2 +(j*(k-1)+i)*subpacket_size; for(i=0;i<k-1;i++){ jerasure_bitmatrix_dotprod(k-1, w, info->subbitmatrix_array[3]+remaining[i]*(k-1)*w*w, NULL, i+k-1, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } } // now solve for the off-diagonal terms for(i=0;i<k-1;i++){ for(j=i+1;j<k-1;j++){ // solve for S1 tilde off-diagonal // here we directly use the fact that Lambda = [0 1 2 3 ....]; int** temp_schedule; inv = galois_single_divide(1,remaining[i]^remaining[j],w); buffer1_int[0] = buffer1_int[1] = inv; data_ptrs[0] = buffer2+(i*(k-1)+j)*subpacket_size; data_ptrs[1] = buffer2+(j*(k-1)+i)*subpacket_size; coding_ptrs[0] = M_ptrs[i*(d-k+1)+j]; jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp); temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp); jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, 
coding_ptrs,subpacket_size, ALIGNMENT); if(temp_schedule!=NULL){ jerasure_free_schedule(temp_schedule); temp_schedule = NULL; } // solve for S2 tilde off-diagonal buffer1_int[0] = galois_single_multiply(remaining[j],inv,w); buffer1_int[1] = galois_single_multiply(remaining[i],inv,w); coding_ptrs[0] = M_ptrs[(i+k-1)*(d-k+1)+j]; jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp); temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp); jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT); if(temp_schedule!=NULL){ jerasure_free_schedule(temp_schedule); temp_schedule = NULL; } } } //tclk = clock()-clk; //printf("~S1 and ~S2 off-diagonal decoded %.3e clocks \n", (double)tclk); //clk = clock(); // compute the A vector: A*\Phi_{DC1} = \Phi_{DC2}, note this is always possible because \Phi_{DC1} is alway full rank by construction // we first reuse buffer1 here to form \Phi_{DC1} matrix and then the compute its inverse for(i=0;i<k-1;i++){ memcpy(buffer1_int+(k-1)*(k-1+i),(void*)(info->matrix+remaining[i]*d+k-1),(k-1)*sizeof(int)); } jerasure_invert_matrix(buffer1_int+(k-1)*(k-1),buffer1_int,k-1,w); vector_A = jerasure_matrix_multiply(info->matrix+remaining[k-1]*d+k-1,buffer1_int,1,k-1,k-1,k-1,w); if(vector_A==NULL) goto complete; for(i=0;i<k-1;i++){ buffer1_int[i+(k-1)*(k-1)] = galois_single_multiply(vector_A[i],remaining[k-1],w); buffer1_int[i+k*(k-1)] = vector_A[i]; } for(i=0;i<k-1;i++){ memset(buffer1_int+(k-1)*(k+1),0,sizeof(int)*2*(k-1)); buffer1_int[(k-1)*(k+1)+i] = remaining[i]; buffer1_int[(k-1)*(k+2)+i] = 1; pseudo_erasures[0] = i; pseudo_erasures[1] = k-1+i; pseudo_erasures[2] = -1; for(j=0;j<2*k-2;j++) data_ptrs[j] = M_ptrs[i+j*(d-k+1)]; coding_ptrs[0] = buffer2+((k-1)*(k-1)+i)*subpacket_size; coding_ptrs[1] = buffer2+(i*(k-1)+i)*subpacket_size; jerasure_matrix_to_bitmatrix_noallocate(2*k-2,2,w,buffer1_int+(k-1)*(k-1),bitmatrix_temp); 
jerasure_schedule_decode_lazy(2*k-2,2,w,bitmatrix_temp,pseudo_erasures,data_ptrs,coding_ptrs,subpacket_size,ALIGNMENT,0); } //tclk = clock()-clk; //printf("~S1 and ~S2 decoded %.3e clocks \n", (double)tclk); // now we have both \tilde{S_1} and \tilde{S_2} in M_ptrs, need to recover S1 and S2 from them // this is done by multiply \tilde{S_1} left and right by inv(\Phi_{DC1}). // right-multiply for S1 int* bitmatrix_inv = jerasure_matrix_to_bitmatrix(k-1,k-1,w,buffer1_int); int** inv_schedule = jerasure_smart_bitmatrix_to_schedule(k-1, k-1, w, bitmatrix_inv); // right-multiply for S1 for(i=0;i<k-1;i++){ for(j=0;j<k-1;j++) data_ptrs[j] = M_ptrs[i*(d-k+1)+j]; for(j=0;j<k-1;j++) coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // left-multiply for S1 for(j=0;j<k-1;j++){ for(i=0;i<k-1;i++) data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size; for(i=0;i<k-1;i++) coding_ptrs[i] = M_ptrs[i*(d-k+1)+j]; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // right-multiply for S2 for(i=0;i<k-1;i++){ for(j=0;j<k-1;j++) data_ptrs[j] = M_ptrs[(i+k-1)*(d-k+1)+j]; for(j=0;j<k-1;j++) coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // left-multiply for S2 for(j=0;j<k-1;j++){ for(i=0;i<k-1;i++) data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size; //for(i=j;i<k-1;i++) for(i=0;i<k-1;i++) coding_ptrs[i] = M_ptrs[(i+k-1)*(d-k+1)+j]; jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT); } // having S1,S2,T, now can also fill the first k-1 column of the output for(i=0;i<k-1;i++){ for(j=0;j<d;j++) data_ptrs[j] = M_ptrs[j*(d-k+1)+i]; for(j=0;j<n;j++) coding_ptrs[j] = input[j]+i*subpacket_size; for(j=0;j<n;j++){ if(erased[j]==1) 
jerasure_bitmatrix_encode(d,1,w,info->bitmatrix+(j*d*w*w),data_ptrs,coding_ptrs+j,subpacket_size,ALIGNMENT); } } // clean up complete: if(decode_schedule) jerasure_free_schedule(decode_schedule); if(inv_schedule) jerasure_free_schedule(inv_schedule); free(data_ptrs); free(coding_ptrs); if(erased)free(erased); free(pseudo_erasures); free(data_transformed); free(M_ptrs); free(buffer1); free(buffer2); free(remaining); free(bitmatrix_temp); if(vector_A!=NULL)free(vector_A); return(1); }