示例#1
0
/*
 * Rescale an m-row by k-column coding matrix (row-major, elements of
 * GF(2^w)) in place so that its entries are cheaper according to
 * cauchy_n_ones().
 *
 *   k, m    number of columns and rows of the matrix
 *   w       Galois field word size
 *   matrix  m*k ints, modified in place
 *
 * Pass 1 divides every column by its row-0 element, which turns row 0 into
 * all ones.  Pass 2 then looks at each remaining row on its own: it tries
 * dividing the row by each of its elements and keeps the divisor that gives
 * the smallest cauchy_n_ones() total, provided it beats the unscaled row.
 */
void cauchy_improve_coding_matrix(int k, int m, int w, int *matrix)
{
    int row, col, probe;
    int scale;
    int best_ones, trial_ones, best_col;
    int *cur;

    /* Pass 1: normalise every column by its first-row element. */
    for (col = 0; col < k; col++)
    {
        if (matrix[col] == 1) continue;

        scale = galois_single_divide(1, matrix[col], w);
        for (row = 0; row < m; row++)
        {
            matrix[row * k + col] = galois_single_multiply(matrix[row * k + col], scale, w);
        }
    }

    /* Pass 2: pick the best divisor for each row below the first. */
    for (row = 1; row < m; row++)
    {
        cur = matrix + row * k;

        /* Cost of the row as it stands; a candidate must be strictly better. */
        best_ones = 0;
        for (col = 0; col < k; col++)
        {
            best_ones += cauchy_n_ones(cur[col], w);
        }
        best_col = -1;

        for (col = 0; col < k; col++)
        {
            /* Dividing by 1 leaves the row unchanged, so skip it. */
            if (cur[col] == 1) continue;

            scale = galois_single_divide(1, cur[col], w);
            trial_ones = 0;
            for (probe = 0; probe < k; probe++)
            {
                trial_ones += cauchy_n_ones(galois_single_multiply(cur[probe], scale, w), w);
            }
            if (trial_ones < best_ones)
            {
                best_ones = trial_ones;
                best_col = col;
            }
        }

        if (best_col == -1) continue;

        /* Apply the winning divisor to the whole row. */
        scale = galois_single_divide(1, cur[best_col], w);
        for (col = 0; col < k; col++)
        {
            cur[col] = galois_single_multiply(cur[col], scale, w);
        }
    }
}
示例#2
0
/*
 * Compute one error magnitude per error locator and print them.
 *
 * For each locator X = locators[i] the function
 *   1. evaluates error_evaluator_poly at X^-1 (coefficients j = N-1 down to
 *      K-1, where coefficient j is paired with the power N-1-j),
 *   2. multiplies that value by X,
 *   3. divides by error_locator_poly_derivative evaluated at X^-1
 *      (coefficients j = (N-K)/2 down to 0, coefficient j paired with the
 *      power (N-K)/2-j),
 *   4. divides by multiplier[log(X)].
 *
 * error_magnitudes               out: length_of_locators results
 * error_locator_poly_derivative  in:  at least (N-K)/2+1 coefficients
 * error_evaluator_poly           in:  indexed up to N-1
 * locators                       in:  length_of_locators field elements
 *
 * Relies on N, K, w and multiplier[], which are defined outside this
 * function.
 */
void evaluate_error(unsigned int *error_magnitudes, 
			unsigned int *error_locator_poly_derivative, 
			unsigned int *error_evaluator_poly, 
			unsigned int *locators, 
			int length_of_locators){
	int i = 0, j = 0;
	unsigned int poly_evaluation = 0;
	unsigned int power = 1;
	unsigned int inverse_locator = 0 ;
	int temp = 0;// discrete log of locators[i]; used as the index into multiplier[]

	for(i = 0; i < length_of_locators; i++){
		inverse_locator = galois_inverse(locators[i], w);

		// Numerator. The power of the inverse locator grows by one factor per
		// coefficient, so it is accumulated instead of being rebuilt each time.
		// NOTE(review): the original author marked the bounds of this loop
		// with "?" -- confirm them against the layout of error_evaluator_poly.
		poly_evaluation = 0;
		power = 1;
		for(j = N - 1; j >= K - 1; j--){
			poly_evaluation ^= galois_single_multiply(power, error_evaluator_poly[j], w);
			power = galois_single_multiply(power, inverse_locator, w);
		}
		error_magnitudes[i] = galois_single_multiply(poly_evaluation, locators[i], w);

		// Denominator: same accumulation over the derivative's coefficients.
		poly_evaluation = 0;
		power = 1;
		for(j = (N - K)/2; j >= 0; j--){
			poly_evaluation ^= galois_single_multiply(power, error_locator_poly_derivative[j], w);
			power = galois_single_multiply(power, inverse_locator, w);
		}
		error_magnitudes[i] = galois_single_divide(error_magnitudes[i], poly_evaluation, w);

		// When decoding a GRS code the magnitude must also be divided by the
		// column multiplier of the parity check matrix for this location.
		// For a plain RS code, set the multiplier vector to all ones.
		temp = galois_log(locators[i], w);

		error_magnitudes[i] = galois_single_divide(error_magnitudes[i],
		                                            multiplier[temp], w);
	}
	printf("The error magnitudes are:\n");
	for(i = 0; i < length_of_locators; i ++){
		// %u: the magnitudes are unsigned int
		printf("%u	", error_magnitudes[i]);
	}
	printf("\n");
}
示例#3
0
文件: jerasure_01.c 项目: AleksMx/qfs
/*
 * jerasure_01 r c w
 *
 * Builds an r-by-c matrix whose elements are successive powers of two in
 * GF(2^w) (1, 2, 4, ... in row-major order) and prints it inside a small
 * HTML wrapper using jerasure_print_matrix().
 *
 * Any argument that is missing, not a number, or not strictly positive is
 * reported through usage().
 */
int main(int argc, char **argv)
{
  int r, c, w, i, n;
  int *matrix;

  if (argc != 4) usage(NULL);
  /* sscanf returns EOF (not 0) on an empty string, so require exactly one
     successful conversion before looking at the value. */
  if (sscanf(argv[1], "%d", &r) != 1 || r <= 0) usage("Bad r");
  if (sscanf(argv[2], "%d", &c) != 1 || c <= 0) usage("Bad c");
  if (sscanf(argv[3], "%d", &w) != 1 || w <= 0) usage("Bad w");

  matrix = talloc(int, r*c);
  if (matrix == NULL) {
    fprintf(stderr, "jerasure_01: cannot allocate a %d x %d matrix\n", r, c);
    return 1;
  }

  /* matrix[i] = 2^i in GF(2^w) */
  n = 1;
  for (i = 0; i < r*c; i++) {
    matrix[i] = n;
    n = galois_single_multiply(n, 2, w);
  }

  printf("<HTML><TITLE>jerasure_01");
  for (i = 1; i < argc; i++) printf(" %s", argv[i]);
  printf("</TITLE>\n");
  printf("<h3>jerasure_01");
  for (i = 1; i < argc; i++) printf(" %s", argv[i]);
  printf("</h3>\n");
  printf("<pre>\n");

  jerasure_print_matrix(matrix, r, c, w);
  return 0;
}
/*
 * product = m1 * m2 over GF(2^w).
 *
 * m1 is read as r1 rows with a row stride of c1, m2 as r2 rows with a row
 * stride of c2, and product receives r1 rows of c2 elements.  The inner
 * dimension walked is r2.  Field addition is XOR; field multiplication is
 * galois_single_multiply().
 */
void matrix_multiply(int* product, int *m1, int *m2, int r1, int c1, int r2, int c2, int w)
{
  int row, col, inner, cell;
  int *out_row;
  int *lhs_row;

  /* Zero the whole destination before accumulating into it. */
  cell = 0;
  while (cell < r1*c2) {
    product[cell] = 0;
    cell++;
  }

  for (row = 0; row < r1; row++) {
    out_row = product + row*c2;
    lhs_row = m1 + row*c1;
    for (col = 0; col < c2; col++) {
      for (inner = 0; inner < r2; inner++) {
        out_row[col] ^= galois_single_multiply(lhs_row[inner], m2[inner*c2+col], w);
      }
    }
  }
}
示例#5
0
/*
 * Return the total number of set bits in n, 2n, 4n, ..., 2^(w-1) * n, where
 * each doubling is carried out in GF(2^w).
 *
 * The reduction constant for word size w is cached on first use in the
 * file-level tables: PPs[w] holds galois_single_multiply(2^(w-1), 2, w),
 * ONEs[w][] the individual set bits of that value, and NOs[w] how many there
 * are.  A PPs[w] value of -1 marks the cache entry as not yet filled.
 */
int cauchy_n_ones(int n, int w)
{
    const int top_bit = (1 << (w-1));
    int total;      /* running sum over all doublings */
    int current;    /* set-bit count of the current value of n */
    int bit, step, t;

    if (PPs[w] == -1)
    {
        int found = 0;

        PPs[w] = galois_single_multiply(top_bit, 2, w);
        for (bit = 0; bit < w; bit++)
        {
            const int mask = (1 << bit);

            if (PPs[w] & mask)
            {
                ONEs[w][found] = mask;
                found++;
            }
        }
        NOs[w] = found;
    }

    /* Set bits of n itself. */
    total = 0;
    for (bit = 0; bit < w; bit++)
    {
        if (n & (1 << bit)) total++;
    }
    current = total;

    /* Double n w-1 times, updating the bit count incrementally. */
    for (step = 1; step < w; step++)
    {
        if ((n & top_bit) == 0)
        {
            /* No reduction needed: a plain shift keeps the bit count. */
            n <<= 1;
        }
        else
        {
            /* The top bit drops out, then the reduction constant is folded in. */
            n ^= top_bit;
            n <<= 1;
            n ^= PPs[w];
            current--;
            for (t = 0; t < NOs[w]; t++)
            {
                /* A bit of the constant that is now set was clear before
                   the XOR (+1); one that is now clear was set (-1). */
                if (n & ONEs[w][t]) current++;
                else current--;
            }
        }
        total += current;
    }
    return total;
}
示例#6
0
// Compute the syndrome of a received codeword and print it.
//
// The codeword must be supplied in natural order (c0, c1, c2, ...), not in
// the reversed order used for polynomial coefficients.
// The result is Syndrome = [s1, s2, ..., sr] with r = N - K, where each
// element is the GF(2^w) dot product of one parity check matrix row with the
// codeword.
//
// syndrome           out: N - K elements
// received_codeword  in:  N elements
// Returns the syndrome pointer that was passed in.
//
// The parity check matrix is rebuilt by get_parity_check_matrix() on every
// call.
unsigned int *compute_syndrome(unsigned int *syndrome, unsigned int *received_codeword){
	unsigned int sum = 0;
	int i = 0, j = 0;
	
	get_parity_check_matrix();
	for(i = 0; i< N - K; i++){
		sum = 0;
		for(j = 0; j < N; j++){
			sum ^= galois_single_multiply(parity_check_matrix[i][j], received_codeword[j], w);
		}
		syndrome[i] = sum;
	}
	printf("The syndrome is:\n");
	for(i = 0; i < N - K; i++)
		printf("%u	", syndrome[i]);	// %u: syndrome elements are unsigned int
	printf("\n");
	return syndrome;
}
示例#7
0
// Fill the (N - K) x N parity_check_matrix.
//
// Row 0 is a copy of the first N entries of the table returned by
// galois_get_ilog_table(w).  Every later row is the row above it multiplied
// element-wise in GF(2^w) by those same table entries, so row j holds the
// (j+1)-th power of each table entry.
void get_parity_check_matrix(){
	int *ilog = galois_get_ilog_table(w);
	int row, col;

	for(row = 0; row < N - K; row++){
		for(col = 0; col < N; col++){
			if(row == 0)
				parity_check_matrix[row][col] = ilog[col];
			else
				parity_check_matrix[row][col] = galois_single_multiply(parity_check_matrix[row - 1][col], ilog[col], w);
		}
	}
}
示例#8
0
/*
 * Usage: <program> r c w
 *
 * Builds an r-by-c matrix whose elements are successive powers of two in
 * GF(2^w) (1, 2, 4, ... in row-major order) and prints it with
 * jerasure_print_matrix().
 *
 * Any argument that is missing, not a number, or not strictly positive is
 * reported through usage().
 */
int main(int argc, char **argv)
{
  int r, c, w, i, n;
  int *matrix;

  if (argc != 4) usage(NULL);
  /* sscanf returns EOF (not 0) on an empty string, so require exactly one
     successful conversion before looking at the value. */
  if (sscanf(argv[1], "%d", &r) != 1 || r <= 0) usage("Bad r");
  if (sscanf(argv[2], "%d", &c) != 1 || c <= 0) usage("Bad c");
  if (sscanf(argv[3], "%d", &w) != 1 || w <= 0) usage("Bad w");

  matrix = talloc(int, r*c);
  if (matrix == NULL) {
    fprintf(stderr, "cannot allocate a %d x %d matrix\n", r, c);
    return 1;
  }

  /* matrix[i] = 2^i in GF(2^w) */
  n = 1;
  for (i = 0; i < r*c; i++) {
    matrix[i] = n;
    n = galois_single_multiply(n, 2, w);
  }

  jerasure_print_matrix(matrix, r, c, w);
  return 0;
}
示例#9
0
/*
 * galois_mult x y w
 *
 * Prints the product of x and y in GF(2^w).  w must be in [1,32]; for
 * w < 32 both operands must be smaller than 2^w.  Exits with status 1 and a
 * message on stderr when an argument is rejected.
 */
int main(int argc, char **argv)
{
  unsigned int x, y, w;

  if (argc != 4) {
    fprintf(stderr, "usage: galois_mult x y w - does multiplication in GF(2^w)\n");
    exit(1);
  }

  /* Reject operands that do not parse instead of multiplying garbage. */
  if (sscanf(argv[1], "%u", &x) != 1) { fprintf(stderr, "Bad x\n"); exit(1); }
  if (sscanf(argv[2], "%u", &y) != 1) { fprintf(stderr, "Bad y\n"); exit(1); }
  w = atoi(argv[3]);

  if (w < 1 || w > 32) { fprintf(stderr, "Bad w\n"); exit(1); }

  if (w < 32) {
    /* Unsigned shift: 1 << 31 would overflow a signed int. */
    unsigned int limit = 1u << w;

    if (x >= limit) { fprintf(stderr, "x must be in [0,%u]\n", limit-1); exit(1); }
    if (y >= limit) { fprintf(stderr, "y must be in [0,%u]\n", limit-1); exit(1); }
  }

  printf("%u\n", (unsigned int) galois_single_multiply(x, y, w));
  return 0;
}
示例#10
0
/*
* Class:     eu_vandertil_jerasure_jni_Galois
* Method:    galois_single_multiply
* Signature: (III)I
*
* JNI entry point that forwards its three integer arguments unchanged to the
* native galois_single_multiply() and hands the result back to Java.
* env and clazz are not used.
*/
JNIEXPORT jint JNICALL Java_eu_vandertil_jerasure_jni_Galois_galois_1single_1multiply
	(JNIEnv *env, jclass clazz, jint a, jint b, jint w)
{
	const jint product = galois_single_multiply(a, b, w);

	(void)env;
	(void)clazz;
	return product;
}
/*
 * Reduce a rows-by-cols matrix over GF(2^w) to reduced row echelon form,
 * in place.
 *
 *   mat   rows*cols ints in row-major order, overwritten with the result
 *   w     Galois field word size passed to the galois_* helpers
 *
 * Phase 1 is Gaussian elimination with row swaps: each pivot is scaled to 1
 * and the pivot column is cleared in all rows below it.  Phase 2 walks the
 * rows bottom-up and clears each pivot column in the rows above.
 * Addition and subtraction in GF(2^w) are both XOR.
 */
void rref(int *mat, int rows, int cols, int w)
{
  int i, j, x, below, pivot_col, rs2;
  int row_start, tmp, inverse;

  int r, c;
  r = 0;
  c = 0;

  /* First -- convert into upper triangular  */
  while (r < rows && c < cols)
  {
    row_start = cols*r;

    /*
      Swap rows if we have a zero r,c element.
      If we can't swap, move onto next col.
    */

    if (mat[row_start+c] == 0) {
      for (i = r+1; i < rows && mat[cols*i+c] == 0; i++) ;
      if (i == rows) {
        c++;
        continue;
      }
      rs2 = i*cols;
      for (x = 0; x < cols; x++) {
        tmp = mat[row_start+x];
        mat[row_start+x] = mat[rs2+x];
        mat[rs2+x] = tmp;
      }
    }

    /* Multiply the row by 1/element r,c  */
    tmp = mat[row_start+c];
    if (tmp != 1) {
      inverse = galois_single_divide(1, tmp, w);
      for (j = 0; j < cols; j++) {  //TODO(all): can we start j at c?
        mat[row_start+j] = galois_single_multiply(mat[row_start+j], inverse, w);
      }
    }

    /* Now for each row i > r, subtract A_ic*Ar from Ai  */
    below = row_start+c;
    for (i = r+1; i < rows; i++) {
      below += cols; //mat[below] == mat[i, c]
      if (mat[below] != 0) {
        if (mat[below] == 1) {
          /* Factor 1: a plain XOR, no field multiplication needed. */
          rs2 = cols*i;
          for (x = 0; x < cols; x++) { //TODO(all): x can be started at c?
            mat[rs2+x] ^= mat[row_start+x];
          }
        } else {
          tmp = mat[below];
          rs2 = cols*i;
          for (x = 0; x < cols; x++) { //TODO(all): x can be started at c?
            mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w);
          }
        }
      }
    }

    r++;
    c++;
  }

  /*
    Now the matrix is upper triangular.
    Back-substitute.
  */

  for (i = rows-1; i >= 0; i--) {
    row_start = i*cols;

    // find pivot in row i; it cannot lie to the left of column i
    for (pivot_col = i; pivot_col < cols && mat[row_start + pivot_col] == 0; pivot_col++) ;

    // No pivot in this row.  Must be ">=", not "==": when rows > cols + 1
    // the search starts at a column index already beyond cols, and using it
    // below would index outside the row (and, for tall matrices, outside
    // the matrix).
    if (pivot_col >= cols)
      continue;

    // substitute into every row j above row i
    for (j = 0; j < i; j++) {
      rs2 = j*cols;

      if (mat[rs2+pivot_col] != 0) {
        tmp = mat[rs2+pivot_col];
        for (x = 0; x < cols; x++) {
          mat[rs2+x] ^= galois_single_multiply(tmp, mat[row_start+x], w);
        }
      }
    }
  }

}
/*
 * Decode a product-matrix MSR code and regenerate the erased devices' data
 * directly inside the input buffers (there is no separate output buffer).
 *
 *   input       per-device buffers, indexed 0..n-1.  Each buffer is treated
 *               as d-k+1 consecutive subpackets of
 *               subpacket_size = input_size/(d-k+1) bytes.
 *   input_size  size in bytes of one device buffer
 *   erasures    ids of the erased devices, terminated by -1
 *   info        coding parameters (req.n, req.d, req.k, req.w) together with
 *               the coding matrix, its bitmatrix and subbitmatrix_array[0..3]
 *
 * The function rebuilds the message matrix M (blocks S1, S2 and, when
 * d > 2k-2, T and Z) in a scratch buffer and then re-encodes the first k-1
 * subpackets of every erased device into input[j].
 *
 * Returns 1 on success and -1 when an allocation or a schedule/matrix
 * construction step fails.  All scratch memory is released on every path.
 *
 * NOTE(review): the data is XORed through long* views of the char buffers,
 * so subpacket_size is presumably expected to be a multiple of sizeof(long)
 * and the buffers suitably aligned -- confirm against the callers.
 */
int decode_MSR_product_matrix_no_output(char **input, size_t input_size, int* erasures, struct coding_info *info)
{
	int status = -1;	// becomes 1 only after every stage has completed
	int i, j,c1,c2,tdone,inv;
	int n = info->req.n;
	int d = info->req.d;
	int k = info->req.k;
	int w = info->req.w;
	int subpacket_size = input_size/(d-k+1);
	int num_of_long = subpacket_size/sizeof(long);	
	long *src_pos,*des_pos;
	int *vector_A=NULL;
 	char **ptrs;
	int **decode_schedule=NULL;
	int **inv_schedule=NULL;	// declared up front so the cleanup code never reads it uninitialised
	int *bitmatrix_inv=NULL;
	int *erased = NULL;	
	int *bitmatrix_temp = malloc(sizeof(int)*(k-1)*(k-1)*w*w*4);
	char *data_transformed = malloc(input_size*k);    // buffer for the transformed data, i.e. matrix M. The erased data is
							  // regenerated from M using the encoding matrix and written back
							  // into input.
	int *pseudo_erasures = malloc(sizeof(int)*(n*2));  // in order to recompute M, we view it as a systematic MDS code, 
							  // sometimes (n+k,k), sometimes (n+d-k+1,d-k+1), thus we over allocate
	char **M_ptrs = malloc(sizeof(void*)*d*(d-k+1));  // this is the pointer matrix to elements in M.		
	char **data_ptrs = malloc(sizeof(void*)*n);
	char **coding_ptrs = malloc(sizeof(void*)*n);
	int* remaining = malloc(sizeof(int)*k);           // not erased devices
	char *buffer1 = malloc(subpacket_size*k*(k-1)*2);  // need subpacketsize*k>=(max(4,k-1)+k-1)*sizeof(int), thus the factor of 2 to guarantee it
	int *buffer1_int = (int*)buffer1;                  // alternative pointer for buffer1
	char *buffer2 = malloc(subpacket_size*k*(k-1));

	// Every pointer released at "complete" is initialised by now, so one
	// cleanup path serves all failures. No schedule exists yet at this
	// point, so none must be freed here.
	if(bitmatrix_temp==NULL||data_transformed==NULL||pseudo_erasures==NULL||data_ptrs==NULL
		||coding_ptrs==NULL||buffer1==NULL||buffer2==NULL||M_ptrs==NULL||remaining==NULL){
		printf("Can not allocate memory\n");
		goto complete;
	}
	//set up pointers for matrix M
	// first k-1 rows, only have S1
	for(i=0,c2=0;i<k-1;i++){
		c1 = i*(d-k+1);
		for(j=i;j<k-1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		for(j=k-1;j<d-k+1;j++)
			M_ptrs[c1+j] = NULL;			
		for(j=0;j<i;j++)
			M_ptrs[c1+j] = M_ptrs[j*(d-k+1)+i]; // symmetric matrix, thus (i,j) and (j,i) share the same pointer.
	}
	// next k-1 rows
	for(i=0;i<k-1;i++){
		c1 = (k-1+i)*(d-k+1);
		for(j=i;j<d-k+1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		for(j=0;j<i;j++)
			M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i];		
	}
	// next 1 and (d-2k+1) rows, may not exist
	if(d>2*k-2)
	{
		c1 = (2*k-2)*(d-k+1);
		for(j=k-1;j<d-k+1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		// the bound used to be the index i left over from the loop above,
		// which equals k-1 at this point; spelled out here
		for(j=0;j<k-1;j++)
			M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+k-1];			
		for(i=2*k-1;i<d;i++){
			c1 = i*(d-k+1);
			for(j=0;j<k;j++)
				M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i-k+1];
			for(j=k;j<d-k+1;j++)
				M_ptrs[c1+j] = NULL;
		}
	}
	
	// first decode the last d-2k+1 columns of T and Z: view it as an (n+k,k) MDS code. Note strictly speaking this might 
	// not be a real (n+k,k) MDS code, but it hardly matters.
	// before decoding operation, prepare for the pseudoerasure location array
	if(d>2*k-2){ // only when d>2k-2, T and Z exist
		for(i=0;i<k;i++)
			pseudo_erasures[i] = i;
		for(i=0;i<n;i++){
			if(erasures[i]==-1){
				pseudo_erasures[i+k] = -1;
				break;
			}
			pseudo_erasures[i+k] = erasures[i] + k;		
		}

		// make the decoding schedule: this saves the trouble of generating the schedule manually, at the expense of
		// more computation
		decode_schedule = jerasure_generate_decoding_schedule(k, n, w, info->subbitmatrix_array[0], pseudo_erasures, 1);
		if(decode_schedule==NULL)
			goto complete;

		for(i=0;i<d-2*k+1;i++){
			for(j=0;j<k;j++)
				data_ptrs[j] = M_ptrs[i+k+(d-k+1)*(k-1+j)];		
			for(j=0;j<n;j++)
				coding_ptrs[j] = input[j]+subpacket_size*(k+i);
			ptrs = set_up_ptrs_for_scheduled_decoding(k, n, pseudo_erasures, data_ptrs,coding_ptrs);
		  	if (ptrs == NULL){
				printf("Can not allocate memory\n");
				goto complete;
			}
			// assume packetsize = ALIGNMENT
			for (tdone = 0; tdone < subpacket_size; tdone += ALIGNMENT*w) {
				jerasure_do_scheduled_operations(ptrs, decode_schedule, ALIGNMENT);
				for (c1 = 0; c1 < k+n; c1++) ptrs[c1] += (ALIGNMENT*w);
			}
			free(ptrs);
		} 
		//next decode the first column of T and Z: we view this as an (n+d-k+1,d-k+1) erasure codes
		// first setup the pseudo erasure location vector	
		for(i=0;i<k;i++)
			pseudo_erasures[i] = i; // in the first column of the Z matrix, the last d-2k+1 elements are known, but the first k elements are not
		for(i=0;i<n;i++)
		{
			if(erasures[i]==-1){
				pseudo_erasures[i+k] = -1;
				break;
			}
			pseudo_erasures[i+k] = erasures[i]+d-k+1;		
		}
		for(j=0;j<d-k+1;j++)
			data_ptrs[j] = M_ptrs[(d-k+1)*(k-1+j)+k-1];
		for(j=0;j<n;j++)
			coding_ptrs[j] = input[j]+subpacket_size*(k-1);
		jerasure_schedule_decode_lazy(d-k+1,n,w,
					info->subbitmatrix_array[1],pseudo_erasures,data_ptrs,
					coding_ptrs,subpacket_size,ALIGNMENT,0);
	}

	// now this is the hard part: to decode S1 and S2. The algorithm used here is slightly different from that in the paper:
	// instead of right multiply \Phi_{DC}', we only right multiply the sub-matrix of \Phi_{DC}' without the last row

	// setting up remaining device array to facilitate decoding
	erased = jerasure_erasures_to_erased(k, n-k, erasures);
	if(erased==NULL)
		goto complete;
	for(i=0,c1=0;i<n&&c1<k;i++){
		if(erased[i]==0){
			remaining[c1] = i;
			c1++;
		}	
	}	
	//compute C_{DC}-\Delta_{DC}*T'
	for(i=0;i<k-1;i++){ // has k-1 columns
		for(j=0;j<d-2*k+2;j++)
			data_ptrs[j] = M_ptrs[(2*k-2+j)*(d-k+1)+i];
		for(j=0;j<k;j++)
			coding_ptrs[j] = buffer1+(j*(k-1)+i)*subpacket_size;
		for(j=0;j<k;j++){
			jerasure_bitmatrix_dotprod(d-2*k+2, w, info->subbitmatrix_array[2]+remaining[j]*(d-2*k+2)*w*w, NULL, j+d-2*k+2,
        	                data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
			src_pos = (long*)(input[remaining[j]]+i*subpacket_size);
			des_pos	= (long*)(coding_ptrs[j]);
			for(c2=0;c2<num_of_long;c2++)	
				des_pos[c2] ^= src_pos[c2];
		}	
	}
	
	// right multiply \Phi_{DC}': this will be P. 
	// result is in buffer2
	for(j=0;j<k;j++){ //j-th row
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer1+(j*(k-1)+i)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = buffer2 +(j*(k-1)+i)*subpacket_size;
		for(i=0;i<k-1;i++){
			jerasure_bitmatrix_dotprod(k-1, w, info->subbitmatrix_array[3]+remaining[i]*(k-1)*w*w, NULL, i+k-1,
        	                data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
		}
	}
	// now solve for the off-diagonal terms
	for(i=0;i<k-1;i++){
		for(j=i+1;j<k-1;j++){
			// solve for S1 tilde off-diagonal 
			// here we directly use the fact that Lambda = [0 1 2 3 ....];	
			int** temp_schedule;			
			inv = galois_single_divide(1,remaining[i]^remaining[j],w);					
			buffer1_int[0] = buffer1_int[1] = inv;			
			data_ptrs[0] = buffer2+(i*(k-1)+j)*subpacket_size;
			data_ptrs[1] = buffer2+(j*(k-1)+i)*subpacket_size;
			coding_ptrs[0] = M_ptrs[i*(d-k+1)+j];
			jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp);
			temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp);
			if(temp_schedule==NULL)	// check before the schedule is used, not after
				goto complete;
			jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT);	
			jerasure_free_schedule(temp_schedule);
			temp_schedule = NULL;
			// solve for S2 tilde off-diagonal
			buffer1_int[0] = galois_single_multiply(remaining[j],inv,w);
			buffer1_int[1] = galois_single_multiply(remaining[i],inv,w);
			coding_ptrs[0] = M_ptrs[(i+k-1)*(d-k+1)+j];
			jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp);
			temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp);
			if(temp_schedule==NULL)
				goto complete;
			jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT);	
			jerasure_free_schedule(temp_schedule);
			temp_schedule = NULL;
		}	
	}
	// compute the A vector: A*\Phi_{DC1} = \Phi_{DC2}, note this is always possible because \Phi_{DC1} is always full rank by construction
	// we first reuse buffer1 here to form the \Phi_{DC1} matrix and then compute its inverse
	for(i=0;i<k-1;i++){
		memcpy(buffer1_int+(k-1)*(k-1+i),(void*)(info->matrix+remaining[i]*d+k-1),(k-1)*sizeof(int));
	}

	// NOTE(review): the result of jerasure_invert_matrix is not checked; the
	// comment above argues the matrix is always invertible -- confirm.
	jerasure_invert_matrix(buffer1_int+(k-1)*(k-1),buffer1_int,k-1,w);	
	vector_A = jerasure_matrix_multiply(info->matrix+remaining[k-1]*d+k-1,buffer1_int,1,k-1,k-1,k-1,w);	
	if(vector_A==NULL)
		goto complete;	
	
	for(i=0;i<k-1;i++){
		buffer1_int[i+(k-1)*(k-1)] = galois_single_multiply(vector_A[i],remaining[k-1],w);		
		buffer1_int[i+k*(k-1)] = vector_A[i];
	}		

	for(i=0;i<k-1;i++){
		memset(buffer1_int+(k-1)*(k+1),0,sizeof(int)*2*(k-1));
		buffer1_int[(k-1)*(k+1)+i] = remaining[i];
		buffer1_int[(k-1)*(k+2)+i] = 1;

		pseudo_erasures[0] = i;
		pseudo_erasures[1] = k-1+i;
		pseudo_erasures[2] = -1;
		for(j=0;j<2*k-2;j++)
			data_ptrs[j] = M_ptrs[i+j*(d-k+1)];
		coding_ptrs[0] = buffer2+((k-1)*(k-1)+i)*subpacket_size;
		coding_ptrs[1] = buffer2+(i*(k-1)+i)*subpacket_size;

		jerasure_matrix_to_bitmatrix_noallocate(2*k-2,2,w,buffer1_int+(k-1)*(k-1),bitmatrix_temp);				
		jerasure_schedule_decode_lazy(2*k-2,2,w,bitmatrix_temp,pseudo_erasures,data_ptrs,coding_ptrs,subpacket_size,ALIGNMENT,0);
      	}
	// now we have both \tilde{S_1} and \tilde{S_2} in M_ptrs, need to recover S1 and S2 from them
	// this is done by multiplying \tilde{S_1} left and right by inv(\Phi_{DC1}).
	bitmatrix_inv = jerasure_matrix_to_bitmatrix(k-1,k-1,w,buffer1_int);
	if(bitmatrix_inv==NULL)
		goto complete;
	inv_schedule = jerasure_smart_bitmatrix_to_schedule(k-1, k-1, w, bitmatrix_inv);
	if(inv_schedule==NULL)
		goto complete;

	// right-multiply for S1
	for(i=0;i<k-1;i++){
		for(j=0;j<k-1;j++)
			data_ptrs[j] = M_ptrs[i*(d-k+1)+j];		
		for(j=0;j<k-1;j++)
			coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size;	
		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);	
	}
	// left-multiply for S1 
	for(j=0;j<k-1;j++){
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = M_ptrs[i*(d-k+1)+j];

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);

	}
	// right-multiply for S2
	for(i=0;i<k-1;i++){
		for(j=0;j<k-1;j++)
			data_ptrs[j] = M_ptrs[(i+k-1)*(d-k+1)+j];
		for(j=0;j<k-1;j++)
			coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size;

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// left-multiply for S2 
	for(j=0;j<k-1;j++){
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = M_ptrs[(i+k-1)*(d-k+1)+j];

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// having S1,S2,T, now can also fill the first k-1 columns of the erased devices
	for(i=0;i<k-1;i++){
		for(j=0;j<d;j++)
			data_ptrs[j] = M_ptrs[j*(d-k+1)+i];
		for(j=0;j<n;j++)
			coding_ptrs[j] = input[j]+i*subpacket_size;
		for(j=0;j<n;j++){
			if(erased[j]==1)
				jerasure_bitmatrix_encode(d,1,w,info->bitmatrix+(j*d*w*w),data_ptrs,coding_ptrs+j,subpacket_size,ALIGNMENT);
		}
	}

	status = 1;

	// clean up: reached on success and on every failure; free(NULL) is a no-op
complete:
	if(decode_schedule!=NULL)
		jerasure_free_schedule(decode_schedule);
	if(inv_schedule!=NULL)
		jerasure_free_schedule(inv_schedule);
	free(bitmatrix_inv);
	free(data_ptrs);
	free(coding_ptrs);
	free(erased);
	free(pseudo_erasures);
	free(data_transformed);
	free(M_ptrs);	
	free(buffer1);
	free(buffer2);
	free(remaining);
	free(bitmatrix_temp);
	free(vector_A);
	return(status);
}