示例#1
0
static int
do_rehydrate(struct rain_encoding_s *enc, uint8_t **data,
		uint8_t **coding, int *erasures)
{
	/* Creating coding matrix or bitmatrix */
	int *bit_matrix=NULL, *matrix=NULL;
	if (enc->algo == JALG_liberation)
		bit_matrix = liber8tion_coding_bitmatrix(enc->k);
	else if (enc->algo == JALG_crs) {
		matrix = cauchy_good_general_coding_matrix(enc->k, enc->m, enc->w);
		bit_matrix = jerasure_matrix_to_bitmatrix(enc->k, enc->m, enc->w, matrix);
	}

	/* Choose proper decoding method */
	jerasure_schedule_decode_lazy(enc->k, enc->m, enc->w,
			bit_matrix, erasures, (char**)data, (char**)coding,
			enc->block_size, enc->packet_size, 1);

	/* Freeing previously allocated memory */
	if (bit_matrix)
		free(bit_matrix);
	if (matrix)
		free(matrix);

	return 1;
}
示例#2
0
int decoder(char *argv,char *dup_argv, int ssd_no[] ) {
	FILE *fp;				// File pointer

	/* Jerasure arguments */
	char **data;
	char **coding;
	int *erasures;
	int *erased;
	int *matrix;
	int *bitmatrix;
	
	/* Parameters */
	int k = 5;
	int m = 3;
	int w = 4;
	int packetsize = 1024;
	int buffersize = k*w*1024*sizeof(int);
	int d=0;	
	int i, j;				// loop control variables
	int blocksize;			// size of individual files
	int origsize;			// size of file before padding
	int total;				// used to write data, not padding to file
	struct stat status;		// used to find size of individual files
	int numerased;			// number of erased files
		
	/* Used to recreate file names */
	char *temp;
	char *cs1, *cs2;
	char *dup_cs1, *dup_cs2;
	char *fname;
	int md;
	char *curdir;

	/* Used to time decoding */
	struct timeval t1, t2, t3, t4;
	struct timezone tz;
	double tsec;
	double totalsec;
	
	signal(SIGQUIT, ctrl_bs_handler);

	matrix = NULL;
	bitmatrix = NULL;
	totalsec = 0.0;
	
	/* Start timing */
	gettimeofday(&t1, &tz);

	/* Error checking parameters */
	//if (argc != 2) {
	//	fprintf(stderr, "usage: inputfile\n");
	//	exit(0);
	//}
	curdir = (char *)malloc(sizeof(char)*100);
	getcwd(curdir, 100);

	printf("argv=%s dup_argv=%s\n",argv,dup_argv);
	
	/* Begin recreation of file names */
	cs1 = (char*)malloc(sizeof(char)*strlen(argv));
	cs2 = strrchr(argv, '/');
	if (cs2 != NULL) {
		cs2++;
		strcpy(cs1, cs2);
	}
	else {
		strcpy(cs1, argv);
	}
	cs2 = strchr(cs1, '.');
	if (cs2 != NULL) {
		*cs2 = '\0';
	}	
	cs2 = (char*)malloc(sizeof(char)*strlen(argv));
	fname = strchr(argv, '.');
	if(fname != NULL)
		strcpy(cs2, fname);
	else
		*cs2='\0';
//

	if(dup_argv != NULL){
		dup_cs1 = (char*)malloc(sizeof(char)*strlen(dup_argv));
		dup_cs2 = strrchr(dup_argv, '/');
		if (dup_cs2 != NULL) {
			dup_cs2++;
			strcpy(dup_cs1, dup_cs2);
		}
		else {
			strcpy(dup_cs1, dup_argv);
		}
		dup_cs2 = strchr(dup_cs1, '.');
		if (dup_cs2 != NULL) {
			*dup_cs2 = '\0';
		}	
		dup_cs2 = (char*)malloc(sizeof(char)*strlen(dup_argv));
		fname = strchr(dup_argv, '.');
		if(fname!=NULL)
			strcpy(dup_cs2, fname);
		else
			*dup_cs2='\0';
	}
//
		printf("CRS_decoder.c: cs1=%s cs2=%s\n",cs1,cs2);
		printf("CRS_decoder.c: dup_cs1=%s dup_cs2=%s\n",dup_cs1,dup_cs2);

	fname = (char *)malloc(sizeof(char*)*(100+strlen(argv)+10));

	/* Read in parameters from metadata file */
	sprintf(fname, "%s/Coding/%s_meta.txt", curdir, cs1);

	fp = fopen(fname, "rb");
	temp = (char *)malloc(sizeof(char)*(strlen(argv)+10));
	fscanf(fp, "%s", temp);	
	
	if (fscanf(fp, "%d", &origsize) != 1) {
		fprintf(stderr, "Original size is not valid\n");
		exit(0);
	}
	fclose(fp);	

	if(origsize%buffersize == 0)
		readins = origsize/buffersize;
	else
		readins = (origsize/buffersize)+1;

	/* Allocate memory */
	erased = (int *)malloc(sizeof(int)*(k+m));
	for (i = 0; i < k+m; i++)
		erased[i] = 0;
	erasures = (int *)malloc(sizeof(int)*(k+m));

	data = (char **)malloc(sizeof(char *)*k);
	coding = (char **)malloc(sizeof(char *)*m);
	if (buffersize != origsize) {
		for (i = 0; i < k; i++) {
			data[i] = (char *)malloc(sizeof(char)*(buffersize/k));
		}
		for (i = 0; i < m; i++) {
			coding[i] = (char *)malloc(sizeof(char)*(buffersize/k));
		}
		blocksize = buffersize/k;
	}

	sprintf(temp, "%d", k);
	md = strlen(temp);
	gettimeofday(&t3, &tz);

	/* Create coding matrix or bitmatrix */
	
	matrix = cauchy_good_general_coding_matrix(k, m, w);
	bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
			

	gettimeofday(&t4, &tz);
	tsec = 0.0;
	tsec += t4.tv_usec;
	tsec -= t3.tv_usec;
	tsec /= 1000000.0;
	tsec += t4.tv_sec;
	tsec -= t3.tv_sec;
	totalsec += tsec;
	
	/* Begin decoding process */
	total = 0;
	n = 1;	
	while (n <= readins) {
		numerased = 0;
		/* Open files, check for erasures, read in data/coding */	
		for (i = 1; i <= k; i++) {
			sprintf(fname, "/media/newSSD%d/server/%s_k%0*d%s", ssd_no[i-1], cs1, md, i, cs2);
			fp = fopen(fname, "rb");
			if (fp == NULL) {
				erased[i-1] = 1;
				erasures[numerased] = i-1;
				numerased++;
				//printf("%s failed\n", fname);
			}
			else {
				if (buffersize == origsize) {
					stat(fname, &status);
					blocksize = status.st_size;
					data[i-1] = (char *)malloc(sizeof(char)*blocksize);
					fread(data[i-1], sizeof(char), blocksize, fp);
				}
				else {
					fseek(fp, blocksize*(n-1), SEEK_SET); 
					fread(data[i-1], sizeof(char), buffersize/k, fp);
				}
				fclose(fp);
				d++;
			}
		}
		for (i = 1; i <= m ; i++) {
			sprintf(fname, "/media/newSSD%d/server/%s_m%0*d%s", ssd_no[k+i-1], cs1, md, i, cs2);
			fp = fopen(fname, "rb");
			if (fp == NULL || d > k ) {
				erased[k+(i-1)] = 1;
				erasures[numerased] = k+i-1;
				numerased++;
				//printf("%s failed\n", fname);
				if(fp != NULL) fclose(fp);
			}
			else {
				if (buffersize == origsize) {
					stat(fname, &status);
					blocksize = status.st_size;
					coding[i-1] = (char *)malloc(sizeof(char)*blocksize);
					fread(coding[i-1], sizeof(char), blocksize, fp);
				}
				else {
					fseek(fp, blocksize*(n-1), SEEK_SET);
					fread(coding[i-1], sizeof(char), blocksize, fp);
				}	
				fclose(fp);
				d++;
			}
		}
		/* Finish allocating data/coding if needed */
		if (n == 1) {
			for (i = 0; i < numerased; i++) {
				if (erasures[i] < k) {
					data[erasures[i]] = (char *)malloc(sizeof(char)*blocksize);
				}
				else {
					coding[erasures[i]-k] = (char *)malloc(sizeof(char)*blocksize);
				}
			}
		}
		
		erasures[numerased] = -1;
		gettimeofday(&t3, &tz);
	
		/* Choose proper decoding method */
			i = jerasure_schedule_decode_lazy(k, m, w, bitmatrix, erasures, data, coding, blocksize, packetsize, 1);
		
		gettimeofday(&t4, &tz);
	
		/* Exit if decoding was unsuccessful */
		if (i == -1) {
			fprintf(stderr, "Unsuccessful!\n");
			exit(0);
		}
		

	
		/* Create decoded file */
		if(dup_argv == NULL)
			sprintf(fname, "%s/Coding/%s_decoded%s", curdir, cs1, cs2);
		else
			sprintf(fname, "%s/Coding/%s_decoded%s", curdir, dup_cs1, dup_cs2);
		if (n == 1) {
			fp = fopen(fname, "wb");
		}
		else {
			fp = fopen(fname, "ab");
		}
		for (i = 0; i < k; i++) {
			if (total+blocksize <= origsize) {
				fwrite(data[i], sizeof(char), blocksize, fp);
				total+= blocksize;
			}
			else {
				for (j = 0; j < blocksize; j++) {
					if (total < origsize) {
						fprintf(fp, "%c", data[i][j]);
						total++;
					}
					else {
						break;
					}
					
				}
			}
		}
		n++;
		fclose(fp);
		tsec = 0.0;
		tsec += t4.tv_usec;
		tsec -= t3.tv_usec;
		tsec /= 1000000.0;
		tsec += t4.tv_sec;
		tsec -= t3.tv_sec;
		totalsec += tsec;
	}
	
	/* Free allocated memory */
	if(dup_argv != NULL)
	{
		free(dup_cs1);
		free(dup_cs2);
	}

	free(cs1);
	free(fname);
	free(data);
	free(coding);
	free(erasures);
	free(erased);
	
	/* Stop timing and print time */
	gettimeofday(&t2, &tz);
	tsec = 0;
	tsec += t2.tv_usec;
	tsec -= t1.tv_usec;
	tsec /= 1000000.0;
	tsec += t2.tv_sec;
	tsec -= t1.tv_sec;
	printf("Decoding (MB/sec): %0.10f\n", (origsize/1024/1024)/totalsec);
	printf("De_Total (MB/sec): %0.10f\n\n", (origsize/1024/1024)/tsec);
}	
示例#3
0
int main (int argc, char **argv) {
	FILE *fp;				// File pointer
        char *dummy;
        int err;

	/* Jerasure arguments */
	char **data;
	char **coding;
	int *erasures;
	int *erased;
	int *matrix;
	int *bitmatrix;
	
	/* Parameters */
	int k, m, w, packetsize, buffersize;
	enum Coding_Technique tech;
	char *c_tech;
	
	int i, j;				// loop control variables
	int blocksize;			// size of individual files
	int origsize;			// size of file before padding
	int total;				// used to write data, not padding to file
	struct stat status;		// used to find size of individual files
	int numerased;			// number of erased files
		
	/* Used to recreate file names */
	char *temp;
	char *cs1, *cs2, *extension;
	char *fname;
	int md;
	char *curdir;

	/* Used to time decoding */
	struct timeval t1, t2, t3, t4;
	struct timezone tz;
	double tsec;
	double totalsec;

	
	signal(SIGQUIT, ctrl_bs_handler);

	matrix = NULL;
	bitmatrix = NULL;
	totalsec = 0.0;
        blocksize = -1;

	/* Start timing */
	gettimeofday(&t1, &tz);

	/* Error checking parameters */
	if (argc != 2) {
		fprintf(stderr, "usage: inputfile\n");
		exit(0);
	}
	curdir = (char *)malloc(sizeof(char)*100);
	dummy = getcwd(curdir, 100);
	
	/* Begin recreation of file names */
	cs1 = (char*)malloc(sizeof(char)*strlen(argv[1]));
	cs2 = strrchr(argv[1], '/');
	if (cs2 != NULL) {
		cs2++;
		strcpy(cs1, cs2);
	}
	else {
		strcpy(cs1, argv[1]);
	}
	cs2 = strchr(cs1, '.');
	if (cs2 != NULL) {
                extension = strdup(cs2);
		*cs2 = '\0';
	} else {
           extension = strdup("");
        }	
	fname = (char *)malloc(sizeof(char*)*(100+strlen(argv[1])+10));

	/* Read in parameters from metadata file */
	sprintf(fname, "%s/Coding/%s_meta.txt", curdir, cs1);

	fp = fopen(fname, "rb");
        if (fp == NULL) {
          fprintf(stderr, "Error: no metadata file %s\n", fname);
          exit(1);
        }
	temp = (char *)malloc(sizeof(char)*(strlen(argv[1])+10));
	err = fscanf(fp, "%s", temp);	
	
	if (fscanf(fp, "%d", &origsize) != 1) {
		fprintf(stderr, "Original size is not valid\n");
		exit(0);
	}
	if (fscanf(fp, "%d %d %d %d %d", &k, &m, &w, &packetsize, &buffersize) != 5) {
		fprintf(stderr, "Parameters are not correct\n");
		exit(0);
	}
	c_tech = (char *)malloc(sizeof(char)*(strlen(argv[1])+10));
	err = fscanf(fp, "%s", c_tech);
	err = fscanf(fp, "%d", &tech);
	method = tech;
	err = fscanf(fp, "%d", &readins);
	fclose(fp);	

	/* Allocate memory */
	erased = (int *)malloc(sizeof(int)*(k+m));
	for (i = 0; i < k+m; i++)
		erased[i] = 0;
	erasures = (int *)malloc(sizeof(int)*(k+m));

	data = (char **)malloc(sizeof(char *)*k);
	coding = (char **)malloc(sizeof(char *)*m);
	if (buffersize != origsize) {
		for (i = 0; i < k; i++) {
			data[i] = (char *)malloc(sizeof(char)*(buffersize/k));
		}
		for (i = 0; i < m; i++) {
			coding[i] = (char *)malloc(sizeof(char)*(buffersize/k));
		}
		blocksize = buffersize/k;
	}

	sprintf(temp, "%d", k);
	md = strlen(temp);
	gettimeofday(&t3, &tz);

	/* Create coding matrix or bitmatrix */
	switch(tech) {
		case No_Coding:
                case RDP:
                case EVENODD:
			break;
		case Reed_Sol_Van:
			matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
			break;
		case Reed_Sol_R6_Op:
			matrix = reed_sol_r6_coding_matrix(k, w);
			break;
		case Cauchy_Orig:
			matrix = cauchy_original_coding_matrix(k, m, w);
			bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
			break;
		case Cauchy_Good:
			matrix = cauchy_good_general_coding_matrix(k, m, w);
			bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
			break;
		case Liberation:
			bitmatrix = liberation_coding_bitmatrix(k, w);
			break;
		case Blaum_Roth:
			bitmatrix = blaum_roth_coding_bitmatrix(k, w);
			break;
		case Liber8tion:
			bitmatrix = liber8tion_coding_bitmatrix(k);
			break;
		default:
			fprintf(stderr,  "unsupported coding technique used\n");
			break;
	}
	gettimeofday(&t4, &tz);
	tsec = 0.0;
	tsec += t4.tv_usec;
	tsec -= t3.tv_usec;
	tsec /= 1000000.0;
	tsec += t4.tv_sec;
	tsec -= t3.tv_sec;
	totalsec += tsec;
	
	/* Begin decoding process */
	total = 0;
	n = 1;	
	while (n <= readins) {
		numerased = 0;
		/* Open files, check for erasures, read in data/coding */	
		for (i = 1; i <= k; i++) {
			sprintf(fname, "%s/Coding/%s_k%0*d%s", curdir, cs1, md, i, extension);
			fp = fopen(fname, "rb");
			if (fp == NULL) {
				erased[i-1] = 1;
				erasures[numerased] = i-1;
				numerased++;
				//printf("%s failed\n", fname);
			}
			else {
				if (buffersize == origsize) {
					stat(fname, &status);
					blocksize = status.st_size;
					data[i-1] = (char *)malloc(sizeof(char)*blocksize);
					err = fread(data[i-1], sizeof(char), blocksize, fp);
				}
				else {
					fseek(fp, blocksize*(n-1), SEEK_SET); 
					err = fread(data[i-1], sizeof(char), buffersize/k, fp);
				}
				fclose(fp);
			}
		}
		for (i = 1; i <= m; i++) {
			sprintf(fname, "%s/Coding/%s_m%0*d%s", curdir, cs1, md, i, extension);
				fp = fopen(fname, "rb");
			if (fp == NULL) {
				erased[k+(i-1)] = 1;
				erasures[numerased] = k+i-1;
				numerased++;
				//printf("%s failed\n", fname);
			}
			else {
				if (buffersize == origsize) {
					stat(fname, &status);
					blocksize = status.st_size;
					coding[i-1] = (char *)malloc(sizeof(char)*blocksize);
					err = fread(coding[i-1], sizeof(char), blocksize, fp);
				}
				else {
					fseek(fp, blocksize*(n-1), SEEK_SET);
					err = fread(coding[i-1], sizeof(char), blocksize, fp);
				}
				fclose(fp);
			}
		}
		/* Finish allocating data/coding if needed */
		if (n == 1) {
			for (i = 0; i < numerased; i++) {
				if (erasures[i] < k) {
					data[erasures[i]] = (char *)malloc(sizeof(char)*blocksize);
				}
				else {
					coding[erasures[i]-k] = (char *)malloc(sizeof(char)*blocksize);
				}
			}
		}
		
		erasures[numerased] = -1;
		gettimeofday(&t3, &tz);
	
		/* Choose proper decoding method */
		if (tech == Reed_Sol_Van || tech == Reed_Sol_R6_Op) {
			i = jerasure_matrix_decode(k, m, w, matrix, 1, erasures, data, coding, blocksize);
		}
		else if (tech == Cauchy_Orig || tech == Cauchy_Good || tech == Liberation || tech == Blaum_Roth || tech == Liber8tion) {
			i = jerasure_schedule_decode_lazy(k, m, w, bitmatrix, erasures, data, coding, blocksize, packetsize, 1);
		}
		else {
			fprintf(stderr, "Not a valid coding technique.\n");
			exit(0);
		}
		gettimeofday(&t4, &tz);
	
		/* Exit if decoding was unsuccessful */
		if (i == -1) {
			fprintf(stderr, "Unsuccessful!\n");
			exit(0);
		}
	
		/* Create decoded file */
		sprintf(fname, "%s/Coding/%s_decoded%s", curdir, cs1, extension);
		if (n == 1) {
			fp = fopen(fname, "wb");
		}
		else {
			fp = fopen(fname, "ab");
		}
		for (i = 0; i < k; i++) {
			if (total+blocksize <= origsize) {
				fwrite(data[i], sizeof(char), blocksize, fp);
				total+= blocksize;
			}
			else {
				for (j = 0; j < blocksize; j++) {
					if (total < origsize) {
						fprintf(fp, "%c", data[i][j]);
						total++;
					}
					else {
						break;
					}
					
				}
			}
		}
		n++;
		fclose(fp);
		tsec = 0.0;
		tsec += t4.tv_usec;
		tsec -= t3.tv_usec;
		tsec /= 1000000.0;
		tsec += t4.tv_sec;
		tsec -= t3.tv_sec;
		totalsec += tsec;
	}
	
	/* Free allocated memory */
	free(cs1);
	free(extension);
	free(fname);
	free(data);
	free(coding);
	free(erasures);
	free(erased);

	/* Stop timing and print time */
	gettimeofday(&t2, &tz);
	tsec = 0;
	tsec += t2.tv_usec;
	tsec -= t1.tv_usec;
	tsec /= 1000000.0;
	tsec += t2.tv_sec;
	tsec -= t1.tv_sec;
	printf("Decoding (MB/sec): %0.10f\n", (origsize/1024/1024)/totalsec);
	printf("De_Total (MB/sec): %0.10f\n\n", (origsize/1024/1024)/tsec);

        return(0);
}
int decode_MSR_product_matrix_no_output(char **input, size_t input_size, int* erasures, struct coding_info *info)
{
	clock_t clk, tclk;
	int i, j,c1,c2,tdone,inv;
	int n = info->req.n;
	int d = info->req.d;
	int k = info->req.k;
	int w = info->req.w;
	int subpacket_size = input_size/(d-k+1);
	int num_of_long = subpacket_size/sizeof(long);	
	long *src_pos,*des_pos;
	int *vector_A=NULL;
 	char **ptrs;
	int **decode_schedule=NULL;
	int *erased = NULL;	
	int *bitmatrix_temp = malloc(sizeof(int)*(k-1)*(k-1)*w*w*4);
	char *data_transformed = malloc(input_size*k);    // this is the buffer for tranformed data, i.e., matrix M. The output is the systematic part of 
							  // codingmatrix*M, and 
					                  // we will regenerate the erased data from M using the encoding matrix, which will be written to *output.
	int *pseudo_erasures = malloc(sizeof(int)*(n*2));  // in order to recompute M, we view it as a systematic MDS code, 
							  // sometimes (n+k,k), sometimes (n+d-k+1,d-k+1), thus we over allocate
	char **M_ptrs = malloc(sizeof(void*)*d*(d-k+1));  // this is the pointer matrix to elements in M.		
	char **data_ptrs = malloc(sizeof(void*)*n);
	char **coding_ptrs = malloc(sizeof(void*)*n);
	int* remaining = malloc(sizeof(int)*k);           // not erased devices
	char *buffer1 = malloc(subpacket_size*k*(k-1)*2);  // need subpacketsize*k>=(max(4,k-1)+k-1)*sizeof(int), thus put factor of 2 to guarentee it
	int *buffer1_int = (int*)buffer1;                  // alternative pointer for buffer1
	char *buffer2 = malloc(subpacket_size*k*(k-1));

	if(data_transformed==NULL||pseudo_erasures==NULL||data_ptrs==NULL
		||coding_ptrs==NULL||buffer1==NULL||buffer2==NULL||M_ptrs==NULL||remaining==NULL){
		printf("Can not allocate memory\n");
		jerasure_free_schedule(decode_schedule);
		if(data_ptrs!=NULL)free(data_ptrs);
		if(coding_ptrs!=NULL)free(coding_ptrs);
		if(pseudo_erasures!=NULL)free(pseudo_erasures);
		if(data_transformed!=NULL)free(data_transformed);
		if(M_ptrs!=NULL)free(M_ptrs);	
		if(buffer1!=NULL)free(buffer1);
		if(buffer2!=NULL)free(buffer2);
		if(remaining!=NULL)free(remaining);
		return(-1);
	}
	//set up pointers for matrix M
	// first k-1 rows, only have S1
	for(i=0,c2=0;i<k-1;i++){
		c1 = i*(d-k+1);
		for(j=i;j<k-1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		for(j=k-1;j<d-k+1;j++)
			M_ptrs[c1+j] = NULL;			
		for(j=0;j<i;j++)
			M_ptrs[c1+j] = M_ptrs[j*(d-k+1)+i]; // symmetric matrix, thus there is (i,j) and (j,i) point to the same pointer.
	}
	// next k-1 rows
	for(i=0;i<k-1;i++){
		c1 = (k-1+i)*(d-k+1);
		for(j=i;j<d-k+1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		for(j=0;j<i;j++)
			M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i];		
	}
	// next 1 and (d-2k+1) rows, may not exist
	if(d>2*k-2)
	{
		c1 = (2*k-2)*(d-k+1);
		for(j=k-1;j<d-k+1;j++,c2++)
			M_ptrs[c1+j] = data_transformed + subpacket_size*c2;
		for(j=0;j<i;j++)
			M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+k-1];			
		for(i=2*k-1;i<d;i++){
			c1 = i*(d-k+1);
			for(j=0;j<k;j++)
				M_ptrs[c1+j] = M_ptrs[(j+k-1)*(d-k+1)+i-k+1];
			for(j=k;j<d-k+1;j++)
				M_ptrs[c1+j] = NULL;
		}
	}
	
        // first decode the last d-2k+1 columns of T and Z: view it as an (n+k,k) MDS code. Note strictly speaking this might 
	// not be a real (n+k,k) MDS code, but it hardly matters.
	// before decoding operation, prepare for the pseudoerasure location array
	if(d>2*k-2){ // only when d>2k-2, T and Z exist
		for(i=0;i<k;i++)
			pseudo_erasures[i] = i;
		for(i=0;i<n;i++){
			if(erasures[i]==-1){
				pseudo_erasures[i+k] = -1;
				break;
			}
			pseudo_erasures[i+k] = erasures[i] + k;		
		}

		// make the decoding schedule: we can save the trouble of manually generate this schedule, at the expense of
		// more computation
		decode_schedule = jerasure_generate_decoding_schedule(k, n, w, info->subbitmatrix_array[0], pseudo_erasures, 1);

		for(i=0;i<d-2*k+1;i++){
			for(j=0;j<k;j++)
				data_ptrs[j] = M_ptrs[i+k+(d-k+1)*(k-1+j)];		
			for(j=0;j<n;j++)
				coding_ptrs[j] = input[j]+subpacket_size*(k+i);
			ptrs = set_up_ptrs_for_scheduled_decoding(k, n, pseudo_erasures, data_ptrs,coding_ptrs);
		  	if (ptrs == NULL){
				printf("Can not allocate memory\n");
				goto complete;
			}
			// assume packetsize = ALIGNMENT
			for (tdone = 0; tdone < subpacket_size; tdone += ALIGNMENT*w) {
				jerasure_do_scheduled_operations(ptrs, decode_schedule, ALIGNMENT);
				for (c1 = 0; c1 < k+n; c1++) ptrs[c1] += (ALIGNMENT*w);
			}
			free(ptrs);
		} 
		//next decode the first column of T and Z: we view this as an (n+d-k+1,d-k+1) erasure codes
		// first setup the pseudo erasure location vector	
		for(i=0;i<k;i++)
			pseudo_erasures[i] = i; // in the first columne of the Z matrix, the last d-2k+1 elements are known, but the first k elements are not
		for(i=0;i<n;i++)
		{
			if(erasures[i]==-1){
				pseudo_erasures[i+k] = -1;
				break;
			}
			pseudo_erasures[i+k] = erasures[i]+d-k+1;		
		}
		for(j=0;j<d-k+1;j++)
			data_ptrs[j] = M_ptrs[(d-k+1)*(k-1+j)+k-1];
		for(j=0;j<n;j++)
			coding_ptrs[j] = input[j]+subpacket_size*(k-1);
		jerasure_schedule_decode_lazy(d-k+1,n,w,
					info->subbitmatrix_array[1],pseudo_erasures,data_ptrs,
					coding_ptrs,subpacket_size,ALIGNMENT,0);
	}
	//clk = clock();

	// now this is the hard part: to decode S1 and S2. The algorithm used here is slightly different from that in the paper:
	// instead of right multiply \Phi_{DC}', we only right multiply the sub-matrix of \Phi_{DC}' without of the last row

	// setting up remaining device array to facilitate decoding
	erased = jerasure_erasures_to_erased(k, n-k, erasures);
	for(i=0,c1=0;i<n&&c1<k;i++){
		if(erased[i]==0){
			remaining[c1] = i;
			c1++;
		}	
	}	
	//compute C_{DC}-\Delta_{DC}*T'
	for(i=0;i<k-1;i++){ // has k-1 columns
		for(j=0;j<d-2*k+2;j++)
			data_ptrs[j] = M_ptrs[(2*k-2+j)*(d-k+1)+i];
		for(j=0;j<k;j++)
			coding_ptrs[j] = buffer1+(j*(k-1)+i)*subpacket_size;
		for(j=0;j<k;j++){
			jerasure_bitmatrix_dotprod(d-2*k+2, w, info->subbitmatrix_array[2]+remaining[j]*(d-2*k+2)*w*w, NULL, j+d-2*k+2,
        	                data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
			src_pos = (long*)(input[remaining[j]]+i*subpacket_size);
			des_pos	= (long*)(coding_ptrs[j]);
			for(c2=0;c2<num_of_long;c2++)	
				des_pos[c2] ^= src_pos[c2];
		}	
	}
	
	// right multiply \Phi_{DC}': this will be P. 
	// result is in buffer2
	for(j=0;j<k;j++){ //j-th row
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer1+(j*(k-1)+i)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = buffer2 +(j*(k-1)+i)*subpacket_size;
		for(i=0;i<k-1;i++){
			jerasure_bitmatrix_dotprod(k-1, w, info->subbitmatrix_array[3]+remaining[i]*(k-1)*w*w, NULL, i+k-1,
        	                data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
		}
	}
	// now solve for the off-diagonal terms
	for(i=0;i<k-1;i++){
		for(j=i+1;j<k-1;j++){
			// solve for S1 tilde off-diagonal 
			// here we directly use the fact that Lambda = [0 1 2 3 ....];	
			int** temp_schedule;			
			inv = galois_single_divide(1,remaining[i]^remaining[j],w);					
			buffer1_int[0] = buffer1_int[1] = inv;			
			data_ptrs[0] = buffer2+(i*(k-1)+j)*subpacket_size;
			data_ptrs[1] = buffer2+(j*(k-1)+i)*subpacket_size;
			coding_ptrs[0] = M_ptrs[i*(d-k+1)+j];
			jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp);
			temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp);
			jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT);	
			if(temp_schedule!=NULL){
				jerasure_free_schedule(temp_schedule);
				temp_schedule = NULL;
			}			
			// solve for S2 tilde off-diagonal
			buffer1_int[0] = galois_single_multiply(remaining[j],inv,w);
			buffer1_int[1] = galois_single_multiply(remaining[i],inv,w);
			coding_ptrs[0] = M_ptrs[(i+k-1)*(d-k+1)+j];
			jerasure_matrix_to_bitmatrix_noallocate(2,1,w,buffer1_int,bitmatrix_temp);
			temp_schedule = jerasure_smart_bitmatrix_to_schedule(2, 1, w, bitmatrix_temp);
			jerasure_schedule_encode(2, 1, w, temp_schedule, data_ptrs, coding_ptrs,subpacket_size, ALIGNMENT);	
			if(temp_schedule!=NULL){
				jerasure_free_schedule(temp_schedule);
				temp_schedule = NULL;
			}			
		}	
	}
	//tclk = clock()-clk;
	//printf("~S1 and ~S2 off-diagonal decoded %.3e clocks \n", (double)tclk);	
	//clk = clock();
	// compute the A vector: A*\Phi_{DC1} = \Phi_{DC2}, note this is always possible because \Phi_{DC1} is alway full rank by construction
	// we first reuse buffer1 here to form \Phi_{DC1} matrix and then the compute its inverse
	for(i=0;i<k-1;i++){
		memcpy(buffer1_int+(k-1)*(k-1+i),(void*)(info->matrix+remaining[i]*d+k-1),(k-1)*sizeof(int));
	}

	jerasure_invert_matrix(buffer1_int+(k-1)*(k-1),buffer1_int,k-1,w);	
	vector_A = jerasure_matrix_multiply(info->matrix+remaining[k-1]*d+k-1,buffer1_int,1,k-1,k-1,k-1,w);	
	if(vector_A==NULL)
		goto complete;	
	
	for(i=0;i<k-1;i++){
		buffer1_int[i+(k-1)*(k-1)] = galois_single_multiply(vector_A[i],remaining[k-1],w);		
		buffer1_int[i+k*(k-1)] = vector_A[i];
	}		

	for(i=0;i<k-1;i++){
		memset(buffer1_int+(k-1)*(k+1),0,sizeof(int)*2*(k-1));
		buffer1_int[(k-1)*(k+1)+i] = remaining[i];
		buffer1_int[(k-1)*(k+2)+i] = 1;

		pseudo_erasures[0] = i;
		pseudo_erasures[1] = k-1+i;
		pseudo_erasures[2] = -1;
		for(j=0;j<2*k-2;j++)
			data_ptrs[j] = M_ptrs[i+j*(d-k+1)];
		coding_ptrs[0] = buffer2+((k-1)*(k-1)+i)*subpacket_size;
		coding_ptrs[1] = buffer2+(i*(k-1)+i)*subpacket_size;

		jerasure_matrix_to_bitmatrix_noallocate(2*k-2,2,w,buffer1_int+(k-1)*(k-1),bitmatrix_temp);				
		jerasure_schedule_decode_lazy(2*k-2,2,w,bitmatrix_temp,pseudo_erasures,data_ptrs,coding_ptrs,subpacket_size,ALIGNMENT,0);
      	}
	//tclk = clock()-clk;
	//printf("~S1 and ~S2 decoded %.3e clocks \n", (double)tclk);	
	// now we have both \tilde{S_1} and \tilde{S_2} in M_ptrs, need to recover S1 and S2 from them
	// this is done by multiply \tilde{S_1} left and right by inv(\Phi_{DC1}).
	// right-multiply for S1 
	int* bitmatrix_inv = jerasure_matrix_to_bitmatrix(k-1,k-1,w,buffer1_int);
	int** inv_schedule = jerasure_smart_bitmatrix_to_schedule(k-1, k-1, w, bitmatrix_inv);

	// right-multiply for S1
	for(i=0;i<k-1;i++){
		for(j=0;j<k-1;j++)
			data_ptrs[j] = M_ptrs[i*(d-k+1)+j];		
		for(j=0;j<k-1;j++)
			coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size;	
		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);	
	}
	// left-multiply for S1 
	for(j=0;j<k-1;j++){
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size;
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = M_ptrs[i*(d-k+1)+j];

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);

	}
	// right-multiply for S2
	for(i=0;i<k-1;i++){
		for(j=0;j<k-1;j++)
			data_ptrs[j] = M_ptrs[(i+k-1)*(d-k+1)+j];
		for(j=0;j<k-1;j++)
			coding_ptrs[j] = buffer2+(i*(k-1)+j)*subpacket_size;

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// left-multiply for S2 
	for(j=0;j<k-1;j++){
		for(i=0;i<k-1;i++)
			data_ptrs[i] = buffer2+(i*(k-1)+j)*subpacket_size;
		//for(i=j;i<k-1;i++)
		for(i=0;i<k-1;i++)
			coding_ptrs[i] = M_ptrs[(i+k-1)*(d-k+1)+j];

		jerasure_schedule_encode(k-1, k-1, w, inv_schedule, data_ptrs, coding_ptrs, subpacket_size, ALIGNMENT);
	}
	// having S1,S2,T, now can also fill the first k-1 column of the output		
	for(i=0;i<k-1;i++){
		for(j=0;j<d;j++)
			data_ptrs[j] = M_ptrs[j*(d-k+1)+i];
		for(j=0;j<n;j++)
			coding_ptrs[j] = input[j]+i*subpacket_size;
		for(j=0;j<n;j++){
			if(erased[j]==1)
				jerasure_bitmatrix_encode(d,1,w,info->bitmatrix+(j*d*w*w),data_ptrs,coding_ptrs+j,subpacket_size,ALIGNMENT);
		}
	}

	// clean up
complete:
	if(decode_schedule)
		jerasure_free_schedule(decode_schedule);
	if(inv_schedule)
		jerasure_free_schedule(inv_schedule);
	free(data_ptrs);
	free(coding_ptrs);
	if(erased)free(erased);
	free(pseudo_erasures);
	free(data_transformed);
	free(M_ptrs);	
	free(buffer1);
	free(buffer2);
	free(remaining);
	free(bitmatrix_temp);
	if(vector_A!=NULL)free(vector_A);
	return(1);
}