/*
 * Recover two failed blocks.
 *
 * disks : number of entries in ptrs; the data blocks come first, followed
 *         by P at index disks-2 and Q at index disks-1
 * bytes : size of each block in bytes
 * faila,
 * failb : indices of the two failed blocks, in either order
 * ptrs  : block pointers; the buffers of the failed blocks receive the
 *         recovered contents
 */
void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
{
	/* Normalise so that faila < failb; the tests below rely on it. */
	if ( faila > failb ) {
		int tmp = faila;
		faila = failb;
		failb = tmp;
	}

	if ( failb == disks-1 ) {
		if ( faila == disks-2 ) {
			/* P+Q failure.  Just rebuild the syndrome. */
			raid6_call.gen_syndrome(disks, bytes, ptrs);
		} else {
			/* data+Q failure.  P is intact, so this is the plain
			   RAID-5 case: the lost data block is P XORed with
			   every surviving data block.  Once the data set is
			   complete again the syndrome is rebuilt to obtain Q
			   (P is regenerated with the value it already has). */
			unsigned char *dst = ptrs[faila];
			const unsigned char *par = ptrs[disks-2];
			size_t off;
			int d;

			for (off = 0; off < bytes; off++)
				dst[off] = par[off];

			for (d = 0; d < disks-2; d++) {
				const unsigned char *src = ptrs[d];

				if (d == faila)
					continue;
				for (off = 0; off < bytes; off++)
					dst[off] ^= src[off];
			}

			raid6_call.gen_syndrome(disks, bytes, ptrs);
		}
	} else {
		if ( failb == disks-2 ) {
			/* data+P failure. */
			raid6_datap_recov(disks, bytes, faila, ptrs);
		} else {
			/* data+data failure. */
			raid6_2data_recov(disks, bytes, faila, failb, ptrs);
		}
	}
}
Esempio n. 2
0
/**
 * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
 * @disks: number of disks in the RAID-6 array
 * @bytes: block size
 * @faila: failed drive index
 * @blocks: array of source pointers where the last two entries are p and q
 * @submit: submission/completion modifiers
 *
 * Returns whatever the final async_xor() in the chain returns, or NULL when
 * @submit carries no scribble buffer and the recovery is done synchronously
 * through raid6_datap_recov().  In the synchronous case the entries of
 * @blocks are overwritten in place with buffer addresses.
 *
 * The caller's flags and completion callback are attached only to the last
 * operation of the chain; every intermediate operation is submitted with
 * ASYNC_TX_FENCE and no callback.
 */
struct dma_async_tx_descriptor *
async_raid6_datap_recov(int disks, size_t bytes, int faila,
			struct page **blocks, struct async_submit_ctl *submit)
{
	struct dma_async_tx_descriptor *tx = NULL;
	struct page *p, *q, *dq;
	u8 coef;
	/* Save the caller's submit parameters: 'submit' itself is
	 * re-initialised for every intermediate operation below.
	 */
	enum async_tx_flags flags = submit->flags;
	dma_async_tx_callback cb_fn = submit->cb_fn;
	void *cb_param = submit->cb_param;
	void *scribble = submit->scribble;
	int good_srcs, good, i;
	struct page *srcs[2];

	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

	/* we need to preserve the contents of 'blocks' for the async
	 * case, so punt to synchronous if a scribble buffer is not available
	 */
	if (!scribble) {
		/* ptrs aliases blocks: each page pointer is replaced in
		 * place by the address the synchronous routine works on.
		 */
		void **ptrs = (void **) blocks;

		/* presumably waits for the operation we depend on - confirm
		 * against async_tx_quiesce()
		 */
		async_tx_quiesce(&submit->depend_tx);
		for (i = 0; i < disks; i++)
			if (blocks[i] == NULL)
				ptrs[i] = (void*)raid6_empty_zero_page;
			else
				ptrs[i] = page_address(blocks[i]);

		raid6_datap_recov(disks, bytes, faila, ptrs);

		async_tx_sync_epilog(submit);

		return NULL;
	}

	/* Count the present data blocks other than faila.  The scan stops
	 * at two because only "exactly one" versus "more than one" matters
	 * below; 'good' is the index of the last one counted.
	 */
	good_srcs = 0;
	good = -1;
	for (i = 0; i < disks-2; i++) {
		if (i == faila)
			continue;
		if (blocks[i]) {
			good = i;
			good_srcs++;
			if (good_srcs > 1)
				break;
		}
	}
	BUG_ON(good_srcs == 0);

	p = blocks[disks-2];
	q = blocks[disks-1];

	/* Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = blocks[faila];
	blocks[faila] = NULL;
	blocks[disks-1] = dq;

	/* in the 4-disk case we only need to perform a single source
	 * multiplication with the one good data block.
	 */
	if (good_srcs == 1) {
		struct page *g = blocks[good];

		/* With a single good data block g, the partial P is a copy
		 * of g ...
		 */
		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_memcpy(p, g, 0, 0, bytes, submit);

		/* ... and the partial Q is g scaled by raid6_gfexp[good],
		 * stored in dq.
		 */
		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
	} else {
		/* blocks[] currently has NULL at faila and dq in the Q slot,
		 * so the syndrome is generated over the surviving data with
		 * its Q part landing in dq.
		 */
		init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
				  scribble);
		tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
	}

	/* Restore pointer table */
	blocks[faila]   = dq;
	blocks[disks-1] = q;

	/* calculate g^{-faila} */
	coef = raid6_gfinv[raid6_gfexp[faila]];

	/* dq ^= q: the difference between the stored Q and the Q computed
	 * without the failed block.  NOTE(review): ASYNC_TX_XOR_DROP_DST is
	 * presumably needed because dq is both destination and srcs[0] -
	 * confirm against async_xor().
	 */
	srcs[0] = dq;
	srcs[1] = q;
	init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
			  NULL, NULL, scribble);
	tx = async_xor(dq, srcs, 0, 2, bytes, submit);

	/* Scale the delta by g^{-faila} in place; dq then holds the
	 * recovered data block.
	 */
	init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
	tx = async_mult(dq, dq, coef, bytes, submit);

	/* p ^= dq folds the recovered data into the partial P.  This is the
	 * last operation, so it carries the caller's flags and callback.
	 */
	srcs[0] = p;
	srcs[1] = dq;
	init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
			  cb_param, scribble);
	tx = async_xor(p, srcs, 0, 2, bytes, submit);

	return tx;
}
Esempio n. 3
0
/*******************************************************************************
 * Function:	save_stripes
 * Description:
 *	Reads the array stripe by stripe into buf, rebuilds the data chunks
 *	of devices that are absent or fail to read (from P, or from P and Q
 *	on RAID6) and optionally writes the data part of every stripe to the
 *	backup files.
 * Parameters:
 *	source		: A list of 'fds' of the active disks.
 *			  Some may be absent (negative fd)
 *	offsets		: A list of offsets on disk belonging
 *			  to the array [bytes]
 *	raid_disks	: geometry: number of disks in the array
 *	chunk_size	: geometry: chunk size [bytes]
 *	level		: geometry: RAID level
 *	layout		: geometry: layout
 *	nwrites		: number of backup files
 *	dest		: A list of 'fds' for mirrored targets
 *			  (e.g. backup files). They are already seeked to the
 *			  right (write) location. If NULL, data will be
 *			  written to buf only, one stripe after another
 *	start		: start address of data to read (must be stripe-aligned)
 *			  [bytes]
 *	length		: length of data to read (must be stripe-aligned)
 *			  [bytes]; the function abort()s if it is not a
 *			  multiple of data_disks * chunk_size
 *	buf		: buffer for data. Parity chunks are read into the
 *			  space directly after the data chunks of the stripe
 *			  being processed, so raid_disks * chunk_size bytes
 *			  must be available from the current stripe position
 * Returns:
 *	 0 : success
 *	-1 : fail (more failed chunks than the level can rebuild, or a
 *	     short write to a backup file)
 ******************************************************************************/
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length,
		 char *buf)
{
	int len;
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int disk;
	int i;
	unsigned long long length_test;

	if (!tables_ready)
		make_tables();
	ensure_zero_has_size(chunk_size);

	/* len is the amount of data in one stripe */
	len = data_disks * chunk_size;
	length_test = length / len;
	length_test *= len;

	if (length != length_test) {
		dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
		dprintf("\tArea for saving stripes (length) = %llu\n", length);
		dprintf("\tWork step (len)                  = %i\n", len);
		dprintf("\tExpected save area (length_test) = %llu\n",
			length_test);
		abort();
	}

	while (length > 0) {
		int failed = 0;
		/* For each failure (at most three are recorded):
		 * fdisk  = physical device number,
		 * fblock = logical block in the stripe (data first, then P, Q).
		 * Failures are recorded in increasing fblock order.
		 */
		int fdisk[3], fblock[3];
		for (disk = 0; disk < raid_disks ; disk++) {
			unsigned long long offset;
			int dnum;

			offset = (start/chunk_size/data_disks)*chunk_size;
			/* blocks past the data are requested as -1 (P), -2 (Q) */
			dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
				       start/chunk_size/data_disks,
				       raid_disks, level, layout);
			if (dnum < 0) abort();
			if (source[dnum] < 0 ||
			    lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
			    read(source[dnum], buf+disk * chunk_size, chunk_size)
			    != chunk_size)
				if (failed <= 2) {
					fdisk[failed] = dnum;
					fblock[failed] = disk;
					failed++;
				}
		}
		if (failed == 0 || fblock[0] >= data_disks)
			/* all data disks are good */
			;
		else if (level == 0)
			/* RAID0 has no parity chunk: data_disks == raid_disks,
			 * so nothing was read into buf past the data and a
			 * lost data chunk cannot be rebuilt.
			 */
			return -1;
		else if (failed == 1 || fblock[1] >= data_disks+1) {
			/* one failed data disk and good parity */
			char *bufs[data_disks];
			/* XOR sources: every good data chunk, with P taking
			 * the place of the failed one.
			 */
			for (i=0; i < data_disks; i++)
				if (fblock[0] == i)
					bufs[i] = buf + data_disks*chunk_size;
				else
					bufs[i] = buf + i*chunk_size;

			xor_blocks(buf + fblock[0]*chunk_size,
				   bufs, data_disks, chunk_size);
		} else if (failed > 2 || level != 6)
			/* too much failure */
			return -1;
		else {
			/* RAID6 computations needed. */
			uint8_t *bufs[data_disks+4];
			int qdisk;
			int syndrome_disks;
			disk = geo_map(-1, start/chunk_size/data_disks,
				       raid_disks, level, layout);
			qdisk = geo_map(-2, start/chunk_size/data_disks,
				       raid_disks, level, layout);
			if (is_ddf(layout)) {
				/* q over 'raid_disks' blocks, in device order.
				 * 'p' and 'q' get to be all zero
				 */
				for (i = 0; i < raid_disks; i++)
					bufs[i] = zero;
				for (i = 0; i < data_disks; i++) {
					int dnum = geo_map(i,
							   start/chunk_size/data_disks,
							   raid_disks, level, layout);
					int snum;
					/* i is the logical block number, so is index to 'buf'.
					 * dnum is physical disk number
					 * and thus the syndrome number.
					 */
					snum = dnum;
					bufs[snum] = (uint8_t*)buf + chunk_size * i;
				}
				syndrome_disks = raid_disks;
			} else {
				/* for md, q is over 'data_disks' blocks,
				 * starting immediately after 'q'
				 * Note that for the '_6' variety, the p block
				 * makes a hole that we need to be careful of.
				 */
				int j;
				int snum = 0;
				for (j = 0; j < raid_disks; j++) {
					int dnum = (qdisk + 1 + j) % raid_disks;
					if (dnum == disk || dnum == qdisk)
						continue;
					for (i = 0; i < data_disks; i++)
						if (geo_map(i,
							    start/chunk_size/data_disks,
							    raid_disks, level, layout) == dnum)
							break;
					/* i is the logical block number, so is index to 'buf'.
					 * dnum is physical disk number
					 * snum is syndrome disk for which 0 is immediately after Q
					 */
					bufs[snum] = (uint8_t*)buf + chunk_size * i;

					/* from here on fdisk[] holds syndrome
					 * numbers rather than device numbers
					 */
					if (fblock[0] == i)
						fdisk[0] = snum;
					if (fblock[1] == i)
						fdisk[1] = snum;
					snum++;
				}

				syndrome_disks = data_disks;
			}

			/* Place P and Q blocks at end of bufs */
			bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
			bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);

			if (fblock[1] == data_disks)
				/* One data failed, and parity failed */
				raid6_datap_recov(syndrome_disks+2, chunk_size,
						  fdisk[0], bufs, 0);
			else {
				/* Two data blocks failed, P,Q OK */
				raid6_2data_recov(syndrome_disks+2, chunk_size,
						  fdisk[0], fdisk[1], bufs, 0);
			}
		}
		if (dest) {
			/* only the data part (len bytes) goes to the backups */
			for (i = 0; i < nwrites; i++)
				if (write(dest[i], buf, len) != len)
					return -1;
		} else {
			/* build next stripe in buffer */
			buf += len;
		}
		length -= len;
		start += len;
	}
	return 0;
}
Esempio n. 4
0
/* Seek fd to offset and read exactly len bytes into buf.
 * Returns 0 on success, -1 if the seek fails or the read is short.
 */
static int check_stripes_read_chunk(int fd, unsigned long long offset,
				    char *buf, int len)
{
	if (lseek64(fd, offset, 0) < 0)
		return -1;
	return read(fd, buf, len) == len ? 0 : -1;
}

/* Seek fd to offset and write exactly len bytes from buf.
 * Returns 0 on success, -1 if the seek fails or the write is short.
 */
static int check_stripes_write_chunk(int fd, unsigned long long offset,
				     const char *buf, int len)
{
	if (lseek64(fd, offset, 0) < 0)
		return -1;
	return write(fd, buf, len) == len ? 0 : -1;
}

/*
 * check_stripes - verify P and Q of a range of RAID6 stripes, optionally
 * repairing them.
 *
 * info         : array information, passed to lock_stripe() and
 *                unlock_all_stripes()
 * source       : one fd per slot
 * offsets      : per-slot byte offset of the array data on the device
 * raid_disks   : number of slots; data_disks is raid_disks - 2
 * chunk_size   : chunk size in bytes
 * level, layout: geometry, passed to geo_map()
 * start, length: first stripe and number of stripes to process; both count
 *                stripes, the on-disk position is start * chunk_size
 * name         : per-slot names used in messages
 * repair       : 1 = rebuild failed_disk1 and failed_disk2 on every stripe,
 *                2 = rebuild the single slot the check points at,
 *                anything else = report only
 * failed_disk1,
 * failed_disk2 : slots to rebuild when repair == 1
 *
 * Returns 0 on success, the non-zero result of lock_stripe() or
 * unlock_all_stripes() if locking fails, or -1 if a chunk cannot be read
 * or written completely.
 */
int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets,
		  int raid_disks, int chunk_size, int level, int layout,
		  unsigned long long start, unsigned long long length, char *name[],
		  int repair, int failed_disk1, int failed_disk2)
{
	/* read the data and p and q blocks, and check we got them right */
	char *stripe_buf = xmalloc(raid_disks * chunk_size);
	/* stripes[slot]: chunk as read from that slot */
	char **stripes = xmalloc(raid_disks * sizeof(char*));
	/* blocks[n]: the same chunks in logical order, data then P then Q */
	char **blocks = xmalloc(raid_disks * sizeof(char*));
	/* inverse mapping: slot -> index into blocks[] */
	int *block_index_for_slot = xmalloc(raid_disks * sizeof(int));
	uint8_t *p = xmalloc(chunk_size);
	uint8_t *q = xmalloc(chunk_size);
	int *results = xmalloc(chunk_size * sizeof(int));
	sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t));

	int i;
	int diskP, diskQ;
	int data_disks = raid_disks - 2;
	int err = 0;

	extern int tables_ready;

	if (!tables_ready)
		make_tables();

	for ( i = 0 ; i < raid_disks ; i++)
		stripes[i] = stripe_buf + i * chunk_size;

	while (length > 0) {
		int disk;

		printf("pos --> %llu\n", start);

		/* The stripe is only held locked while it is being read. */
		err = lock_stripe(info, start, chunk_size, data_disks, sig);
		if(err != 0) {
			/* result 2 is the one failure after which no unlock
			 * is attempted
			 */
			if (err != 2)
				unlock_all_stripes(info, sig);
			goto exitCheck;
		}
		for (i = 0 ; i < raid_disks ; i++) {
			if (check_stripes_read_chunk(source[i],
						     offsets[i] + start * chunk_size,
						     stripes[i], chunk_size) != 0) {
				/* Checking stale buffer contents would produce
				 * bogus reports and, worse, bogus repairs.
				 */
				fprintf(stderr, "Failed to read chunk from slot %d at %llu\n",
					i, start);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}
		}
		err = unlock_all_stripes(info, sig);
		if(err != 0)
			goto exitCheck;

		for (i = 0 ; i < data_disks ; i++) {
			int slot = geo_map(i, start, raid_disks, level, layout);
			blocks[i] = stripes[slot];
			block_index_for_slot[slot] = i;
			printf("%d->%d\n", i, slot);
		}

		/* Compute the expected P and Q from the data just read. */
		qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size);
		diskP = geo_map(-1, start, raid_disks, level, layout);
		diskQ = geo_map(-2, start, raid_disks, level, layout);
		blocks[data_disks] = stripes[diskP];
		block_index_for_slot[diskP] = data_disks;
		blocks[data_disks+1] = stripes[diskQ];
		block_index_for_slot[diskQ] = data_disks+1;

		if (memcmp(p, stripes[diskP], chunk_size) != 0) {
			printf("P(%d) wrong at %llu\n", diskP, start);
		}
		if (memcmp(q, stripes[diskQ], chunk_size) != 0) {
			printf("Q(%d) wrong at %llu\n", diskQ, start);
		}
		raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results);
		disk = raid6_stats(results, raid_disks, chunk_size);

		/* Values >= -2 are block numbers in geo_map() terms
		 * (-1 = P, -2 = Q); translate them to a slot.
		 */
		if(disk >= -2) {
			disk = geo_map(disk, start, raid_disks, level, layout);
		}
		if(disk >= 0) {
			printf("Error detected at %llu: possible failed disk slot: %d --> %s\n",
				start, disk, name[disk]);
		}
		if(disk == -65535) {
			printf("Error detected at %llu: disk slot unknown\n", start);
		}
		if(repair == 1) {
			int wr1, wr2;

			printf("Repairing stripe %llu\n", start);
			printf("Assuming slots %d (%s) and %d (%s) are incorrect\n",
			       failed_disk1, name[failed_disk1],
			       failed_disk2, name[failed_disk2]);

			if (failed_disk1 == diskQ || failed_disk2 == diskQ) {
				char *all_but_failed_blocks[data_disks];
				int failed_data_or_p;
				int failed_block_index;

				if (failed_disk1 == diskQ)
					failed_data_or_p = failed_disk2;
				else
					failed_data_or_p = failed_disk1;
				printf("Repairing D/P(%d) and Q\n", failed_data_or_p);
				failed_block_index = block_index_for_slot[failed_data_or_p];
				/* XOR sources: P stands in for the failed data
				 * block.  If P itself failed no index matches
				 * and P is recomputed from all the data.
				 */
				for (i=0; i < data_disks; i++)
					if (failed_block_index == i)
						all_but_failed_blocks[i] = stripes[diskP];
					else
						all_but_failed_blocks[i] = blocks[i];
				xor_blocks(stripes[failed_data_or_p],
					all_but_failed_blocks, data_disks, chunk_size);
				/* Q is regenerated straight into its stripe buffer. */
				qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
			} else {
				ensure_zero_has_size(chunk_size);
				if (failed_disk1 == diskP || failed_disk2 == diskP) {
					int failed_data, failed_block_index;
					if (failed_disk1 == diskP)
						failed_data = failed_disk2;
					else
						failed_data = failed_disk1;
					failed_block_index = block_index_for_slot[failed_data];
					printf("Repairing D(%d) and P\n", failed_data);
					raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks);
				} else {
					printf("Repairing D and D\n");
					int failed_block_index1 = block_index_for_slot[failed_disk1];
					int failed_block_index2 = block_index_for_slot[failed_disk2];
					/* pass the indices in ascending order */
					if (failed_block_index1 > failed_block_index2) {
						int t = failed_block_index1;
						failed_block_index1 = failed_block_index2;
						failed_block_index2 = t;
					}
					raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks);
				}
			}

			err = lock_stripe(info, start, chunk_size, data_disks, sig);
			if(err != 0) {
				if (err != 2)
					unlock_all_stripes(info, sig);
				goto exitCheck;
			}

			/* Attempt both writes even if the first one fails. */
			wr1 = check_stripes_write_chunk(source[failed_disk1],
							offsets[failed_disk1] + start * chunk_size,
							stripes[failed_disk1], chunk_size);
			wr2 = check_stripes_write_chunk(source[failed_disk2],
							offsets[failed_disk2] + start * chunk_size,
							stripes[failed_disk2], chunk_size);
			if (wr1 != 0 || wr2 != 0) {
				fprintf(stderr, "Failed to write repaired chunk at %llu\n", start);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}

			err = unlock_all_stripes(info, sig);
			if(err != 0)
				goto exitCheck;
		} else if (disk >= 0 && repair == 2) {
			printf("Auto-repairing slot %d (%s)\n", disk, name[disk]);
			if (disk == diskQ) {
				qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size);
			} else {
				char *all_but_failed_blocks[data_disks];
				int failed_block_index = block_index_for_slot[disk];
				/* same XOR rebuild as above, for data or P */
				for (i=0; i < data_disks; i++)
					if (failed_block_index == i)
						all_but_failed_blocks[i] = stripes[diskP];
					else
						all_but_failed_blocks[i] = blocks[i];
				xor_blocks(stripes[disk],
					all_but_failed_blocks, data_disks, chunk_size);
			}

			err = lock_stripe(info, start, chunk_size, data_disks, sig);
			if(err != 0) {
				if (err != 2)
					unlock_all_stripes(info, sig);
				goto exitCheck;
			}

			if (check_stripes_write_chunk(source[disk],
						      offsets[disk] + start * chunk_size,
						      stripes[disk], chunk_size) != 0) {
				fprintf(stderr, "Failed to write repaired chunk to slot %d at %llu\n",
					disk, start);
				unlock_all_stripes(info, sig);
				err = -1;
				goto exitCheck;
			}

			err = unlock_all_stripes(info, sig);
			if(err != 0)
				goto exitCheck;
		}


		length--;
		start++;
	}

exitCheck:

	free(stripe_buf);
	free(stripes);
	free(blocks);
	free(block_index_for_slot);
	free(p);
	free(q);
	free(results);
	free(sig);

	return err;
}