/*
 * Try to find out if a specific disk has a problem.
 *
 * The stored P and Q chunks (stripes[diskP], stripes[diskQ]) are compared
 * byte by byte with the expected values in 'p' and 'q':
 *   - only P differs:  diskP is implicated;
 *   - only Q differs:  diskQ is implicated;
 *   - both differ:     a data block index is derived from the difference of
 *                      the GF logarithms of the two error bytes (mod 255)
 *                      and mapped to a disk with geo_map(), using
 *                      start/chunk_size as the stripe number and
 *                      data_disks + 2 as the number of disks.
 *
 * Returns:
 *   -1     : no byte of P or Q differs
 *   disk   : every differing byte implicates this same disk
 *   -2     : differing bytes implicate different disks, or a computed disk
 *            number is >= data_disks + 2
 *
 * The scan stops at the first contradiction.  Previously a contradiction
 * found on the very last byte of the chunk was recorded but the result was
 * only reset to -2 on the following iteration, so the conflicting disk
 * number was returned as if it were a confident answer.
 */
int raid6_check_disks(int data_disks, int start, int chunk_size,
		      int level, int layout, int diskP, int diskQ,
		      char *p, char *q, char **stripes)
{
	int suspect = -1;	/* disk implicated so far; -1 while none */
	int i;

	for (i = 0; i < chunk_size; i++) {
		uint8_t Px = (uint8_t)stripes[diskP][i] ^ (uint8_t)p[i];
		uint8_t Qx = (uint8_t)stripes[diskQ][i] ^ (uint8_t)q[i];
		int culprit;

		/* This byte is consistent: it tells us nothing new. */
		if (Px == 0 && Qx == 0)
			continue;

		if (Qx == 0) {
			culprit = diskP;
		} else if (Px == 0) {
			culprit = diskQ;
		} else {
			int data_id = raid6_gflog[Qx] - raid6_gflog[Px];

			if (data_id < 0)
				data_id += 255;
			culprit = geo_map(data_id, start/chunk_size,
					  data_disks + 2, level, layout);
		}

		/* A disk number outside the array cannot be trusted. */
		if (culprit >= data_disks + 2)
			return -2;

		if (suspect == -1)
			suspect = culprit;
		else if (culprit != suspect)
			return -2;	/* two bytes blame different disks */
	}

	return suspect;
}
int test_stripes(int *source, unsigned long long *offsets, int raid_disks, int chunk_size, int level, int layout, unsigned long long start, unsigned long long length) { /* ready the data and p (and q) blocks, and check we got them right */ char *stripe_buf = xmalloc(raid_disks * chunk_size); char **stripes = xmalloc(raid_disks * sizeof(char*)); char **blocks = xmalloc(raid_disks * sizeof(char*)); char *p = xmalloc(chunk_size); char *q = xmalloc(chunk_size); int i; int diskP, diskQ; int data_disks = raid_disks - (level == 5 ? 1: 2); if (!tables_ready) make_tables(); for ( i = 0 ; i < raid_disks ; i++) stripes[i] = stripe_buf + i * chunk_size; while (length > 0) { int disk; for (i = 0 ; i < raid_disks ; i++) { lseek64(source[i], offsets[i]+start, 0); read(source[i], stripes[i], chunk_size); } for (i = 0 ; i < data_disks ; i++) { int disk = geo_map(i, start/chunk_size, raid_disks, level, layout); blocks[i] = stripes[disk]; printf("%d->%d\n", i, disk); } switch(level) { case 6: qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size); diskP = geo_map(-1, start/chunk_size, raid_disks, level, layout); if (memcmp(p, stripes[diskP], chunk_size) != 0) { printf("P(%d) wrong at %llu\n", diskP, start / chunk_size); } diskQ = geo_map(-2, start/chunk_size, raid_disks, level, layout); if (memcmp(q, stripes[diskQ], chunk_size) != 0) { printf("Q(%d) wrong at %llu\n", diskQ, start / chunk_size); } disk = raid6_check_disks(data_disks, start, chunk_size, level, layout, diskP, diskQ, p, q, stripes); if(disk >= 0) { printf("Possible failed disk: %d\n", disk); } if(disk == -2) { printf("Failure detected, but disk unknown\n"); } break; } length -= chunk_size; start += chunk_size; } return 0; }
/* Restore data: * We are given: * A list of 'fds' of the active disks. Some may be '-1' for not-available. * A geometry: raid_disks, chunk_size, level, layout * An 'fd' to read from. It is already seeked to the right (Read) location. * A start and length. * The length must be a multiple of the stripe size. * * We build a full stripe in memory and then write it out. * We assume that there are enough working devices. */ int restore_stripes(int *dest, unsigned long long *offsets, int raid_disks, int chunk_size, int level, int layout, int source, unsigned long long read_offset, unsigned long long start, unsigned long long length, char *src_buf) { char *stripe_buf; char **stripes = xmalloc(raid_disks * sizeof(char*)); char **blocks = xmalloc(raid_disks * sizeof(char*)); int i; int rv; int data_disks = raid_disks - (level == 0 ? 0 : level <= 5 ? 1 : 2); if (posix_memalign((void**)&stripe_buf, 4096, raid_disks * chunk_size)) stripe_buf = NULL; if (zero == NULL || chunk_size > zero_size) { if (zero) free(zero); zero = xcalloc(1, chunk_size); zero_size = chunk_size; } if (stripe_buf == NULL || stripes == NULL || blocks == NULL || zero == NULL) { rv = -2; goto abort; } for (i = 0; i < raid_disks; i++) stripes[i] = stripe_buf + i * chunk_size; while (length > 0) { unsigned int len = data_disks * chunk_size; unsigned long long offset; int disk, qdisk; int syndrome_disks; if (length < len) { rv = -3; goto abort; } for (i = 0; i < data_disks; i++) { int disk = geo_map(i, start/chunk_size/data_disks, raid_disks, level, layout); if (src_buf == NULL) { /* read from file */ if (lseek64(source, read_offset, 0) != (off64_t)read_offset) { rv = -1; goto abort; } if (read(source, stripes[disk], chunk_size) != chunk_size) { rv = -1; goto abort; } } else { /* read from input buffer */ memcpy(stripes[disk], src_buf + read_offset, chunk_size); } read_offset += chunk_size; } /* We have the data, now do the parity */ offset = (start/chunk_size/data_disks) * chunk_size; switch (level) { case 
4: case 5: disk = geo_map(-1, start/chunk_size/data_disks, raid_disks, level, layout); for (i = 0; i < data_disks; i++) blocks[i] = stripes[(disk+1+i) % raid_disks]; xor_blocks(stripes[disk], blocks, data_disks, chunk_size); break; case 6: disk = geo_map(-1, start/chunk_size/data_disks, raid_disks, level, layout); qdisk = geo_map(-2, start/chunk_size/data_disks, raid_disks, level, layout); if (is_ddf(layout)) { /* q over 'raid_disks' blocks, in device order. * 'p' and 'q' get to be all zero */ for (i = 0; i < raid_disks; i++) if (i == disk || i == qdisk) blocks[i] = (char*)zero; else blocks[i] = stripes[i]; syndrome_disks = raid_disks; } else { /* for md, q is over 'data_disks' blocks, * starting immediately after 'q' */ for (i = 0; i < data_disks; i++) blocks[i] = stripes[(qdisk+1+i) % raid_disks]; syndrome_disks = data_disks; } qsyndrome((uint8_t*)stripes[disk], (uint8_t*)stripes[qdisk], (uint8_t**)blocks, syndrome_disks, chunk_size); break; } for (i=0; i < raid_disks ; i++) if (dest[i] >= 0) { if (lseek64(dest[i], offsets[i]+offset, 0) < 0) { rv = -1; goto abort; } if (write(dest[i], stripes[i], chunk_size) != chunk_size) { rv = -1; goto abort; } } length -= len; start += len; } rv = 0; abort: free(stripe_buf); free(stripes); free(blocks); return rv; }
/*******************************************************************************
 * Function:	save_stripes
 * Description:
 *	Reads whole stripes from the array, reconstructs any data chunk that
 *	could not be read (using P, or P and Q for RAID6), and leaves the data
 *	chunks of each stripe in buf, optionally also writing them to
 *	backup files.
 * Parameters:
 *	source		: A list of 'fds' of the active disks.
 *			  Entries that are negative are treated as failed.
 *	offsets		: A list of offsets on disk belonging
 *			  to the array [bytes]
 *	raid_disks	: geometry: number of disks in the array
 *	chunk_size	: geometry: chunk size [bytes]
 *	level		: geometry: RAID level
 *	layout		: geometry: layout
 *	nwrites		: number of backup files
 *	dest		: A list of 'fds' for mirrored targets
 *			  (e.g. backup files). They are already seeked to right
 *			  (write) location. If NULL, data will be written
 *			  to the buf only
 *	start		: start address of data to read (must be stripe-aligned)
 *			  [bytes]
 *	length		: length of data to read; must be a multiple of
 *			  data_disks * chunk_size [bytes]
 *	buf		: buffer for data. Every chunk of a stripe, P and Q
 *			  included, is read to buf + index * chunk_size, so
 *			  raid_disks * chunk_size bytes are touched per stripe.
 *			  When dest is NULL, buf advances by
 *			  data_disks * chunk_size after each stripe.
 * Returns:
 *	 0 : success
 *	-1 : too many failed chunks to reconstruct, or a write to a
 *	     backup target failed
 *	Calls abort() if length is not a multiple of the stripe data size or
 *	if geo_map() returns a negative device number.
 ******************************************************************************/
int save_stripes(int *source, unsigned long long *offsets,
		 int raid_disks, int chunk_size, int level, int layout,
		 int nwrites, int *dest,
		 unsigned long long start, unsigned long long length,
		 char *buf)
{
	int len;
	int data_disks = raid_disks - (level == 0 ? 0 : level <=5 ? 1 : 2);
	int disk;
	int i;
	unsigned long long length_test;

	if (!tables_ready)
		make_tables();
	ensure_zero_has_size(chunk_size);

	/* len is the amount of data (excluding parity) in one stripe. */
	len = data_disks * chunk_size;
	/* Round length down to a multiple of len to detect misalignment. */
	length_test = length / len;
	length_test *= len;

	if (length != length_test) {
		dprintf("Error: save_stripes(): Data are not alligned. EXIT\n");
		dprintf("\tArea for saving stripes (length) = %llu\n", length);
		dprintf("\tWork step (len) = %i\n", len);
		dprintf("\tExpected save area (length_test) = %llu\n",
			length_test);
		abort();
	}

	while (length > 0) {
		int failed = 0;
		/*
		 * For each chunk that could not be read:
		 *   fdisk[]  - device number (later, for md RAID6 layouts,
		 *              replaced by the syndrome index)
		 *   fblock[] - index of the chunk within buf
		 * Up to three failures are recorded so that "more than two"
		 * can be told apart from "exactly two".
		 */
		int fdisk[3], fblock[3];
		for (disk = 0; disk < raid_disks ; disk++) {
			unsigned long long offset;
			int dnum;

			offset = (start/chunk_size/data_disks)*chunk_size;
			/*
			 * Indexes below data_disks are data blocks; index
			 * data_disks maps to -1 and data_disks+1 to -2, the
			 * values this file passes to geo_map() for P and Q.
			 */
			dnum = geo_map(disk < data_disks ? disk : data_disks - disk - 1,
				       start/chunk_size/data_disks,
				       raid_disks, level, layout);
			if (dnum < 0) abort();
			/* A missing device, failed seek or short read all count as a failure. */
			if (source[dnum] < 0 ||
			    lseek64(source[dnum], offsets[dnum]+offset, 0) < 0 ||
			    read(source[dnum], buf+disk * chunk_size, chunk_size)
			    != chunk_size)
				if (failed <= 2) {
					fdisk[failed] = dnum;
					fblock[failed] = disk;
					failed++;
				}
		}
		/* Failures are recorded in increasing buf index order, so
		 * fblock[0] >= data_disks means only P and/or Q failed. */
		if (failed == 0 || fblock[0] >= data_disks)
			/* all data disks are good */
			;
		else if (failed == 1 || fblock[1] >= data_disks+1) {
			/* one failed data disk and good parity */
			/*
			 * NOTE(review): this branch reads the chunk at index
			 * data_disks as parity; for level 0 no such chunk is
			 * read above - confirm callers never reach here with
			 * level 0.
			 */
			char *bufs[data_disks];
			/* XOR of P with all other data chunks rebuilds the missing one. */
			for (i=0; i < data_disks; i++)
				if (fblock[0] == i)
					bufs[i] = buf + data_disks*chunk_size;
				else
					bufs[i] = buf + i*chunk_size;

			xor_blocks(buf + fblock[0]*chunk_size,
				   bufs, data_disks, chunk_size);
		} else if (failed > 2 || level != 6)
			/* too much failure */
			return -1;
		else {
			/* RAID6 computations needed. */
			/* Room for up to raid_disks syndrome entries plus P and Q. */
			uint8_t *bufs[data_disks+4];
			int qdisk;
			int syndrome_disks;
			/* From here on 'disk' holds the device number of P. */
			disk = geo_map(-1, start/chunk_size/data_disks,
				       raid_disks, level, layout);
			qdisk = geo_map(-2, start/chunk_size/data_disks,
				       raid_disks, level, layout);
			if (is_ddf(layout)) {
				/* q over 'raid_disks' blocks, in device order.
				 * 'p' and 'q' get to be all zero
				 */
				for (i = 0; i < raid_disks; i++)
					bufs[i] = zero;
				for (i = 0; i < data_disks; i++) {
					int dnum = geo_map(i,
							   start/chunk_size/data_disks,
							   raid_disks, level, layout);
					int snum;
					/* i is the logical block number, so is index to 'buf'.
					 * dnum is physical disk number
					 * and thus the syndrome number.
					 */
					snum = dnum;
					bufs[snum] = (uint8_t*)buf + chunk_size * i;
				}
				syndrome_disks = raid_disks;
			} else {
				/* for md, q is over 'data_disks' blocks,
				 * starting immediately after 'q'
				 * Note that for the '_6' variety, the p block
				 * makes a hole that we need to be careful of.
				 */
				int j;
				int snum = 0;
				for (j = 0; j < raid_disks; j++) {
					int dnum = (qdisk + 1 + j) % raid_disks;
					if (dnum == disk || dnum == qdisk)
						continue;
					/* Find which logical block lives on device dnum. */
					for (i = 0; i < data_disks; i++)
						if (geo_map(i,
							    start/chunk_size/data_disks,
							    raid_disks, level, layout) == dnum)
							break;
					/* i is the logical block number, so is index to 'buf'.
					 * dnum is physical disk number
					 * snum is syndrome disk for which 0 is immediately after Q
					 */
					bufs[snum] = (uint8_t*)buf + chunk_size * i;

					/* Translate the failed entries to syndrome indexes. */
					if (fblock[0] == i)
						fdisk[0] = snum;
					if (fblock[1] == i)
						fdisk[1] = snum;
					snum++;
				}

				syndrome_disks = data_disks;
			}

			/* Place P and Q blocks at end of bufs */
			bufs[syndrome_disks] = (uint8_t*)buf + chunk_size * data_disks;
			bufs[syndrome_disks+1] = (uint8_t*)buf + chunk_size * (data_disks+1);

			if (fblock[1] == data_disks)
				/* One data failed, and parity failed */
				raid6_datap_recov(syndrome_disks+2, chunk_size,
						  fdisk[0], bufs, 0);
			else {
				/* Two data blocks failed, P,Q OK */
				raid6_2data_recov(syndrome_disks+2, chunk_size,
						  fdisk[0], fdisk[1], bufs, 0);
			}
		}
		if (dest) {
			/* Only the first len bytes (the data chunks) are written. */
			for (i = 0; i < nwrites; i++)
				if (write(dest[i], buf, len) != len)
					return -1;
		} else {
			/* build next stripe in buffer */
			buf += len;
		}
		length -= len;
		start += len;
	}
	return 0;
}
int check_stripes(struct mdinfo *info, int *source, unsigned long long *offsets, int raid_disks, int chunk_size, int level, int layout, unsigned long long start, unsigned long long length, char *name[], int repair, int failed_disk1, int failed_disk2) { /* read the data and p and q blocks, and check we got them right */ char *stripe_buf = xmalloc(raid_disks * chunk_size); char **stripes = xmalloc(raid_disks * sizeof(char*)); char **blocks = xmalloc(raid_disks * sizeof(char*)); int *block_index_for_slot = xmalloc(raid_disks * sizeof(int)); uint8_t *p = xmalloc(chunk_size); uint8_t *q = xmalloc(chunk_size); int *results = xmalloc(chunk_size * sizeof(int)); sighandler_t *sig = xmalloc(3 * sizeof(sighandler_t)); int i; int diskP, diskQ; int data_disks = raid_disks - 2; int err = 0; extern int tables_ready; if (!tables_ready) make_tables(); for ( i = 0 ; i < raid_disks ; i++) stripes[i] = stripe_buf + i * chunk_size; while (length > 0) { int disk; printf("pos --> %llu\n", start); err = lock_stripe(info, start, chunk_size, data_disks, sig); if(err != 0) { if (err != 2) unlock_all_stripes(info, sig); goto exitCheck; } for (i = 0 ; i < raid_disks ; i++) { lseek64(source[i], offsets[i] + start * chunk_size, 0); read(source[i], stripes[i], chunk_size); } err = unlock_all_stripes(info, sig); if(err != 0) goto exitCheck; for (i = 0 ; i < data_disks ; i++) { int disk = geo_map(i, start, raid_disks, level, layout); blocks[i] = stripes[disk]; block_index_for_slot[disk] = i; printf("%d->%d\n", i, disk); } qsyndrome(p, q, (uint8_t**)blocks, data_disks, chunk_size); diskP = geo_map(-1, start, raid_disks, level, layout); diskQ = geo_map(-2, start, raid_disks, level, layout); blocks[data_disks] = stripes[diskP]; block_index_for_slot[diskP] = data_disks; blocks[data_disks+1] = stripes[diskQ]; block_index_for_slot[diskQ] = data_disks+1; if (memcmp(p, stripes[diskP], chunk_size) != 0) { printf("P(%d) wrong at %llu\n", diskP, start); } if (memcmp(q, stripes[diskQ], chunk_size) != 0) { 
printf("Q(%d) wrong at %llu\n", diskQ, start); } raid6_collect(chunk_size, p, q, stripes[diskP], stripes[diskQ], results); disk = raid6_stats(results, raid_disks, chunk_size); if(disk >= -2) { disk = geo_map(disk, start, raid_disks, level, layout); } if(disk >= 0) { printf("Error detected at %llu: possible failed disk slot: %d --> %s\n", start, disk, name[disk]); } if(disk == -65535) { printf("Error detected at %llu: disk slot unknown\n", start); } if(repair == 1) { printf("Repairing stripe %llu\n", start); printf("Assuming slots %d (%s) and %d (%s) are incorrect\n", failed_disk1, name[failed_disk1], failed_disk2, name[failed_disk2]); if (failed_disk1 == diskQ || failed_disk2 == diskQ) { char *all_but_failed_blocks[data_disks]; int failed_data_or_p; int failed_block_index; if (failed_disk1 == diskQ) failed_data_or_p = failed_disk2; else failed_data_or_p = failed_disk1; printf("Repairing D/P(%d) and Q\n", failed_data_or_p); failed_block_index = block_index_for_slot[failed_data_or_p]; for (i=0; i < data_disks; i++) if (failed_block_index == i) all_but_failed_blocks[i] = stripes[diskP]; else all_but_failed_blocks[i] = blocks[i]; xor_blocks(stripes[failed_data_or_p], all_but_failed_blocks, data_disks, chunk_size); qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size); } else { ensure_zero_has_size(chunk_size); if (failed_disk1 == diskP || failed_disk2 == diskP) { int failed_data, failed_block_index; if (failed_disk1 == diskP) failed_data = failed_disk2; else failed_data = failed_disk1; failed_block_index = block_index_for_slot[failed_data]; printf("Repairing D(%d) and P\n", failed_data); raid6_datap_recov(raid_disks, chunk_size, failed_block_index, (uint8_t**)blocks); } else { printf("Repairing D and D\n"); int failed_block_index1 = block_index_for_slot[failed_disk1]; int failed_block_index2 = block_index_for_slot[failed_disk2]; if (failed_block_index1 > failed_block_index2) { int t = failed_block_index1; failed_block_index1 = 
failed_block_index2; failed_block_index2 = t; } raid6_2data_recov(raid_disks, chunk_size, failed_block_index1, failed_block_index2, (uint8_t**)blocks); } } err = lock_stripe(info, start, chunk_size, data_disks, sig); if(err != 0) { if (err != 2) unlock_all_stripes(info, sig); goto exitCheck; } lseek64(source[failed_disk1], offsets[failed_disk1] + start * chunk_size, 0); write(source[failed_disk1], stripes[failed_disk1], chunk_size); lseek64(source[failed_disk2], offsets[failed_disk2] + start * chunk_size, 0); write(source[failed_disk2], stripes[failed_disk2], chunk_size); err = unlock_all_stripes(info, sig); if(err != 0) goto exitCheck; } else if (disk >= 0 && repair == 2) { printf("Auto-repairing slot %d (%s)\n", disk, name[disk]); if (disk == diskQ) { qsyndrome(p, (uint8_t*)stripes[diskQ], (uint8_t**)blocks, data_disks, chunk_size); } else { char *all_but_failed_blocks[data_disks]; int failed_block_index = block_index_for_slot[disk]; for (i=0; i < data_disks; i++) if (failed_block_index == i) all_but_failed_blocks[i] = stripes[diskP]; else all_but_failed_blocks[i] = blocks[i]; xor_blocks(stripes[disk], all_but_failed_blocks, data_disks, chunk_size); } err = lock_stripe(info, start, chunk_size, data_disks, sig); if(err != 0) { if (err != 2) unlock_all_stripes(info, sig); goto exitCheck; } lseek64(source[disk], offsets[disk] + start * chunk_size, 0); write(source[disk], stripes[disk], chunk_size); err = unlock_all_stripes(info, sig); if(err != 0) goto exitCheck; } length--; start++; } exitCheck: free(stripe_buf); free(stripes); free(blocks); free(p); free(q); free(results); return err; }