Example No. 1
__int64 file_info::info_get_int(const char * name) const
{
	assert(is_valid_utf8(name));
	const char * val = info_get(name);
	if (val==0) return 0;
	return _atoi64(val);
}
Example No. 2
t_int64 file_info::info_get_int(const char * name) const
{
    PFC_ASSERT(pfc::is_valid_utf8(name));
    const char * val = info_get(name);
    if (val==0) return 0;
    return _atoi64(val);
}
Example No. 3
static void
cliVersion(char *cmdline) {
    const char *bootver;
    cli_printf("Hardware:       %d.%d\r\n", hwver >> 8, hwver & 0xff);
    cli_puts(  "Software:       " VERSION "\r\n");
    bootver = info_get(boot_table, INFO_BOOTVER);
    if (bootver == NULL) {
        bootver = "no bootloader";
    }
    cli_printf("Bootloader:     %s\r\n", bootver);
}
Example No. 4
bool file_info::is_encoding_lossy() const {
    const char * encoding = info_get("encoding");
    if (encoding != NULL) {
        if (pfc::stricmp_ascii(encoding,"lossy") == 0 /*|| pfc::stricmp_ascii(encoding,"hybrid") == 0*/) return true;
    } else {
        //the old way
        //disabled: don't whine if we're not sure what we're dealing with - might be a file with info not-yet-loaded in oddball cases or a mod file
        //if (info_get("bitspersample") == NULL) return true;
    }
    return false;
}
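A hedged consumer sketch for the flag above; the warn callback is a stand-in for whatever logging facility the caller has, not an SDK API:
// Illustrative usage only: shows how is_encoding_lossy() could be consumed.
static void warn_if_lossy_source(const file_info & info, void (*warn)(const char *))
{
    // The check is conservative: it returns true only when the "encoding"
    // field explicitly says "lossy", and stays quiet when the field is absent.
    if (info.is_encoding_lossy())
        warn("source is lossy; transcoding it again will lose more quality");
}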
Example No. 5
void
main(void) {
    GPIO_OFF(E_NRST);
    unstick_i2c();
    GPIO_ON(E_NRST);
    setup_clocks((int)info_get(boot_table, INFO_HSE_FREQ));
    iwdg_start(4, 0xFFF);
    watchdog_main = watchdog_net = 5;
    ASSERT(xTaskCreate(main_thread, "main", MAIN_STACK_SIZE, NULL,
                THREAD_PRIO_MAIN, &thread_main));
    vTaskStartScheduler();
}
Example No. 6
/**
 * Check if we have to process the specified block index ::i.
 */
static int block_is_enabled(void* void_plan, block_off_t i)
{
	struct snapraid_plan* plan = void_plan;
	time_t blocktime;
	snapraid_info info;

	/* don't scrub unused blocks in all plans */
	info = info_get(&plan->state->infoarr, i);
	if (info == 0)
		return 0;

	/* bad blocks are always scrubbed in all plans */
	if (info_get_bad(info))
		return 1;

	switch (plan->plan) {
	case SCRUB_FULL :
		/* in 'full' plan everything is scrubbed */
		return 1;
	case SCRUB_EVEN :
		/* in 'even' plan, scrub only even blocks */
		return i % 2 == 0;
	case SCRUB_NEW :
		/* in 'new' plan, only blocks just synced and not yet scrubbed */
		return info_get_justsynced(info);
	case SCRUB_BAD :
		/* in 'bad' plan, only bad blocks (already reported) */
		return 0;
	}

	/* if it's too new */
	blocktime = info_get_time(info);
	if (blocktime > plan->timelimit) {
		/* skip it */
		return 0;
	}

	/* if the time is less than the limit, always include */
	/* otherwise, check if we reached the last limit count */
	if (blocktime == plan->timelimit) {
		/* if we reached the count limit */
		if (plan->countlast >= plan->lastlimit) {
			/* skip it */
			return 0;
		}

		++plan->countlast;
	}

	return 1;
}
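The subtle part of block_is_enabled() is the countlast/lastlimit pair: blocks older than timelimit are always taken, and among the blocks whose time is exactly timelimit only the first lastlimit are taken, so the scrub quota is matched precisely. A standalone sketch of just that mechanism, with invented types and numbers (this is not SnapRAID code):
/* Minimal illustration of the countlast/lastlimit quota mechanism. */
#include <cstdio>

struct mini_plan {
	long timelimit;      /* scrub blocks with time <= timelimit */
	unsigned lastlimit;  /* how many blocks at exactly timelimit to take */
	unsigned countlast;  /* how many of those were already taken */
};

static int mini_enabled(mini_plan* p, long blocktime)
{
	if (blocktime > p->timelimit)
		return 0;                     /* too recent: skip */
	if (blocktime == p->timelimit) {
		if (p->countlast >= p->lastlimit)
			return 0;                 /* quota at the limit time reached */
		++p->countlast;
	}
	return 1;                         /* older than the limit: always take */
}

int main(void)
{
	long times[] = { 10, 20, 30, 30, 30, 40 };
	mini_plan p = { 30, 2, 0 };       /* take times <= 30, at most 2 of the 30s */
	for (unsigned i = 0; i < 6; ++i)
		printf("block %u (time %ld): %s\n", i, times[i],
			mini_enabled(&p, times[i]) ? "scrub" : "skip");
	return 0;                         /* scrubs blocks 0..3, skips 4 and 5 */
}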
Example No. 7
int filter_correctness(int filter_error, tommy_arrayblkof* infoarr, struct snapraid_disk* disk, struct snapraid_file* file)
{
	unsigned i;

	if (!filter_error)
		return 0;

	/* check each block of the file */
	for (i = 0; i < file->blockmax; ++i) {
		block_off_t parity_pos = fs_file2par_get(disk, file, i);
		snapraid_info info = info_get(infoarr, parity_pos);

		/* if the file has a bad block, don't exclude it */
		if (info_get_bad(info))
			return 0;
	}

	/* the file is correct, so we filter it out */
	return 1;
}
Example No. 8
int state_status(struct snapraid_state* state)
{
	block_off_t blockmax;
	block_off_t i;
	snapraid_info* infomap;
	time_t now;
	block_off_t bad;
	block_off_t rehash;
	block_off_t count;
	unsigned dayoldest, daymedian, daynewest;
	unsigned bar[GRAPH_COLUMN];
	unsigned barpos;
	unsigned barmax;
	time_t oldest, newest, median;
	unsigned x, y;
	tommy_node* node_disk;
	unsigned file_count;
	unsigned file_fragmented;
	unsigned extra_fragment;
	uint64_t file_size;
	unsigned unsynced_blocks;

	/* get the present time */
	now = time(0);

	blockmax = parity_size(state);

	/* count fragments */
	file_count = 0;
	file_size = 0;
	file_fragmented = 0;
	extra_fragment = 0;
	for(node_disk=state->disklist;node_disk!=0;node_disk=node_disk->next) {
		struct snapraid_disk* disk = node_disk->data;
		tommy_node* node;
		unsigned disk_file_count = 0;
		unsigned disk_file_fragmented = 0;
		unsigned disk_extra_fragment = 0;
		uint64_t disk_file_size = 0;

		/* for each file in the disk */
		node = disk->filelist;
		while (node) {
			struct snapraid_file* file;

			file = node->data;
			node = node->next; /* next node */

			/* check fragmentation */
			if (file->size) {
				block_off_t prev_pos;
				int fragmented;

				fragmented = 0;
				prev_pos = file->blockvec[0].parity_pos;
				for(i=1;i<file->blockmax;++i) {
					block_off_t parity_pos = file->blockvec[i].parity_pos;
					if (prev_pos + 1 != parity_pos) {
						fragmented = 1;
						++extra_fragment;
						++disk_extra_fragment;
					}
					prev_pos = parity_pos;
				}

				if (fragmented) {
					++file_fragmented;
					++disk_file_fragmented;
				}
			}

			/* count files */
			++file_count;
			++disk_file_count;
			file_size += file->size;
			disk_file_size += file->size;
		}

		fprintf(stdlog, "summary:disk_file_count:%s:%u\n", disk->name, disk_file_count);
		fprintf(stdlog, "summary:disk_fragmented_file_count:%s:%u\n", disk->name, disk_file_fragmented);
		fprintf(stdlog, "summary:disk_excess_fragment_count:%s:%u\n", disk->name, disk_extra_fragment);
		fprintf(stdlog, "summary:disk_file_size:%s:%"PRIu64"\n", disk->name, disk_file_size);
	}

	printf("\n");

	printf("Files: %u\n", file_count);
	printf("Fragmented files: %u\n", file_fragmented);
	printf("Excess fragments: %u\n", extra_fragment);
	printf("Files size: %"PRIu64" GiB\n", file_size / (1024*1024*1024) );
	printf("Parity size: %"PRIu64" GiB\n", blockmax * (uint64_t)state->block_size / (1024*1024*1024) );

	printf("\n");

	fprintf(stdlog, "summary:file_count:%u\n", file_count);
	fprintf(stdlog, "summary:fragmented_file_count:%u\n", file_fragmented);
	fprintf(stdlog, "summary:excess_fragment_count:%u\n", extra_fragment);
	fprintf(stdlog, "summary:file_size:%"PRIu64"\n", file_size);
	fprintf(stdlog, "summary:parity_size:%"PRIu64"\n", blockmax * (uint64_t)state->block_size);
	fprintf(stdlog, "summary:hash:%s\n", hash_config_name(state->hash));
	fprintf(stdlog, "summary:prev_hash:%s\n", hash_config_name(state->prevhash));
	fprintf(stdlog, "summary:best_hash:%s\n", hash_config_name(state->besthash));
	fprintf(stdlog, "summary:block_count:%u\n", blockmax);
	fflush(stdlog);

	/* copy the info to a temp vector, and count bad/rehash/unsynced blocks */
	infomap = malloc_nofail(blockmax * sizeof(snapraid_info));
	bad = 0;
	count = 0;
	rehash = 0;
	unsynced_blocks = 0;
	for(i=0;i<blockmax;++i) {
		int one_invalid;
		int one_valid;

		snapraid_info info = info_get(&state->infoarr, i);

		/* for each disk */
		one_invalid = 0;
		one_valid = 0;
		for(node_disk=state->disklist;node_disk!=0;node_disk=node_disk->next) {
			struct snapraid_disk* disk = node_disk->data;
			struct snapraid_block* block = disk_block_get(disk, i);

			if (block_has_file(block))
				one_valid = 1;
			if (block_has_invalid_parity(block))
				one_invalid = 1;
		}

		/* if both valid and invalid, we need to update */
		if (one_invalid && one_valid) {
			++unsynced_blocks;
		}

		/* skip unused blocks */
		if (info != 0) {
			if (info_get_bad(info))
				++bad;

			if (info_get_rehash(info))
				++rehash;

			infomap[count++] = info;
		}

		if (state->opt.gui) {
			if (info != 0)
				fprintf(stdlog, "block:%u:%"PRIu64":%s:%s:%s:%s\n", i, (uint64_t)info_get_time(info), one_valid ? "used": "", one_invalid ? "unsynced" : "", info_get_bad(info) ? "bad" : "", info_get_rehash(info) ? "rehash" : "");
			else
				fprintf(stdlog, "block_noinfo:%u:%s:%s\n", i, one_valid ? "used": "", one_invalid ? "unsynced" : "");
		}
	}

	fprintf(stdlog, "summary:has_unsynced:%u\n", unsynced_blocks);
	fprintf(stdlog, "summary:has_rehash:%u\n", rehash);
	fprintf(stdlog, "summary:has_bad:%u\n", bad);
	fprintf(stdlog, "summary:time_count:%u\n", count);
	fflush(stdlog);

	if (!count) {
		fprintf(stderr, "The array is empty.\n");
		free(infomap);
		return 0;
	}

	/* sort the info to get the time info */
	qsort(infomap, count, sizeof(snapraid_info), info_time_compare);

	/* info for making the graph */
	i = 0;
	while (i < count) {
		unsigned j = i + 1;
		while (j < count && info_get_time(infomap[i]) == info_get_time(infomap[j]))
			++j;
		fprintf(stdlog, "time:%"PRIu64":%u\n", (uint64_t)info_get_time(infomap[i]), j - i);
		i = j;
	}
	fflush(stdlog);

	oldest = info_get_time(infomap[0]);
	median = info_get_time(infomap[count / 2]);
	newest = info_get_time(infomap[count - 1]);
	dayoldest = day_ago(oldest, now);
	daymedian = day_ago(median, now);
	daynewest = day_ago(newest, now);

	/* compute graph limits */
	barpos = 0;
	barmax = 0;
	for(i=0;i<GRAPH_COLUMN;++i) {
		time_t limit;
		unsigned step;

		limit = oldest + (newest - oldest) * (i+1) / GRAPH_COLUMN;

		step = 0;
		while (barpos < count && info_get_time(infomap[barpos]) <= limit) {
			++barpos;
			++step;
		}

		if (step > barmax)
			barmax = step;

		bar[i] = step;
	}

	/* print the graph */
	for(y=0;y<GRAPH_ROW;++y) {
		if (y == 0)
			printf("%3u%% ", barmax * 100 / count);
		else if (y == GRAPH_ROW - 1)
			printf("  0%% ");
		else if (y == GRAPH_ROW/2)
			printf("%3u%% ", barmax * 50 / count);
		else
			printf("     ");
		for(x=0;x<GRAPH_COLUMN;++x) {
			unsigned pivot = barmax * (GRAPH_ROW-y) / GRAPH_ROW;
			/* if it's the baseline */
			if (y == GRAPH_ROW-1) {
				if (bar[x] >= pivot) {
					printf("*");
				} else if (bar[x] > 0) {
					printf("+");
				} else {
					printf("_");
				}
			} else {
				unsigned halfpivot = pivot - barmax / GRAPH_ROW / 2;
				if (bar[x] >= pivot) {
					printf("*");
				} else if (bar[x] >= halfpivot) {
					printf("+");
				} else {
					printf(" ");
				}
			}
		}
		printf("\n");
	}
	printf("   %3u                    days ago of the last scrub                    %3u\n", dayoldest, daynewest);

	printf("\n");

	printf("The oldest block was scrubbed %u days ago, the median %u, the newest %u.\n", dayoldest, daymedian, daynewest);

	printf("\n");

	if (unsynced_blocks) {
		printf("WARNING! The array is NOT fully synced.\n");
		printf("You have a sync in progress at %u%%.\n", (blockmax - unsynced_blocks) * 100 / blockmax);
	} else {
		printf("No sync is in progress.\n");
	}

	if (rehash) {
		printf("You have a rehash in progress at %u%%.\n", (count - rehash) * 100 / count);
	} else {
		if (state->besthash != state->hash) {
			printf("No rehash is in progress, but for optimal performance one is recommended.\n");
		} else {
			printf("No rehash is in progress or needed.\n");
		}
	}

	if (bad) {
		printf("DANGER! In the array there are %u silent errors!\n\n", bad);

		printf("They are at blocks:");

		/* print all the errors */
		for(i=0;i<blockmax;++i) {
			snapraid_info info = info_get(&state->infoarr, i);

			/* skip unused blocks */
			if (info == 0)
				continue;
			
			if (info_get_bad(info))
				printf(" %u", i);
		}

		printf("\n\n");

		printf("To fix them use the command 'snapraid -e fix'.\n");
		printf("The errors will disapper from the 'status' at the next 'scrub' command.\n");
	} else {
		printf("No silent error detected.\n");
	}

	/* free the temp vector */
	free(infomap);

	return 0;
}
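The scrub-age graph above is built by slicing the sorted scrub times into GRAPH_COLUMN equal time intervals and counting how many blocks fall into each slice; barmax then scales the rows. A standalone sketch of that bucketing step, with invented timestamps and 4 columns instead of GRAPH_COLUMN (not SnapRAID code):
/* Minimal illustration of the histogram bucketing used by state_status(). */
#include <cstdio>

int main(void)
{
	long times[] = { 0, 1, 2, 5, 6, 9, 9, 10 };   /* already sorted, oldest first */
	unsigned count = 8, columns = 4;
	long oldest = times[0], newest = times[count - 1];

	unsigned barpos = 0, barmax = 0, bar[4];
	for (unsigned i = 0; i < columns; ++i) {
		/* right edge of column i, proportional between oldest and newest */
		long limit = oldest + (newest - oldest) * (long)(i + 1) / (long)columns;

		unsigned step = 0;
		while (barpos < count && times[barpos] <= limit) {
			++barpos;
			++step;
		}
		if (step > barmax)
			barmax = step;
		bar[i] = step;
	}

	for (unsigned i = 0; i < columns; ++i)
		printf("column %u: %u blocks (tallest column: %u)\n", i, bar[i], barmax);
	return 0;   /* prints 3, 1, 1, 3 blocks with a tallest column of 3 */
}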
Example No. 9
int state_scrub(struct snapraid_state* state, int percentage, int olderthan)
{
	block_off_t blockmax;
	block_off_t countlimit;
	block_off_t i;
	time_t timelimit;
	time_t recentlimit;
	unsigned count;
	int ret;
	struct snapraid_parity parity[LEV_MAX];
	struct snapraid_parity* parity_ptr[LEV_MAX];
	snapraid_info* infomap;
	unsigned error;
	time_t now;
	unsigned l;

	/* get the present time */
	now = time(0);

	printf("Initializing...\n");

	blockmax = parity_size(state);

	if (state->opt.force_scrub_even) {
		/* no limit */
		countlimit = blockmax;
		recentlimit = now;
	} else if (state->opt.force_scrub) {
		/* scrub the specified amount of blocks */
		countlimit = state->opt.force_scrub;
		recentlimit = now;
	} else {
		/* by default scrub 1/12 of the array */
		countlimit = md(blockmax, 1, 12);

		if (percentage != -1)
			countlimit = md(blockmax, percentage, 100);

		/* by default use a 10 day time limit */
		recentlimit = now - 10 * 24 * 3600;

		if (olderthan != -1)
			recentlimit = now - olderthan * 24 * 3600;
	}

	/* identify the time limit */
	/* we sort all the block times, and we identify the time limit for which we reach the quota */
	/* this allows us to process the oldest blocks first */
	infomap = malloc_nofail(blockmax * sizeof(snapraid_info));

	/* copy the info in the temp vector */
	count = 0;
	for(i=0;i<blockmax;++i) {
		snapraid_info info = info_get(&state->infoarr, i);

		/* skip unused blocks */
		if (info == 0)
			continue;

		infomap[count++] = info;
	}

	if (!count) {
		/* LCOV_EXCL_START */
		fprintf(stderr, "The array appears to be empty.\n");
		exit(EXIT_FAILURE);
		/* LCOV_EXCL_STOP */
	}

	/* sort it */
	qsort(infomap, count, sizeof(snapraid_info), info_time_compare);

	/* don't check more blocks than the available ones */
	if (countlimit > count)
		countlimit = count;

	if (countlimit > 0) {
		/* get the time limit */
		timelimit = info_get_time(infomap[countlimit - 1]);

		/* don't scrub too recent blocks */
		if (timelimit > recentlimit) {
			timelimit = recentlimit;
		}
	} else {
		/* if we select a 0 percentage, disable also the time limit */
		timelimit = 0;
	}

	/* free the temp vector */
	free(infomap);

	/* open the file for reading */
	for(l=0;l<state->level;++l) {
		parity_ptr[l] = &parity[l];
		ret = parity_open(parity_ptr[l], state->parity_path[l], state->opt.skip_sequential);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			fprintf(stderr, "WARNING! Without an accessible %s file, it isn't possible to scrub.\n", lev_name(l));
			exit(EXIT_FAILURE);
			/* LCOV_EXCL_STOP */
		}
	}

	printf("Scrubbing...\n");

	error = 0;

	ret = state_scrub_process(state, parity_ptr, 0, blockmax, timelimit, countlimit, now);
	if (ret == -1) {
		++error;
		/* continue, as we are already exiting */
	}

	for(l=0;l<state->level;++l) {
		ret = parity_close(parity_ptr[l]);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			fprintf(stderr, "DANGER! Unexpected close error in %s disk.\n", lev_name(l));
			++error;
			/* continue, as we are already exiting */
			/* LCOV_EXCL_STOP */
		}
	}

	/* abort if required */
	if (error != 0)
		return -1;
	return 0;
}
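The quota-to-time conversion above boils down to: sort the block times, take the time of the countlimit-th oldest block as the limit, then cap it with recentlimit so recently scrubbed blocks are never selected. A compact sketch of just that step, with invented numbers (not SnapRAID code):
/* Minimal illustration of how a block quota becomes a time limit. */
#include <algorithm>
#include <cstdio>

int main(void)
{
	long times[] = { 50, 10, 40, 20, 80, 30, 70, 60 };
	unsigned count = 8;
	unsigned countlimit = count / 4;         /* scrub roughly 25% of the blocks */
	long recentlimit = 35;                   /* never scrub anything newer than this */

	std::sort(times, times + count);         /* oldest first */

	long timelimit;
	if (countlimit > 0) {
		/* newest time still inside the quota... */
		timelimit = times[countlimit - 1];
		/* ...but never more recent than the age threshold */
		if (timelimit > recentlimit)
			timelimit = recentlimit;
	} else {
		/* a 0% quota disables the time limit too */
		timelimit = 0;
	}

	printf("timelimit = %ld\n", timelimit);  /* prints 20 for this data */
	return 0;
}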
Example No. 10
static int state_scrub_process(struct snapraid_state* state, struct snapraid_parity** parity, block_off_t blockstart, block_off_t blockmax, time_t timelimit, block_off_t countlimit, time_t now)
{
	struct snapraid_handle* handle;
	void* rehandle_alloc;
	struct snapraid_rehash* rehandle;
	unsigned diskmax;
	block_off_t i;
	unsigned j;
	void* buffer_alloc;
	void** buffer;
	unsigned buffermax;
	data_off_t countsize;
	block_off_t countpos;
	block_off_t countmax;
	block_off_t recountmax;
	block_off_t autosavedone;
	block_off_t autosavelimit;
	block_off_t autosavemissing;
	int ret;
	unsigned error;
	unsigned silent_error;
	unsigned l;

	/* maps the disks to handles */
	handle = handle_map(state, &diskmax);

	/* rehash buffers */
	rehandle = malloc_nofail_align(diskmax * sizeof(struct snapraid_rehash), &rehandle_alloc);

	/* we need one buffer per disk, plus 2 per parity level */
	buffermax = diskmax + state->level * 2;

	buffer = malloc_nofail_vector_align(diskmax, buffermax, state->block_size, &buffer_alloc);
	if (!state->opt.skip_self)
		mtest_vector(buffermax, state->block_size, buffer);

	error = 0;
	silent_error = 0;

	/* first count the number of blocks to process */
	countmax = 0;
	for(i=blockstart;i<blockmax;++i) {
		time_t blocktime;
		snapraid_info info;

		/* if it's unused */
		info = info_get(&state->infoarr, i);
		if (info == 0) {
			/* skip it */
			continue;
		}

		/* blocks marked as bad are always checked */
		if (!info_get_bad(info)) {

			/* if it's too new */
			blocktime = info_get_time(info);
			if (blocktime > timelimit) {
				/* skip it */
				continue;
			}

			/* skip odd blocks, used only for testing */
			if (state->opt.force_scrub_even && (i % 2) != 0) {
				/* skip it */
				continue;
			}

			/* if the time is less than the limit, always include */
			/* otherwise, check if we reached the max count */
			if (blocktime == timelimit) {
				/* if we reached the count limit */
				if (countmax >= countlimit) {
					/* skip it */
					continue;
				}
			}
		}

		++countmax;
	}

	/* compute the autosave size for all disks, even if not read */
	/* this makes sense because the speed should be almost the same */
	/* if the disks are read in parallel */
	autosavelimit = state->autosave / (diskmax * state->block_size);
	autosavemissing = countmax; /* blocks to do */
	autosavedone = 0; /* blocks done */

	countsize = 0;
	countpos = 0;
	state_progress_begin(state, blockstart, blockmax, countmax);
	recountmax = 0;
	for(i=blockstart;i<blockmax;++i) {
		time_t blocktime;
		snapraid_info info;
		int error_on_this_block;
		int silent_error_on_this_block;
		int block_is_unsynced;
		int rehash;

		/* if it's unused */
		info = info_get(&state->infoarr, i);
		if (info == 0) {
			/* skip it */
			continue;
		}

		/* blocks marked as bad are always checked */
		if (!info_get_bad(info)) {

			/* if it's too new */
			blocktime = info_get_time(info);
			if (blocktime > timelimit) {
				/* skip it */
				continue;
			}

			/* skip odd blocks, used only for testing */
			if (state->opt.force_scrub_even && (i % 2) != 0) {
				/* skip it */
				continue;
			}

			/* if the time is less than the limit, always include */
			/* otherwise, check if we reached the count max */
			if (blocktime == timelimit) {
				/* if we reached the count limit */
				if (recountmax >= countlimit) {
					/* skip it */
					continue;
				}
			}
		}

		++recountmax;

		/* one more block processed for autosave */
		++autosavedone;
		--autosavemissing;

		/* by default process the block, and skip it if something goes wrong */
		error_on_this_block = 0;
		silent_error_on_this_block = 0;

		/* if all the blocks at this address are synced */
		/* if not, parity is not even checked */
		block_is_unsynced = 0;

		/* if we have to use the old hash */
		rehash = info_get_rehash(info);

		/* for each disk, process the block */
		for(j=0;j<diskmax;++j) {
			int read_size;
			unsigned char hash[HASH_SIZE];
			struct snapraid_block* block;
			int file_is_unsynced;

			/* if the file on this disk is synced */
			/* if not, silent errors are treated as expected errors */
			file_is_unsynced = 0;

			/* by default no rehash in case of "continue" */
			rehandle[j].block = 0;

			/* if the disk position is not used */
			if (!handle[j].disk) {
				/* use an empty block */
				memset(buffer[j], 0, state->block_size);
				continue;
			}

			/* if the block is not used */
			block = disk_block_get(handle[j].disk, i);
			if (!block_has_file(block)) {
				/* use an empty block */
				memset(buffer[j], 0, state->block_size);
				continue;
			}

			/* if the block is unsynced, errors are expected */
			if (block_has_invalid_parity(block)) {
				/* report that the block and the file are not synced */
				block_is_unsynced = 1;
				file_is_unsynced = 1;

				/* follow */
			}

			/* if the file is different than the current one, close it */
			if (handle[j].file != 0 && handle[j].file != block_file_get(block)) {
				/* keep a pointer at the file we are going to close for error reporting */
				struct snapraid_file* file = handle[j].file;
				ret = handle_close(&handle[j]);
				if (ret == -1) {
					/* LCOV_EXCL_START */
					/* This one is really an unexpected error, because we are only reading */
					/* and closing a descriptor should never fail */
					fprintf(stdlog, "error:%u:%s:%s: Close error. %s\n", i, handle[j].disk->name, file->sub, strerror(errno));
					fprintf(stderr, "DANGER! Unexpected close error in a data disk, it isn't possible to scrub.\n");
					printf("Stopping at block %u\n", i);
					++error;
					goto bail;
					/* LCOV_EXCL_STOP */
				}
			}

			ret = handle_open(&handle[j], block_file_get(block), state->opt.skip_sequential, stderr);
			if (ret == -1) {
				/* file we have tried to open for error reporting */
				struct snapraid_file* file = block_file_get(block);
				fprintf(stdlog, "error:%u:%s:%s: Open error. %s\n", i, handle[j].disk->name, file->sub, strerror(errno));
				++error;
				error_on_this_block = 1;
				continue;
			}

			/* check if the file is changed */
			if (handle[j].st.st_size != block_file_get(block)->size
				|| handle[j].st.st_mtime != block_file_get(block)->mtime_sec
				|| STAT_NSEC(&handle[j].st) != block_file_get(block)->mtime_nsec
				|| handle[j].st.st_ino != block_file_get(block)->inode
			) {
				/* report that the block and the file are not synced */
				block_is_unsynced = 1;
				file_is_unsynced = 1;

				/* follow */
			}

			/* note that we intentionally don't abort if the file has different attributes */
			/* from the last sync, as we are expected to return errors if running */
			/* in an unsynced array. This is just like the check command. */

			read_size = handle_read(&handle[j], block, buffer[j], state->block_size, stderr);
			if (read_size == -1) {
				fprintf(stdlog, "error:%u:%s:%s: Read error at position %u\n", i, handle[j].disk->name, handle[j].file->sub, block_file_pos(block));
				++error;
				error_on_this_block = 1;
				continue;
			}

			countsize += read_size;

			/* now compute the hash */
			if (rehash) {
				memhash(state->prevhash, state->prevhashseed, hash, buffer[j], read_size);

				/* compute the new hash, and store it */
				rehandle[j].block = block;
				memhash(state->hash, state->hashseed, rehandle[j].hash, buffer[j], read_size);
			} else {
				memhash(state->hash, state->hashseed, hash, buffer[j], read_size);
			}

			if (block_has_updated_hash(block)) {
				/* compare the hash */
				if (memcmp(hash, block->hash, HASH_SIZE) != 0) {
					fprintf(stdlog, "error:%u:%s:%s: Data error at position %u\n", i, handle[j].disk->name, handle[j].file->sub, block_file_pos(block));

					/* it's a silent error only if we are dealing with synced files */
					if (file_is_unsynced) {
						++error;
						error_on_this_block = 1;
					} else {
						fprintf(stderr, "Data error in file '%s' at position '%u'\n", handle[j].path, block_file_pos(block));
						fprintf(stderr, "WARNING! Unexpected data error in a data disk! The block is now marked as bad!\n");
						fprintf(stderr, "Try with 'snapraid -e fix' to recover!\n");

						++silent_error;
						silent_error_on_this_block = 1;
					}
					continue;
				}
			}
		}

		/* if we have read all the data required and it's correct, proceed with the parity check */
		if (!error_on_this_block && !silent_error_on_this_block) {
			unsigned char* buffer_recov[LEV_MAX];

			/* buffers for parity read and not computed */
			for(l=0;l<state->level;++l)
				buffer_recov[l] = buffer[diskmax + state->level + l];
			for(;l<LEV_MAX;++l)
				buffer_recov[l] = 0;

			/* read the parity */
			for(l=0;l<state->level;++l) {
				ret = parity_read(parity[l], i, buffer_recov[l], state->block_size, stdlog);
				if (ret == -1) {
					buffer_recov[l] = 0;
					fprintf(stdlog, "parity_error:%u:%s: Read error\n", i, lev_config_name(l));
					++error;
					error_on_this_block = 1;

					/* follow */
				}
			}

			/* compute the parity */
			raid_gen(diskmax, state->level, state->block_size, buffer);

			/* compare the parity */
			for(l=0;l<state->level;++l) {
				if (buffer_recov[l] && memcmp(buffer[diskmax + l], buffer_recov[l], state->block_size) != 0) {
					fprintf(stdlog, "parity_error:%u:%s: Data error\n", i, lev_config_name(l));

					/* it's a silent error only if we are dealing with synced blocks */
					if (block_is_unsynced) {
						++error;
						error_on_this_block = 1;
					} else {
						fprintf(stderr, "Data error in parity '%s' at position '%u'\n", lev_config_name(l), i);
						fprintf(stderr, "WARNING! Unexpected data error in a parity disk! The block is now marked as bad!\n");
						fprintf(stderr, "Try with 'snapraid -e fix' to recover!\n");

						++silent_error;
						silent_error_on_this_block = 1;
					}
				}
			}
		}

		if (silent_error_on_this_block) {
			/* set the error status keeping the existing time and hash */
			info_set(&state->infoarr, i, info_set_bad(info));
		} else if (error_on_this_block) {
			/* do nothing, as this is a generic error */
			/* likely caused by an unsynced array */
		} else {
			/* if rehash is needed */
			if (rehash) {
				/* store all the new hash already computed */
				for(j=0;j<diskmax;++j) {
					if (rehandle[j].block)
						memcpy(rehandle[j].block->hash, rehandle[j].hash, HASH_SIZE);
				}
			}

			/* update the time info of the block */
			/* and clear any other flag */
			info_set(&state->infoarr, i, info_make(now, 0, 0));
		}

		/* mark the state as needing write */
		state->need_write = 1;

		/* count the number of processed blocks */
		++countpos;

		/* progress */
		if (state_progress(state, i, countpos, countmax, countsize)) {
			/* LCOV_EXCL_START */
			break;
			/* LCOV_EXCL_STOP */
		}

		/* autosave */
		if (state->autosave != 0
			&& autosavedone >= autosavelimit /* if we have reached the limit */
			&& autosavemissing >= autosavelimit /* if we have at least a full step to do */
		) {
			autosavedone = 0; /* restart the counter */

			state_progress_stop(state);

			printf("Autosaving...\n");
			state_write(state);

			state_progress_restart(state);
		}
	}

	state_progress_end(state, countpos, countmax, countsize);

	if (error || silent_error) {
		printf("\n");
		printf("%8u read errors\n", error);
		printf("%8u data errors\n", silent_error);
		printf("WARNING! There are errors!\n");
	} else {
		/* print the result only if processed something */
		if (countpos != 0)
			printf("Everything OK\n");
	}

	fprintf(stdlog, "summary:error_read:%u\n", error);
	fprintf(stdlog, "summary:error_data:%u\n", silent_error);
	if (error + silent_error == 0)
		fprintf(stdlog, "summary:exit:ok\n");
	else
		fprintf(stdlog, "summary:exit:error\n");
	fflush(stdlog);

bail:
	for(j=0;j<diskmax;++j) {
		ret = handle_close(&handle[j]);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			fprintf(stderr, "DANGER! Unexpected close error in a data disk.\n");
			++error;
			/* continue, as we are already exiting */
			/* LCOV_EXCL_STOP */
		}
	}

	free(handle);
	free(buffer_alloc);
	free(buffer);
	free(rehandle_alloc);

	if (state->opt.expect_recoverable) {
		if (error + silent_error == 0)
			return -1;
	} else {
		if (error + silent_error != 0)
			return -1;
	}
	return 0;
}
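One mechanism worth isolating from state_scrub_process() is the autosave cadence: the state is written after every autosavelimit processed blocks, but only while at least one full step of work remains, so there is no redundant save right before the final write. A standalone sketch with invented counts (not SnapRAID code):
/* Minimal illustration of the autosavedone/autosavemissing cadence. */
#include <cstdio>

int main(void)
{
	unsigned countmax = 10;               /* blocks to scrub */
	unsigned autosavelimit = 3;           /* blocks per autosave step */
	unsigned autosavemissing = countmax;  /* blocks still to do */
	unsigned autosavedone = 0;            /* blocks done since the last save */

	for (unsigned i = 0; i < countmax; ++i) {
		++autosavedone;
		--autosavemissing;

		if (autosavedone >= autosavelimit       /* a full step was processed */
			&& autosavemissing >= autosavelimit /* and another full step remains */
		) {
			printf("autosave after block %u\n", i);
			autosavedone = 0;             /* restart the counter */
		}
	}
	return 0;   /* saves after blocks 2 and 5, but not near the very end */
}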
Example No. 11
double file_info::info_get_float(const char * name)
{
	const char * ptr = info_get(name);
	if (ptr) return pfc_string_to_float(ptr);
	else return 0;
}
Example No. 12
int state_scrub(struct snapraid_state* state, int plan, int olderthan)
{
	block_off_t blockmax;
	block_off_t countlimit;
	block_off_t i;
	block_off_t count;
	time_t recentlimit;
	int ret;
	struct snapraid_parity_handle parity_handle[LEV_MAX];
	struct snapraid_plan ps;
	time_t* timemap;
	unsigned error;
	time_t now;
	unsigned l;

	/* get the present time */
	now = time(0);

	msg_progress("Initializing...\n");

	if ((plan == SCRUB_BAD || plan == SCRUB_NEW || plan == SCRUB_FULL)
		&& olderthan >= 0) {
		/* LCOV_EXCL_START */
		log_fatal("You can specify -o, --older-than only with a numeric percentage.\n");
		exit(EXIT_FAILURE);
		/* LCOV_EXCL_STOP */
	}

	blockmax = parity_allocated_size(state);

	/* preinitialize to avoid warnings */
	countlimit = 0;
	recentlimit = 0;

	ps.state = state;
	if (state->opt.force_scrub_even) {
		ps.plan = SCRUB_EVEN;
	} else if (plan == SCRUB_FULL) {
		ps.plan = SCRUB_FULL;
	} else if (plan == SCRUB_NEW) {
		ps.plan = SCRUB_NEW;
	} else if (plan == SCRUB_BAD) {
		ps.plan = SCRUB_BAD;
	} else if (state->opt.force_scrub_at) {
		/* scrub the specified amount of blocks */
		ps.plan = SCRUB_AUTO;
		countlimit = state->opt.force_scrub_at;
		recentlimit = now;
	} else {
		ps.plan = SCRUB_AUTO;
		if (plan >= 0) {
			countlimit = md(blockmax, plan, 100);
		} else {
			/* by default scrub 8.33% of the array (100/12=8.(3)) */
			countlimit = md(blockmax, 1, 12);
		}

		if (olderthan >= 0) {
			recentlimit = now - olderthan * 24 * 3600;
		} else {
			/* by default use a 10 day time limit */
			recentlimit = now - 10 * 24 * 3600;
		}
	}

	/* identify the time limit */
	/* we sort all the block times, and we identify the time limit for which we reach the quota */
	/* this allows us to process the oldest blocks first */
	timemap = malloc_nofail(blockmax * sizeof(time_t));

	/* copy the info in the temp vector */
	count = 0;
	log_tag("block_count:%u\n", blockmax);
	for (i = 0; i < blockmax; ++i) {
		snapraid_info info = info_get(&state->infoarr, i);

		/* skip unused blocks */
		if (info == 0)
			continue;

		timemap[count++] = info_get_time(info);
	}

	if (!count) {
		/* LCOV_EXCL_START */
		log_fatal("The array appears to be empty.\n");
		exit(EXIT_FAILURE);
		/* LCOV_EXCL_STOP */
	}

	/* sort it */
	qsort(timemap, count, sizeof(time_t), time_compare);

	/* output the info map */
	i = 0;
	log_tag("info_count:%u\n", count);
	while (i < count) {
		unsigned j = i + 1;
		while (j < count && timemap[i] == timemap[j])
			++j;
		log_tag("info_time:%" PRIu64 ":%u\n", (uint64_t)timemap[i], j - i);
		i = j;
	}

	/* compute the limits from count/recentlimit */
	if (ps.plan == SCRUB_AUTO) {
		/* no more than the full count */
		if (countlimit > count)
			countlimit = count;

		/* decrease until we reach the specific recentlimit */
		while (countlimit > 0 && timemap[countlimit - 1] > recentlimit)
			--countlimit;

		/* if there is something to scrub */
		if (countlimit > 0) {
			/* get the most recent time we want to scrub */
			ps.timelimit = timemap[countlimit - 1];

			/* count how many entries for this exact time we have to scrub */
			/* if the blocks have all the same time, we end with countlimit == lastlimit */
			ps.lastlimit = 1;
			while (countlimit > ps.lastlimit && timemap[countlimit - ps.lastlimit - 1] == ps.timelimit)
				++ps.lastlimit;
		} else {
			/* if nothing to scrub, disable also other limits */
			ps.timelimit = 0;
			ps.lastlimit = 0;
		}

		log_tag("count_limit:%u\n", countlimit);
		log_tag("time_limit:%" PRIu64 "\n", (uint64_t)ps.timelimit);
		log_tag("last_limit:%u\n", ps.lastlimit);
	}

	/* free the temp vector */
	free(timemap);

	/* open the file for reading */
	for (l = 0; l < state->level; ++l) {
		ret = parity_open(&parity_handle[l], l, state->parity[l].path, state->file_mode);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			log_fatal("WARNING! Without an accessible %s file, it isn't possible to scrub.\n", lev_name(l));
			exit(EXIT_FAILURE);
			/* LCOV_EXCL_STOP */
		}
	}

	msg_progress("Scrubbing...\n");

	error = 0;

	ret = state_scrub_process(state, parity_handle, 0, blockmax, &ps, now);
	if (ret == -1) {
		++error;
		/* continue, as we are already exiting */
	}

	for (l = 0; l < state->level; ++l) {
		ret = parity_close(&parity_handle[l]);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			log_fatal("DANGER! Unexpected close error in %s disk.\n", lev_name(l));
			++error;
			/* continue, as we are already exiting */
			/* LCOV_EXCL_STOP */
		}
	}

	/* abort if required */
	if (error != 0)
		return -1;
	return 0;
}
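The newer state_scrub() also computes lastlimit, the number of blocks at exactly timelimit that still fit inside the quota; block_is_enabled() (Example No. 6) then consumes it through countlast. A standalone sketch of that computation, with invented timestamps (not SnapRAID code):
/* Minimal illustration of the timelimit/lastlimit computation. */
#include <cstdio>

int main(void)
{
	long timemap[] = { 10, 10, 20, 20, 20, 20, 30 };  /* already sorted */
	unsigned count = 7;
	unsigned countlimit = 5;             /* quota: scrub 5 blocks */

	if (countlimit > count)
		countlimit = count;

	long timelimit = 0;
	unsigned lastlimit = 0;
	if (countlimit > 0) {
		/* most recent time we still want to scrub */
		timelimit = timemap[countlimit - 1];

		/* how many blocks at exactly that time fit inside the quota */
		lastlimit = 1;
		while (countlimit > lastlimit
			&& timemap[countlimit - lastlimit - 1] == timelimit)
			++lastlimit;
	}

	/* prints timelimit=20 lastlimit=3: the two 10s plus three of the four 20s */
	printf("timelimit=%ld lastlimit=%u\n", timelimit, lastlimit);
	return 0;
}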
Example No. 13
static int state_scrub_process(struct snapraid_state* state, struct snapraid_parity_handle* parity_handle, block_off_t blockstart, block_off_t blockmax, struct snapraid_plan* plan, time_t now)
{
	struct snapraid_io io;
	struct snapraid_handle* handle;
	void* rehandle_alloc;
	struct snapraid_rehash* rehandle;
	unsigned diskmax;
	block_off_t blockcur;
	unsigned j;
	unsigned buffermax;
	data_off_t countsize;
	block_off_t countpos;
	block_off_t countmax;
	block_off_t autosavedone;
	block_off_t autosavelimit;
	block_off_t autosavemissing;
	int ret;
	unsigned error;
	unsigned silent_error;
	unsigned io_error;
	unsigned l;
	unsigned* waiting_map;
	unsigned waiting_mac;
	char esc_buffer[ESC_MAX];

	/* maps the disks to handles */
	handle = handle_mapping(state, &diskmax);

	/* rehash buffers */
	rehandle = malloc_nofail_align(diskmax * sizeof(struct snapraid_rehash), &rehandle_alloc);

	/* we need 1 * data + 2 * parity */
	buffermax = diskmax + 2 * state->level;

	/* initialize the io threads */
	io_init(&io, state, state->opt.io_cache, buffermax, scrub_data_reader, handle, diskmax, scrub_parity_reader, 0, parity_handle, state->level);

	/* possibly waiting disks */
	waiting_mac = diskmax > RAID_PARITY_MAX ? diskmax : RAID_PARITY_MAX;
	waiting_map = malloc_nofail(waiting_mac * sizeof(unsigned));

	error = 0;
	silent_error = 0;
	io_error = 0;

	/* first count the number of blocks to process */
	countmax = 0;
	plan->countlast = 0;
	for (blockcur = blockstart; blockcur < blockmax; ++blockcur) {
		if (!block_is_enabled(plan, blockcur))
			continue;
		++countmax;
	}

	/* compute the autosave size for all disks, even if not read */
	/* this makes sense because the speed should be almost the same */
	/* if the disks are read in parallel */
	autosavelimit = state->autosave / (diskmax * state->block_size);
	autosavemissing = countmax; /* blocks to do */
	autosavedone = 0; /* blocks done */

	/* drop until now */
	state_usage_waste(state);

	countsize = 0;
	countpos = 0;
	plan->countlast = 0;

	/* start all the worker threads */
	io_start(&io, blockstart, blockmax, &block_is_enabled, plan);

	state_progress_begin(state, blockstart, blockmax, countmax);
	while (1) {
		unsigned char* buffer_recov[LEV_MAX];
		snapraid_info info;
		int error_on_this_block;
		int silent_error_on_this_block;
		int io_error_on_this_block;
		int block_is_unsynced;
		int rehash;
		void** buffer;

		/* go to the next block */
		blockcur = io_read_next(&io, &buffer);
		if (blockcur >= blockmax)
			break;

		/* until now is scheduling */
		state_usage_sched(state);

		/* one more block processed for autosave */
		++autosavedone;
		--autosavemissing;

		/* by default process the block, and skip it if something goes wrong */
		error_on_this_block = 0;
		silent_error_on_this_block = 0;
		io_error_on_this_block = 0;

		/* if all the blocks at this address are synced */
		/* if not, parity is not even checked */
		block_is_unsynced = 0;

		/* get block specific info */
		info = info_get(&state->infoarr, blockcur);

		/* if we have to use the old hash */
		rehash = info_get_rehash(info);

		/* for each disk, process the block */
		for (j = 0; j < diskmax; ++j) {
			struct snapraid_task* task;
			int read_size;
			unsigned char hash[HASH_SIZE];
			struct snapraid_block* block;
			int file_is_unsynced;
			struct snapraid_disk* disk;
			struct snapraid_file* file;
			block_off_t file_pos;
			unsigned diskcur;

			/* if the file on this disk is synced */
			/* if not, silent errors are treated as expected errors */
			file_is_unsynced = 0;

			/* until now is misc */
			state_usage_misc(state);

			/* get the next task */
			task = io_data_read(&io, &diskcur, waiting_map, &waiting_mac);

			/* until now is disk */
			state_usage_disk(state, handle, waiting_map, waiting_mac);

			/* get the task results */
			disk = task->disk;
			block = task->block;
			file = task->file;
			file_pos = task->file_pos;
			read_size = task->read_size;

			/* by default no rehash in case of "continue" */
			rehandle[diskcur].block = 0;

			/* if the disk position is not used */
			if (!disk)
				continue;

			/* if the block is unsynced, errors are expected */
			if (block_has_invalid_parity(block)) {
				/* report that the block and the file are not synced */
				block_is_unsynced = 1;
				file_is_unsynced = 1;
				/* follow */
			}

			/* if the block is not used */
			if (!block_has_file(block))
				continue;

			/* if the block is unsynced, errors are expected */
			if (task->is_timestamp_different) {
				/* report that the block and the file are not synced */
				block_is_unsynced = 1;
				file_is_unsynced = 1;
				/* follow */
			}

			/* handle error conditions */
			if (task->state == TASK_STATE_IOERROR) {
				++io_error;
				goto bail;
			}
			if (task->state == TASK_STATE_ERROR) {
				++error;
				goto bail;
			}
			if (task->state == TASK_STATE_ERROR_CONTINUE) {
				++error;
				error_on_this_block = 1;
				continue;
			}
			if (task->state == TASK_STATE_IOERROR_CONTINUE) {
				++io_error;
				if (io_error >= state->opt.io_error_limit) {
					/* LCOV_EXCL_START */
					log_fatal("DANGER! Too many input/output read error in a data disk, it isn't possible to scrub.\n");
					log_fatal("Ensure that disk '%s' is sane and that file '%s' can be accessed.\n", disk->dir, task->path);
					log_fatal("Stopping at block %u\n", blockcur);
					goto bail;
					/* LCOV_EXCL_STOP */
				}

				/* otherwise continue */
				io_error_on_this_block = 1;
				continue;
			}
			if (task->state != TASK_STATE_DONE) {
				/* LCOV_EXCL_START */
				log_fatal("Internal inconsistency in task state\n");
				os_abort();
				/* LCOV_EXCL_STOP */
			}

			countsize += read_size;

			/* now compute the hash */
			if (rehash) {
				memhash(state->prevhash, state->prevhashseed, hash, buffer[diskcur], read_size);

				/* compute the new hash, and store it */
				rehandle[diskcur].block = block;
				memhash(state->hash, state->hashseed, rehandle[diskcur].hash, buffer[diskcur], read_size);
			} else {
				memhash(state->hash, state->hashseed, hash, buffer[diskcur], read_size);
			}

			/* until now is hash */
			state_usage_hash(state);

			if (block_has_updated_hash(block)) {
				/* compare the hash */
				if (memcmp(hash, block->hash, HASH_SIZE) != 0) {
					unsigned diff = memdiff(hash, block->hash, HASH_SIZE);

					log_tag("error:%u:%s:%s: Data error at position %u, diff bits %u\n", blockcur, disk->name, esc(file->sub, esc_buffer), file_pos, diff);

					/* it's a silent error only if we are dealing with synced files */
					if (file_is_unsynced) {
						++error;
						error_on_this_block = 1;
					} else {
						log_error("Data error in file '%s' at position '%u', diff bits %u\n", task->path, file_pos, diff);
						++silent_error;
						silent_error_on_this_block = 1;
					}
					continue;
				}
			}
		}

		/* buffers for parity read and not computed */
		for (l = 0; l < state->level; ++l)
			buffer_recov[l] = buffer[diskmax + state->level + l];
		for (; l < LEV_MAX; ++l)
			buffer_recov[l] = 0;

		/* until now is misc */
		state_usage_misc(state);

		/* read the parity */
		for (l = 0; l < state->level; ++l) {
			struct snapraid_task* task;
			unsigned levcur;

			task = io_parity_read(&io, &levcur, waiting_map, &waiting_mac);

			/* until now is parity */
			state_usage_parity(state, waiting_map, waiting_mac);

			/* handle error conditions */
			if (task->state == TASK_STATE_IOERROR) {
				++io_error;
				goto bail;
			}
			if (task->state == TASK_STATE_ERROR) {
				++error;
				goto bail;
			}
			if (task->state == TASK_STATE_ERROR_CONTINUE) {
				++error;
				error_on_this_block = 1;

				/* if continuing on error, clear the missing buffer */
				buffer_recov[levcur] = 0;
				continue;
			}
			if (task->state == TASK_STATE_IOERROR_CONTINUE) {
				++io_error;
				if (io_error >= state->opt.io_error_limit) {
					/* LCOV_EXCL_START */
					log_fatal("DANGER! Too many input/output read error in the %s disk, it isn't possible to scrub.\n", lev_name(levcur));
					log_fatal("Ensure that disk '%s' is sane and can be read.\n", lev_config_name(levcur));
					log_fatal("Stopping at block %u\n", blockcur);
					goto bail;
					/* LCOV_EXCL_STOP */
				}

				/* otherwise continue */
				io_error_on_this_block = 1;

				/* if continuing on error, clear the missing buffer */
				buffer_recov[levcur] = 0;
				continue;
			}
			if (task->state != TASK_STATE_DONE) {
				/* LCOV_EXCL_START */
				log_fatal("Internal inconsistency in task state\n");
				os_abort();
				/* LCOV_EXCL_STOP */
			}
		}

		/* if we have read all the data required and it's correct, proceed with the parity check */
		if (!error_on_this_block && !silent_error_on_this_block && !io_error_on_this_block) {

			/* compute the parity */
			raid_gen(diskmax, state->level, state->block_size, buffer);

			/* compare the parity */
			for (l = 0; l < state->level; ++l) {
				if (buffer_recov[l] && memcmp(buffer[diskmax + l], buffer_recov[l], state->block_size) != 0) {
					unsigned diff = memdiff(buffer[diskmax + l], buffer_recov[l], state->block_size);

					log_tag("parity_error:%u:%s: Data error, diff bits %u\n", blockcur, lev_config_name(l), diff);

					/* it's a silent error only if we are dealing with synced blocks */
					if (block_is_unsynced) {
						++error;
						error_on_this_block = 1;
					} else {
						log_fatal("Data error in parity '%s' at position '%u', diff bits %u\n", lev_config_name(l), blockcur, diff);
						++silent_error;
						silent_error_on_this_block = 1;
					}
				}
			}

			/* until now is raid */
			state_usage_raid(state);
		}

		if (silent_error_on_this_block || io_error_on_this_block) {
			/* set the error status keeping other info */
			info_set(&state->infoarr, blockcur, info_set_bad(info));
		} else if (error_on_this_block) {
			/* do nothing, as this is a generic error */
			/* likely caused by an unsynced array */
		} else {
			/* if rehash is needed */
			if (rehash) {
				/* store all the new hash already computed */
				for (j = 0; j < diskmax; ++j) {
					if (rehandle[j].block)
						memcpy(rehandle[j].block->hash, rehandle[j].hash, HASH_SIZE);
				}
			}

			/* update the time info of the block */
			/* and clear any other flag */
			info_set(&state->infoarr, blockcur, info_make(now, 0, 0, 0));
		}

		/* mark the state as needing write */
		state->need_write = 1;

		/* count the number of processed blocks */
		++countpos;

		/* progress */
		if (state_progress(state, &io, blockcur, countpos, countmax, countsize)) {
			/* LCOV_EXCL_START */
			break;
			/* LCOV_EXCL_STOP */
		}

		/* autosave */
		if (state->autosave != 0
			&& autosavedone >= autosavelimit /* if we have reached the limit */
			&& autosavemissing >= autosavelimit /* if we have at least a full step to do */
		) {
			autosavedone = 0; /* restart the counter */

			/* until now is misc */
			state_usage_misc(state);

			state_progress_stop(state);

			msg_progress("Autosaving...\n");
			state_write(state);

			state_progress_restart(state);

			/* drop until now */
			state_usage_waste(state);
		}
	}

	state_progress_end(state, countpos, countmax, countsize);

	state_usage_print(state);

	if (error || silent_error || io_error) {
		msg_status("\n");
		msg_status("%8u file errors\n", error);
		msg_status("%8u io errors\n", io_error);
		msg_status("%8u data errors\n", silent_error);
	} else {
		/* print the result only if processed something */
		if (countpos != 0)
			msg_status("Everything OK\n");
	}

	if (error)
		log_fatal("WARNING! Unexpected file errors!\n");
	if (io_error)
		log_fatal("DANGER! Unexpected input/output errors! The failing blocks are now marked as bad!\n");
	if (silent_error)
		log_fatal("DANGER! Unexpected data errors! The failing blocks are now marked as bad!\n");
	if (io_error || silent_error) {
		log_fatal("Use 'snapraid status' to list the bad blocks.\n");
		log_fatal("Use 'snapraid -e fix' to recover.\n");
	}

	log_tag("summary:error_file:%u\n", error);
	log_tag("summary:error_io:%u\n", io_error);
	log_tag("summary:error_data:%u\n", silent_error);
	if (error + silent_error + io_error == 0)
		log_tag("summary:exit:ok\n");
	else
		log_tag("summary:exit:error\n");
	log_flush();

bail:
	/* stop all the worker threads */
	io_stop(&io);

	for (j = 0; j < diskmax; ++j) {
		struct snapraid_file* file = handle[j].file;
		struct snapraid_disk* disk = handle[j].disk;
		ret = handle_close(&handle[j]);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			log_tag("error:%u:%s:%s: Close error. %s\n", blockcur, disk->name, esc(file->sub, esc_buffer), strerror(errno));
			log_fatal("DANGER! Unexpected close error in a data disk.\n");
			++error;
			/* continue, as we are already exiting */
			/* LCOV_EXCL_STOP */
		}
	}

	free(handle);
	free(rehandle_alloc);
	free(waiting_map);
	io_done(&io);

	if (state->opt.expect_recoverable) {
		if (error + silent_error + io_error == 0)
			return -1;
	} else {
		if (error + silent_error + io_error != 0)
			return -1;
	}
	return 0;
}
Example No. 14
static int state_scrub_process(struct snapraid_state* state, struct snapraid_parity_handle** parity, block_off_t blockstart, block_off_t blockmax, struct snapraid_plan* plan, time_t now)
{
	struct snapraid_handle* handle;
	void* rehandle_alloc;
	struct snapraid_rehash* rehandle;
	unsigned diskmax;
	block_off_t i;
	unsigned j;
	void* buffer_alloc;
	void** buffer;
	unsigned buffermax;
	data_off_t countsize;
	block_off_t countpos;
	block_off_t countmax;
	block_off_t autosavedone;
	block_off_t autosavelimit;
	block_off_t autosavemissing;
	int ret;
	unsigned error;
	unsigned silent_error;
	unsigned io_error;
	unsigned l;

	/* maps the disks to handles */
	handle = handle_map(state, &diskmax);

	/* rehash buffers */
	rehandle = malloc_nofail_align(diskmax * sizeof(struct snapraid_rehash), &rehandle_alloc);

	/* we need one buffer per disk, plus 2 per parity level */
	buffermax = diskmax + state->level * 2;

	buffer = malloc_nofail_vector_align(diskmax, buffermax, state->block_size, &buffer_alloc);
	if (!state->opt.skip_self)
		mtest_vector(buffermax, state->block_size, buffer);

	error = 0;
	silent_error = 0;
	io_error = 0;

	/* first count the number of blocks to process */
	countmax = 0;
	plan->countlast = 0;
	for (i = blockstart; i < blockmax; ++i) {
		if (!block_is_enabled(state, i, plan))
			continue;

		++countmax;
	}

	/* compute the autosave size for all disks, even if not read */
	/* this makes sense because the speed should be almost the same */
	/* if the disks are read in parallel */
	autosavelimit = state->autosave / (diskmax * state->block_size);
	autosavemissing = countmax; /* blocks to do */
	autosavedone = 0; /* blocks done */

	/* drop until now */
	state_usage_waste(state);

	countsize = 0;
	countpos = 0;
	plan->countlast = 0;
	state_progress_begin(state, blockstart, blockmax, countmax);
	for (i = blockstart; i < blockmax; ++i) {
		snapraid_info info;
		int error_on_this_block;
		int silent_error_on_this_block;
		int io_error_on_this_block;
		int block_is_unsynced;
		int rehash;

		if (!block_is_enabled(state, i, plan))
			continue;

		/* one more block processed for autosave */
		++autosavedone;
		--autosavemissing;

		/* by default process the block, and skip it if something goes wrong */
		error_on_this_block = 0;
		silent_error_on_this_block = 0;
		io_error_on_this_block = 0;

		/* if all the blocks at this address are synced */
		/* if not, parity is not even checked */
		block_is_unsynced = 0;

		/* get block specific info */
		info = info_get(&state->infoarr, i);

		/* if we have to use the old hash */
		rehash = info_get_rehash(info);

		/* for each disk, process the block */
		for (j = 0; j < diskmax; ++j) {
			int read_size;
			unsigned char hash[HASH_SIZE];
			struct snapraid_block* block;
			int file_is_unsynced;
			struct snapraid_disk* disk = handle[j].disk;
			struct snapraid_file* file;
			block_off_t file_pos;

			/* if the file on this disk is synced */
			/* if not, silent errors are treated as expected errors */
			file_is_unsynced = 0;

			/* by default no rehash in case of "continue" */
			rehandle[j].block = 0;

			/* if the disk position is not used */
			if (!disk) {
				/* use an empty block */
				memset(buffer[j], 0, state->block_size);
				continue;
			}

			/* if the block is not used */
			block = fs_par2block_get(disk, i);
			if (!block_has_file(block)) {
				/* use an empty block */
				memset(buffer[j], 0, state->block_size);
				continue;
			}

			/* get the file of this block */
			file = fs_par2file_get(disk, i, &file_pos);

			/* if the block is unsynced, errors are expected */
			if (block_has_invalid_parity(block)) {
				/* report that the block and the file are not synced */
				block_is_unsynced = 1;
				file_is_unsynced = 1;
				/* follow */
			}

			/* until now is CPU */
			state_usage_cpu(state);

			/* if the file is different than the current one, close it */
			if (handle[j].file != 0 && handle[j].file != file) {
				/* keep a pointer at the file we are going to close for error reporting */
				struct snapraid_file* report = handle[j].file;
				ret = handle_close(&handle[j]);
				if (ret == -1) {
					/* LCOV_EXCL_START */
					/* This one is really an unexpected error, because we are only reading */
					/* and closing a descriptor should never fail */
					if (errno == EIO) {
						log_tag("error:%u:%s:%s: Close EIO error. %s\n", i, disk->name, esc(report->sub), strerror(errno));
						log_fatal("DANGER! Unexpected input/output close error in a data disk, it isn't possible to scrub.\n");
						log_fatal("Ensure that disk '%s' is sane and that file '%s' can be accessed.\n", disk->dir, handle[j].path);
						log_fatal("Stopping at block %u\n", i);
						++io_error;
						goto bail;
					}

					log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc(report->sub), strerror(errno));
					log_fatal("WARNING! Unexpected close error in a data disk, it isn't possible to scrub.\n");
					log_fatal("Ensure that file '%s' can be accessed.\n", handle[j].path);
					log_fatal("Stopping at block %u\n", i);
					++error;
					goto bail;
					/* LCOV_EXCL_STOP */
				}
			}

			ret = handle_open(&handle[j], file, state->file_mode, log_error, 0);
			if (ret == -1) {
				if (errno == EIO) {
					/* LCOV_EXCL_START */
					log_tag("error:%u:%s:%s: Open EIO error. %s\n", i, disk->name, esc(file->sub), strerror(errno));
					log_fatal("DANGER! Unexpected input/output open error in a data disk, it isn't possible to scrub.\n");
					log_fatal("Ensure that disk '%s' is sane and that file '%s' can be accessed.\n", disk->dir, handle[j].path);
					log_fatal("Stopping at block %u\n", i);
					++io_error;
					goto bail;
					/* LCOV_EXCL_STOP */
				}

				log_tag("error:%u:%s:%s: Open error. %s\n", i, disk->name, esc(file->sub), strerror(errno));
				++error;
				error_on_this_block = 1;
				continue;
			}

			/* check if the file is changed */
			if (handle[j].st.st_size != file->size
				|| handle[j].st.st_mtime != file->mtime_sec
				|| STAT_NSEC(&handle[j].st) != file->mtime_nsec
				/* don't check the inode to support filesystem without persistent inodes */
			) {
				/* report that the block and the file are not synced */
				block_is_unsynced = 1;
				file_is_unsynced = 1;
				/* follow */
			}

			/* note that we intentionally don't abort if the file has different attributes */
			/* from the last sync, as we are expected to return errors if running */
			/* in an unsynced array. This is just like the check command. */

			read_size = handle_read(&handle[j], file_pos, buffer[j], state->block_size, log_error, 0);
			if (read_size == -1) {
				if (errno == EIO) {
					log_tag("error:%u:%s:%s: Read EIO error at position %u. %s\n", i, disk->name, esc(file->sub), file_pos, strerror(errno));
					if (io_error >= state->opt.io_error_limit) {
						/* LCOV_EXCL_START */
						log_fatal("DANGER! Too many input/output read error in a data disk, it isn't possible to scrub.\n");
						log_fatal("Ensure that disk '%s' is sane and that file '%s' can be accessed.\n", disk->dir, handle[j].path);
						log_fatal("Stopping at block %u\n", i);
						++io_error;
						goto bail;
						/* LCOV_EXCL_STOP */
					}

					log_error("Input/Output error in file '%s' at position '%u'\n", handle[j].path, file_pos);
					++io_error;
					io_error_on_this_block = 1;
					continue;
				}

				log_tag("error:%u:%s:%s: Read error at position %u. %s\n", i, disk->name, esc(file->sub), file_pos, strerror(errno));
				++error;
				error_on_this_block = 1;
				continue;
			}

			/* until now is disk */
			state_usage_disk(state, disk);

			countsize += read_size;

			/* now compute the hash */
			if (rehash) {
				memhash(state->prevhash, state->prevhashseed, hash, buffer[j], read_size);

				/* compute the new hash, and store it */
				rehandle[j].block = block;
				memhash(state->hash, state->hashseed, rehandle[j].hash, buffer[j], read_size);
			} else {
				memhash(state->hash, state->hashseed, hash, buffer[j], read_size);
			}

			if (block_has_updated_hash(block)) {
				/* compare the hash */
				if (memcmp(hash, block->hash, HASH_SIZE) != 0) {
					unsigned diff = memdiff(hash, block->hash, HASH_SIZE);

					log_tag("error:%u:%s:%s: Data error at position %u, diff bits %u\n", i, disk->name, esc(file->sub), file_pos, diff);

					/* it's a silent error only if we are dealing with synced files */
					if (file_is_unsynced) {
						++error;
						error_on_this_block = 1;
					} else {
						log_error("Data error in file '%s' at position '%u', diff bits %u\n", handle[j].path, file_pos, diff);
						++silent_error;
						silent_error_on_this_block = 1;
					}
					continue;
				}
			}
		}

		/* if we have read all the data required and it's correct, proceed with the parity check */
		if (!error_on_this_block && !silent_error_on_this_block && !io_error_on_this_block) {
			unsigned char* buffer_recov[LEV_MAX];

			/* until now is CPU */
			state_usage_cpu(state);

			/* buffers for parity read and not computed */
			for (l = 0; l < state->level; ++l)
				buffer_recov[l] = buffer[diskmax + state->level + l];
			for (; l < LEV_MAX; ++l)
				buffer_recov[l] = 0;

			/* read the parity */
			for (l = 0; l < state->level; ++l) {
				ret = parity_read(parity[l], i, buffer_recov[l], state->block_size, log_error);
				if (ret == -1) {
					buffer_recov[l] = 0;

					if (errno == EIO) {
						log_tag("parity_error:%u:%s: Read EIO error. %s\n", i, lev_config_name(l), strerror(errno));
						if (io_error >= state->opt.io_error_limit) {
							/* LCOV_EXCL_START */
							log_fatal("DANGER! Too many input/output read error in the %s disk, it isn't possible to scrub.\n", lev_name(l));
							log_fatal("Ensure that disk '%s' is sane and can be read.\n", lev_config_name(l));
							log_fatal("Stopping at block %u\n", i);
							++io_error;
							goto bail;
							/* LCOV_EXCL_STOP */
						}

						log_error("Input/Output error in parity '%s' at position '%u'\n", lev_config_name(l), i);
						++io_error;
						io_error_on_this_block = 1;
						continue;
					}

					log_tag("parity_error:%u:%s: Read error. %s\n", i, lev_config_name(l), strerror(errno));
					++error;
					error_on_this_block = 1;
					continue;
				}

				/* until now is parity */
				state_usage_parity(state, l);
			}

			/* compute the parity */
			raid_gen(diskmax, state->level, state->block_size, buffer);

			/* compare the parity */
			for (l = 0; l < state->level; ++l) {
				if (buffer_recov[l] && memcmp(buffer[diskmax + l], buffer_recov[l], state->block_size) != 0) {
					unsigned diff = memdiff(buffer[diskmax + l], buffer_recov[l], state->block_size);

					log_tag("parity_error:%u:%s: Data error, diff bits %u\n", i, lev_config_name(l), diff);

					/* it's a silent error only if we are dealing with synced blocks */
					if (block_is_unsynced) {
						++error;
						error_on_this_block = 1;
					} else {
						log_fatal("Data error in parity '%s' at position '%u', diff bits %u\n", lev_config_name(l), i, diff);
						++silent_error;
						silent_error_on_this_block = 1;
					}
				}
			}
		}

		if (silent_error_on_this_block || io_error_on_this_block) {
			/* set the error status keeping other info */
			info_set(&state->infoarr, i, info_set_bad(info));
		} else if (error_on_this_block) {
			/* do nothing, as this is a generic error */
			/* likely caused by an unsynced array */
		} else {
			/* if rehash is needed */
			if (rehash) {
				/* store all the new hash already computed */
				for (j = 0; j < diskmax; ++j) {
					if (rehandle[j].block)
						memcpy(rehandle[j].block->hash, rehandle[j].hash, HASH_SIZE);
				}
			}

			/* update the time info of the block */
			/* and clear any other flag */
			info_set(&state->infoarr, i, info_make(now, 0, 0, 0));
		}

		/* mark the state as needing write */
		state->need_write = 1;

		/* count the number of processed blocks */
		++countpos;

		/* progress */
		if (state_progress(state, i, countpos, countmax, countsize)) {
			/* LCOV_EXCL_START */
			break;
			/* LCOV_EXCL_STOP */
		}

		/* autosave */
		if (state->autosave != 0
			&& autosavedone >= autosavelimit /* if we have reached the limit */
			&& autosavemissing >= autosavelimit /* if we have at least a full step to do */
		) {
			autosavedone = 0; /* restart the counter */

			/* until now is CPU */
			state_usage_cpu(state);

			state_progress_stop(state);

			msg_progress("Autosaving...\n");
			state_write(state);

			state_progress_restart(state);

			/* drop until now */
			state_usage_waste(state);
		}
	}

	state_progress_end(state, countpos, countmax, countsize);

	state_usage_print(state);

	if (error || silent_error || io_error) {
		msg_status("\n");
		msg_status("%8u file errors\n", error);
		msg_status("%8u io errors\n", io_error);
		msg_status("%8u data errors\n", silent_error);
	} else {
		/* print the result only if processed something */
		if (countpos != 0)
			msg_status("Everything OK\n");
	}

	if (error)
		log_fatal("WARNING! Unexpected file errors!\n");
	if (io_error)
		log_fatal("DANGER! Unexpected input/output errors! The failing blocks are now marked as bad!\n");
	if (silent_error)
		log_fatal("DANGER! Unexpected data errors! The failing blocks are now marked as bad!\n");
	if (io_error || silent_error) {
		log_fatal("Use 'snapraid status' to list the bad blocks.\n");
		log_fatal("Use 'snapraid -e fix' to recover.\n");
	}

	log_tag("summary:error_file:%u\n", error);
	log_tag("summary:error_io:%u\n", io_error);
	log_tag("summary:error_data:%u\n", silent_error);
	if (error + silent_error + io_error == 0)
		log_tag("summary:exit:ok\n");
	else
		log_tag("summary:exit:error\n");
	log_flush();

bail:
	for (j = 0; j < diskmax; ++j) {
		struct snapraid_file* file = handle[j].file;
		struct snapraid_disk* disk = handle[j].disk;
		ret = handle_close(&handle[j]);
		if (ret == -1) {
			/* LCOV_EXCL_START */
			log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc(file->sub), strerror(errno));
			log_fatal("DANGER! Unexpected close error in a data disk.\n");
			++error;
			/* continue, as we are already exiting */
			/* LCOV_EXCL_STOP */
		}
	}

	free(handle);
	free(buffer_alloc);
	free(buffer);
	free(rehandle_alloc);

	if (state->opt.expect_recoverable) {
		if (error + silent_error + io_error == 0)
			return -1;
	} else {
		if (error + silent_error + io_error != 0)
			return -1;
	}
	return 0;
}