Example #1
0
static bool disk_scan_part(disk_t *disk, uint64_t offset, void *data, int data_size, struct scan_state *state)
{
	ssize_t ret;
	struct timespec t_start;
	struct timespec t_end;
	uint64_t t;
	int error = 0;
	io_result_t io_res;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	ret = disk_dev_read(&disk->dev, offset, data_size, data, &io_res);
	clock_gettime(CLOCK_MONOTONIC, &t_end);

	t = (t_end.tv_sec - t_start.tv_sec) * 1000000000 +
		t_end.tv_nsec - t_start.tv_nsec;
	const uint64_t t_msec = t / 1000000;

	// Perform logging
	data_log_raw(&disk->data_raw, offset/disk->sector_size, data_size/disk->sector_size, &io_res, t);
	data_log(&disk->data_log, offset/disk->sector_size, data_size/disk->sector_size, &io_res, t);

	// Handle error or incomplete data
	if (io_res.data != DATA_FULL || io_res.error != ERROR_NONE) {
		int s_errno = errno;
		ERROR("Error when reading at offset %" PRIu64 " size %d read %zd, errno=%d: %s", offset, data_size, ret, errno, strerror(errno));
		ERROR("Details: error=%s data=%s %02X/%02X/%02X", error_to_str(io_res.error), data_to_str(io_res.data),
				io_res.info.sense_key, io_res.info.asc, io_res.info.ascq);
		report_scan_error(disk, offset, data_size, t);
		disk->num_errors++;
		error = 1;
		if (io_res.error == ERROR_FATAL) {
			ERROR("Fatal error occurred, bailing out.");
			return false;
		}
		if (io_res.error == ERROR_UNKNOWN) {
			if (state->num_unknown_errors++ > 500) {
				ERROR("%u unknown errors occurred, assuming fatal issue.", state->num_unknown_errors);
				return false;
			}
			ERROR("Unknown error occurred, possibly untranslated error by storage layers, trying to continue.");

		}

		if (s_errno != EIO && s_errno != 0)
			abort();
		// TODO: What to do when no everything was read but errno is zero?
	}
	else {
		state->num_unknown_errors = 0; // Clear non-consecutive unknown errors
		report_scan_success(disk, offset, data_size, t);
	}

	hdr_record_value(disk->histogram, t / 1000);
	latency_bucket_add(disk, t_msec, state);

	if (t_msec > 1000) {
		VERBOSE("Scanning at offset %" PRIu64 " took %"PRIu64" msec", offset, t_msec);
	}

	if (disk->fix && (t_msec > 3000 || error)) {
		if (io_res.error != ERROR_UNCORRECTED) {
			INFO("Fixing region by rewriting, offset=%"PRIu64" size=%d", offset, data_size);
			ret = disk_dev_write(&disk->dev, offset, data_size, data, &io_res);
			if (ret != data_size) {
				ERROR("Error while attempting to rewrite the data! ret=%zd errno=%d: %s", ret, errno, strerror(errno));
			}
		} else {
			// When we correct uncorrectable errors we want to zero it out, this should reduce any confusion later on when the data is read
			unsigned fix_offset = 0;
			int fix_size = 4096;

			if (data_size < fix_size)
				fix_size = data_size;

			for (; data_size >= (int)(fix_offset + fix_size); fix_offset += fix_size) {
				disk_dev_read(&disk->dev, offset+fix_offset, fix_size, data, &io_res);
				if (io_res.error == ERROR_UNCORRECTED) {
					INFO("Fixing uncorrectable region by writing zeros, offset=%"PRIu64" size=%d", offset+fix_offset, fix_size);
					memset(data, 0, fix_size);
					ret = disk_dev_write(&disk->dev, offset+fix_offset, fix_size, data, &io_res);
					if (ret != data_size) {
						ERROR("Error while attempting to overwrite uncorrectable data! ret=%zd errno=%d: %s", ret, errno, strerror(errno));
					}
				}
			}
		}
	}

	return true;
}
Example #2
0
static bool disk_scan_part(disk_t *disk, uint64_t offset, void *data, int data_size, struct scan_state *state)
{
	ssize_t ret;
	struct timespec t_start;
	struct timespec t_end;
	uint64_t t;
	int error = 0;
	io_result_t io_res;

	clock_gettime(CLOCK_MONOTONIC, &t_start);
	ret = disk_dev_read(&disk->dev, offset, data_size, data, &io_res);
	clock_gettime(CLOCK_MONOTONIC, &t_end);

	t = (t_end.tv_sec - t_start.tv_sec) * 1000000000 +
		t_end.tv_nsec - t_start.tv_nsec;
	const uint64_t t_msec = t / 1000000;

	// Perform logging
	data_log_raw(&disk->data_raw, offset/disk->sector_size, data_size/disk->sector_size, &io_res, t);
	data_log(&disk->data_log, offset/disk->sector_size, data_size/disk->sector_size, &io_res, t);

	// Handle error or incomplete data
	if (io_res.data != DATA_FULL || io_res.error != ERROR_NONE) {
		int s_errno = errno;
		ERROR("Error when reading at offset %" PRIu64 " size %d read %zd: %m", offset, data_size, ret);
		ERROR("Details: error=%s data=%s %02X/%02X/%02X", error_to_str(io_res.error), data_to_str(io_res.data),
				io_res.info.sense_key, io_res.info.asc, io_res.info.ascq);
		report_scan_error(disk, offset, data_size, t);
		disk->num_errors++;
		error = 1;
		if (io_res.error == ERROR_FATAL) {
			ERROR("Fatal error occurred, bailing out.");
			return false;
		}
		if (io_res.error == ERROR_UNKNOWN) {
			if (state->num_unknown_errors++ > 500) {
				ERROR("%u unknown errors occurred, assuming fatal issue.", state->num_unknown_errors);
				return false;
			}
			ERROR("Unknown error occurred, possibly untranslated error by storage layers, trying to continue.");

		}

		if (s_errno != EIO && s_errno != 0)
			abort();
		// TODO: What to do when no everything was read but errno is zero?
	}
	else {
		state->num_unknown_errors = 0; // Clear non-consecutive unknown errors
		report_scan_success(disk, offset, data_size, t);
	}

	unsigned hist_idx = 0;
	while (t_msec >= histogram_time[hist_idx].top_val && hist_idx < ARRAY_SIZE(disk->histogram) - 1) {
		hist_idx++;
	}
	disk->histogram[hist_idx]++;

	latency_bucket_add(disk, t_msec, state);

	if (t_msec > 1000) {
		VERBOSE("Scanning at offset %" PRIu64 " took %"PRIu64" msec", offset, t_msec);
	}

	if (disk->fix && (t_msec > 3000 || error)) {
		INFO("Fixing region by rewriting, offset=%"PRIu64" size=%d", offset, data_size);
		ret = disk_dev_write(&disk->dev, offset, data_size, data, &io_res);
		if (ret != data_size) {
			ERROR("Error while attempting to rewrite the data! ret=%zd errno=%d: %m", ret, errno);
		}
	}

	return true;
}