Ejemplo n.º 1
0
byte*
mach_parse_compressed(
/*==================*/
			/* out: pointer to end of the stored field, NULL if
			not complete */
	byte*   ptr,   	/* in: pointer to buffer from where to read */
	byte*	end_ptr,/* in: pointer to end of the buffer */
	ulint*	val)	/* out: read value (< 2^32) */ 
{
	ulint	flag;

	ut_ad(ptr && end_ptr && val);

	if (ptr >= end_ptr) {

		return(NULL);
	}

	flag = mach_read_from_1(ptr);

	if (flag < 0x80UL) {
		*val = flag;
		return(ptr + 1);
		
	} else if (flag < 0xC0UL) {
		if (end_ptr < ptr + 2) {
			return(NULL);
		}
			
		*val = mach_read_from_2(ptr) & 0x7FFFUL;

		return(ptr + 2);
		
	} else if (flag < 0xE0UL) {
		if (end_ptr < ptr + 3) {
			return(NULL);
		}
			
		*val = mach_read_from_3(ptr) & 0x3FFFFFUL;

		return(ptr + 3);
	} else if (flag < 0xF0UL) {
		if (end_ptr < ptr + 4) {
			return(NULL);
		}
			
		*val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;

		return(ptr + 4);
	} else {
		ut_ad(flag == 0xF0UL);

		if (end_ptr < ptr + 5) {
			return(NULL);
		}

		*val = mach_read_from_4(ptr + 1);
		return(ptr + 5);
	}
}
Ejemplo n.º 2
0
void process_ibpage(page_t *page) {
    ulint page_id;
	rec_t *origin;
	ulint offsets[MAX_TABLE_FIELDS + 2];
	ulint offset, i;
	
	// Skip tables if filter used
    if (use_filter_id) {
        dulint index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
        if (index_id.low != filter_id.low || index_id.high != filter_id.high) {
            if (debug) {
            	page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
                printf("Skipped using index id filter: %lu!\n", page_id);
            }
            return;
        }
    }

	// Read page id
	page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
	if (debug) printf("Page id: %lu\n", page_id);

	// Check requested and actual formats
    if (!check_page_format(page)) return;

	// Find possible data area start point (at least 5 bytes of utility data)
	offset = 100 + record_extra_bytes;
	if (debug) printf("Starting offset: %lu. Checking %d table definitions.\n", offset, table_definitions_cnt);
	
	// Walk through all possible positions to the end of page 
	// (start of directory - extra bytes of the last rec)
	while (offset < UNIV_PAGE_SIZE - record_extra_bytes) {
		// Get record pointer
		origin = page + offset;
		if (debug) printf("\nChecking offset: %lu: ", offset);
		
		// Check all tables
		for (i = 0; i < table_definitions_cnt; i++) {
			// Get table info
			table_def_t *table = &(table_definitions[i]);
			if (debug) printf(" (%s) ", table->name);

			// Check if origin points to a valid record
			if (check_for_a_record(page, origin, table, offsets) && check_constraints(origin, table, offsets)) {
			    if (debug) printf("\n---------------------------------------------------\n"
			       			     "PAGE%lu: Found a table %s record: %p (offset = %lu)\n", page_id, table->name, origin, offset);
			    offset += process_ibrec(page, origin, table, offsets);
			    break;
		    }
		}

		// Check from next byte
		offset++;
	}
}
Ejemplo n.º 3
0
void
dfield_print(
/*=========*/
	dfield_t*	dfield)	 /* in: dfield */
{
	byte*	data;
	ulint	len;
	ulint	mtype;
	ulint	i;

	len = dfield_get_len(dfield);
	data = dfield_get_data(dfield);

	if (len == UNIV_SQL_NULL) {
		fputs("NULL", stderr);

		return;
	}

	mtype = dtype_get_mtype(dfield_get_type(dfield));

	if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
	
		for (i = 0; i < len; i++) {
			int	c = *data++;
			putc(isprint(c) ? c : ' ', stderr);
		}
	} else if (mtype == DATA_INT) {
		ut_a(len == 4); /* only works for 32-bit integers */
		fprintf(stderr, "%d", (int)mach_read_from_4(data));
	} else {
		ut_error;
	}
}
Ejemplo n.º 4
0
UNIV_INTERN
ulint
mem_field_trailer_get_check(byte* field)
{
	return(mach_read_from_4(field
				+ mem_field_header_get_len(field)));
}
Ejemplo n.º 5
0
void
dfield_print_also_hex(
/*==================*/
	dfield_t*	dfield)	 /* in: dfield */
{
	byte*	data;
	ulint	len;
	ulint	mtype;
	ulint	i;
	ibool	print_also_hex;

	len = dfield_get_len(dfield);
	data = dfield_get_data(dfield);

	if (len == UNIV_SQL_NULL) {
		printf("NULL");

		return;
	}

	mtype = dtype_get_mtype(dfield_get_type(dfield));

	if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {

		print_also_hex = FALSE;
	
		for (i = 0; i < len; i++) {

			if (isprint((char)(*data))) {
				printf("%c", (char)*data);
			} else {
				print_also_hex = TRUE;
				printf(" ");
			}

			data++;
		}

		if (!print_also_hex) {

			return;
		}

		printf(" Hex: ");
		
		data = dfield_get_data(dfield);
		
		for (i = 0; i < len; i++) {
			printf("%02lx", (ulint)*data);

			data++;
		}
	} else if (mtype == DATA_INT) {
		ut_a(len == 4); /* inly works for 32-bit integers */
		printf("%i", (int)mach_read_from_4(data));
	} else {
		ut_error;
	}
}
Ejemplo n.º 6
0
void
dfield_print_also_hex(
/*==================*/
	dfield_t*	dfield)	 /* in: dfield */
{
	byte*	data;
	ulint	len;
	ulint	mtype;
	ulint	i;
	ibool	print_also_hex;

	len = dfield_get_len(dfield);
	data = dfield_get_data(dfield);

	if (len == UNIV_SQL_NULL) {
		fputs("NULL", stderr);

		return;
	}

	mtype = dtype_get_mtype(dfield_get_type(dfield));

	if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {

		print_also_hex = FALSE;
	
		for (i = 0; i < len; i++) {
			int c = *data++;
			if (!isprint(c)) {
				print_also_hex = TRUE;
				c = ' ';
			}
			putc(c, stderr);
		}

		if (!print_also_hex) {

			return;
		}

		fputs(" Hex: ", stderr);
		
		data = dfield_get_data(dfield);
		
		for (i = 0; i < len; i++) {
			fprintf(stderr, "%02lx", (ulint)*data);

			data++;
		}
	} else if (mtype == DATA_INT) {
		ut_a(len == 4); /* only works for 32-bit integers */
		fprintf(stderr, "%d", (int)mach_read_from_4(data));
	} else {
		ut_error;
	}
}
Ejemplo n.º 7
0
inline unsigned long long int get_uint_value(field_def_t *field, byte *value) {
	switch (field->fixed_length) {
		case 1: return mach_read_from_1(value);
		case 2: return mach_read_from_2(value);
		case 3: return mach_read_from_3(value) & 0x3FFFFFUL;
		case 4: return mach_read_from_4(value);
		case 8: return make_ulonglong(mach_read_from_8(value));
	}
	return 0;
}
Ejemplo n.º 8
0
inline long long int get_int_value(field_def_t *field, byte *value) {
	switch (field->fixed_length) {
		case 1: return mach_read_from_1(value) & ~(1<<7);
		case 2: return mach_read_from_2(value) & ~(1<<15);
		case 3: return mach_read_from_3(value) & 0x3FFFFFUL & ~(1L<<23);
		case 4: return mach_read_from_4(value) & ~(1L<<31);
		case 8: return make_longlong(mach_read_from_8(value)) & ~(1LL<<63);
	}
	return 0;
}
Ejemplo n.º 9
0
void process_ibpage(page_t *page) {
    static ulint id = 0;
	ulint page_id;
	dulint index_id;
	char tmp[256];
	int fn;
	
	// Get page info
	page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
	index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
	
    // Skip empty pages
    if (ignore_crap && index_id.high == 0 && index_id.low == 0) return;
	
	// Skip tables if filter used
    if (use_filter_id && (index_id.low != filter_id.low || index_id.high != filter_id.high)) return;
		
	if (count_pages) {
	    if (index_id.high >= 1000) {
            if (ignore_crap) return;
            printf("ERROR: Too high tablespace id! %ld >= 1000!\n", index_id.high);
            exit(1);
	    }

	    if (index_id.low >= 10000) {
            if (ignore_crap) return;
            printf("ERROR: Too high index id! %ld >= 10000!\n", index_id.low);
            exit(1);
	    }
	    
        page_counters[index_id.high][index_id.low]++;
        return;
	}
		
	// Create table directory
	sprintf(tmp, "pages-%u/%lu-%lu", (unsigned int)timestamp, index_id.high, index_id.low);
	mkdir(tmp, 0755);
	
	// Compose page file_name
	sprintf(tmp, "pages-%u/%lu-%lu/%lu-%08lu.page", (unsigned int)timestamp, index_id.high, index_id.low, id++, page_id);
	
	printf("Read page #%lu.. saving it to %s\n", page_id, tmp);

	// Save page data
	fn = open(tmp, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (!fn) error("Can't open file to save page!");
	write(fn, page, UNIV_PAGE_SIZE);
	close(fn);	
}
Ejemplo n.º 10
0
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
UNIV_INTERN
void
dfield_print(
/*=========*/
	const dfield_t*	dfield)	/*!< in: dfield */
{
	const byte*	data;
	ulint		len;
	ulint		i;

	len = dfield_get_len(dfield);
	data = dfield_get_data(dfield);

	if (dfield_is_null(dfield)) {
		fputs("NULL", stderr);

		return;
	}

	switch (dtype_get_mtype(dfield_get_type(dfield))) {
	case DATA_CHAR:
	case DATA_VARCHAR:
		for (i = 0; i < len; i++) {
			int	c = *data++;
			putc(isprint(c) ? c : ' ', stderr);
		}

		if (dfield_is_ext(dfield)) {
			fputs("(external)", stderr);
		}
		break;
	case DATA_INT:
		ut_a(len == 4); /* only works for 32-bit integers */
		fprintf(stderr, "%d", (int)mach_read_from_4(data));
		break;
	default:
		ut_error;
	}
}
Ejemplo n.º 11
0
byte*
mach_dulint_parse_compressed(
/*=========================*/
			/* out: pointer to end of the stored field, NULL if
			not complete */
	byte*   ptr,   	/* in: pointer to buffer from where to read */
	byte*	end_ptr,/* in: pointer to end of the buffer */
	dulint*	val)	/* out: read value */ 
{
	ulint	high;
	ulint	low;
	ulint	size;

	ut_ad(ptr && end_ptr && val);

	if (end_ptr < ptr + 5) {

		return(NULL);
	}

	high = mach_read_compressed(ptr);

	size = mach_get_compressed_size(high);

	ptr += size;

	if (end_ptr < ptr + 4) {

		return(NULL);
	}

	low = mach_read_from_4(ptr);

	*val = ut_dulint_create(high, low);

	return(ptr + 4); 
}
Ejemplo n.º 12
0
/************************************************************************
Flushes possible buffered writes from the doublewrite memory buffer to disk,
and also wakes up the aio thread if simulated aio is used. It is very
important to call this function after a batch of writes has been posted,
and also when we may have to wait for a page latch! Otherwise a deadlock
of threads can occur. */
static
void
buf_flush_buffered_writes(void)
/*===========================*/
{
	buf_block_t*	block;
	byte*		write_buf;
	ulint		len;
	ulint		len2;
	ulint		i;

	if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
		os_aio_simulated_wake_handler_threads();

		return;
	}

	mutex_enter(&(trx_doublewrite->mutex));

	/* Write first to doublewrite buffer blocks. We use synchronous
	aio and thus know that file write has been completed when the
	control returns. */

	if (trx_doublewrite->first_free == 0) {

		mutex_exit(&(trx_doublewrite->mutex));

		return;
	}

	for (i = 0; i < trx_doublewrite->first_free; i++) {

		block = trx_doublewrite->buf_block_arr[i];
		ut_a(block->state == BUF_BLOCK_FILE_PAGE);

		if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
		    != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: ERROR: The page to be written"
				" seems corrupt!\n"
				"InnoDB: The lsn fields do not match!"
				" Noticed in the buffer pool\n"
				"InnoDB: before posting to the"
				" doublewrite buffer.\n");
		}

		if (block->check_index_page_at_flush
		    && !page_simple_validate(block->frame)) {

			buf_page_print(block->frame);

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Apparent corruption of an"
				" index page n:o %lu in space %lu\n"
				"InnoDB: to be written to data file."
				" We intentionally crash server\n"
				"InnoDB: to prevent corrupt data"
				" from ending up in data\n"
				"InnoDB: files.\n",
				(ulong) block->offset, (ulong) block->space);

			ut_error;
		}
	}

	/* increment the doublewrite flushed pages counter */
	srv_dblwr_pages_written+= trx_doublewrite->first_free;
	srv_dblwr_writes++;

	if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
		len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
	} else {
		len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
	}

	fil_io(OS_FILE_WRITE,
	       TRUE, TRX_SYS_SPACE,
	       trx_doublewrite->block1, 0, len,
	       (void*)trx_doublewrite->write_buf, NULL);

	write_buf = trx_doublewrite->write_buf;

	for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) {
		if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4)
		    != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE
					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: ERROR: The page to be written"
				" seems corrupt!\n"
				"InnoDB: The lsn fields do not match!"
				" Noticed in the doublewrite block1.\n");
		}
	}

	if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
		len = (trx_doublewrite->first_free
		       - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;

		fil_io(OS_FILE_WRITE,
		       TRUE, TRX_SYS_SPACE,
		       trx_doublewrite->block2, 0, len,
		       (void*)(trx_doublewrite->write_buf
			       + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
			       * UNIV_PAGE_SIZE),
		       NULL);

		write_buf = trx_doublewrite->write_buf
			+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
		for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
		     len2 += UNIV_PAGE_SIZE) {
			if (mach_read_from_4(write_buf + len2
					     + FIL_PAGE_LSN + 4)
			    != mach_read_from_4(write_buf + len2
						+ UNIV_PAGE_SIZE
						- FIL_PAGE_END_LSN_OLD_CHKSUM
						+ 4)) {
				ut_print_timestamp(stderr);
				fprintf(stderr,
					"  InnoDB: ERROR: The page to be"
					" written seems corrupt!\n"
					"InnoDB: The lsn fields do not match!"
					" Noticed in"
					" the doublewrite block2.\n");
			}
		}
	}

	/* Now flush the doublewrite buffer data to disk */

	fil_flush(TRX_SYS_SPACE);

	/* We know that the writes have been flushed to disk now
	and in recovery we will find them in the doublewrite buffer
	blocks. Next do the writes to the intended positions. */

	for (i = 0; i < trx_doublewrite->first_free; i++) {
		block = trx_doublewrite->buf_block_arr[i];

		if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
		    != mach_read_from_4(block->frame + UNIV_PAGE_SIZE
					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: ERROR: The page to be written"
				" seems corrupt!\n"
				"InnoDB: The lsn fields do not match!"
				" Noticed in the buffer pool\n"
				"InnoDB: after posting and flushing"
				" the doublewrite buffer.\n"
				"InnoDB: Page buf fix count %lu,"
				" io fix %lu, state %lu\n",
				(ulong)block->buf_fix_count,
				(ulong)block->io_fix,
				(ulong)block->state);
		}
		ut_a(block->state == BUF_BLOCK_FILE_PAGE);

		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
		       FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
		       (void*)block->frame, (void*)block);
	}

	/* Wake possible simulated aio thread to actually post the
	writes to the operating system */

	os_aio_simulated_wake_handler_threads();

	/* Wait that all async writes to tablespaces have been posted to
	the OS */

	os_aio_wait_until_no_pending_writes();

	/* Now we flush the data to disk (for example, with fsync) */

	fil_flush_file_spaces(FIL_TABLESPACE);

	/* We can now reuse the doublewrite memory buffer: */

	trx_doublewrite->first_free = 0;

	mutex_exit(&(trx_doublewrite->mutex));
}
Ejemplo n.º 13
0
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
	const dfield_t*	dfield)	/*!< in: dfield */
{
	const byte*	data;
	ulint		len;
	ulint		prtype;
	ulint		i;
	ibool		print_also_hex;

	len = dfield_get_len(dfield);
	data = dfield_get_data(dfield);

	if (dfield_is_null(dfield)) {
		fputs("NULL", stderr);

		return;
	}

	prtype = dtype_get_prtype(dfield_get_type(dfield));

	switch (dtype_get_mtype(dfield_get_type(dfield))) {
		dulint	id;
	case DATA_INT:
		switch (len) {
			ulint	val;
		case 1:
			val = mach_read_from_1(data);

			if (!(prtype & DATA_UNSIGNED)) {
				val &= ~0x80;
				fprintf(stderr, "%ld", (long) val);
			} else {
				fprintf(stderr, "%lu", (ulong) val);
			}
			break;

		case 2:
			val = mach_read_from_2(data);

			if (!(prtype & DATA_UNSIGNED)) {
				val &= ~0x8000;
				fprintf(stderr, "%ld", (long) val);
			} else {
				fprintf(stderr, "%lu", (ulong) val);
			}
			break;

		case 3:
			val = mach_read_from_3(data);

			if (!(prtype & DATA_UNSIGNED)) {
				val &= ~0x800000;
				fprintf(stderr, "%ld", (long) val);
			} else {
				fprintf(stderr, "%lu", (ulong) val);
			}
			break;

		case 4:
			val = mach_read_from_4(data);

			if (!(prtype & DATA_UNSIGNED)) {
				val &= ~0x80000000;
				fprintf(stderr, "%ld", (long) val);
			} else {
				fprintf(stderr, "%lu", (ulong) val);
			}
			break;

		case 6:
			id = mach_read_from_6(data);
			fprintf(stderr, "{%lu %lu}",
				ut_dulint_get_high(id),
				ut_dulint_get_low(id));
			break;

		case 7:
			id = mach_read_from_7(data);
			fprintf(stderr, "{%lu %lu}",
				ut_dulint_get_high(id),
				ut_dulint_get_low(id));
			break;
		case 8:
			id = mach_read_from_8(data);
			fprintf(stderr, "{%lu %lu}",
				ut_dulint_get_high(id),
				ut_dulint_get_low(id));
			break;
		default:
			goto print_hex;
		}
		break;

	case DATA_SYS:
		switch (prtype & DATA_SYS_PRTYPE_MASK) {
		case DATA_TRX_ID:
			id = mach_read_from_6(data);

			fprintf(stderr, "trx_id " TRX_ID_FMT,
				TRX_ID_PREP_PRINTF(id));
			break;

		case DATA_ROLL_PTR:
			id = mach_read_from_7(data);

			fprintf(stderr, "roll_ptr {%lu %lu}",
				ut_dulint_get_high(id), ut_dulint_get_low(id));
			break;

		case DATA_ROW_ID:
			id = mach_read_from_6(data);

			fprintf(stderr, "row_id {%lu %lu}",
				ut_dulint_get_high(id), ut_dulint_get_low(id));
			break;

		default:
			id = mach_dulint_read_compressed(data);

			fprintf(stderr, "mix_id {%lu %lu}",
				ut_dulint_get_high(id), ut_dulint_get_low(id));
		}
		break;

	case DATA_CHAR:
	case DATA_VARCHAR:
		print_also_hex = FALSE;

		for (i = 0; i < len; i++) {
			int c = *data++;

			if (!isprint(c)) {
				print_also_hex = TRUE;

				fprintf(stderr, "\\x%02x", (unsigned char) c);
			} else {
				putc(c, stderr);
			}
		}

		if (dfield_is_ext(dfield)) {
			fputs("(external)", stderr);
		}

		if (!print_also_hex) {
			break;
		}

		data = dfield_get_data(dfield);
		/* fall through */

	case DATA_BINARY:
	default:
print_hex:
		fputs(" Hex: ",stderr);

		for (i = 0; i < len; i++) {
			fprintf(stderr, "%02lx", (ulint) *data++);
		}

		if (dfield_is_ext(dfield)) {
			fputs("(external)", stderr);
		}
	}
}
Ejemplo n.º 14
0
/**********************************************************************//**
Try to relocate a block.
@return	TRUE if relocated */
static
ibool
buf_buddy_relocate(
/*===============*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	void*		src,		/*!< in: block to relocate */
	void*		dst,		/*!< in: free block to relocate to */
	ulint		i)		/*!< in: index of
					buf_pool->zip_free[] */
{
	buf_page_t*	bpage;
	const ulint	size	= BUF_BUDDY_LOW << i;
	ullint		usec	= ut_time_us(NULL);
	mutex_t*	mutex;
	ulint		space;
	ulint		page_no;

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(!mutex_own(&buf_pool->zip_mutex));
	ut_ad(!ut_align_offset(src, size));
	ut_ad(!ut_align_offset(dst, size));
	ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
	UNIV_MEM_ASSERT_W(dst, size);

	/* We assume that all memory from buf_buddy_alloc()
	is used for compressed page frames. */

	/* We look inside the allocated objects returned by
	buf_buddy_alloc() and assume that each block is a compressed
	page that contains a valid space_id and page_no in the page
	header. Should the fields be invalid, we will be unable to
	relocate the block. */

	/* The src block may be split into smaller blocks,
	some of which may be free.  Thus, the
	mach_read_from_4() calls below may attempt to read
	from free memory.  The memory is "owned" by the buddy
	allocator (and it has been allocated from the buffer
	pool), so there is nothing wrong about this.  The
	mach_read_from_4() calls here will only trigger bogus
	Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
	space	= mach_read_from_4((const byte *) src
				   + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
	page_no	= mach_read_from_4((const byte *) src
				   + FIL_PAGE_OFFSET);
	/* Suppress Valgrind warnings about conditional jump
	on uninitialized value. */
	UNIV_MEM_VALID(&space, sizeof space);
	UNIV_MEM_VALID(&page_no, sizeof page_no);
	bpage = buf_page_hash_get(buf_pool, space, page_no);

	if (!bpage || bpage->zip.data != src) {
		/* The block has probably been freshly
		allocated by buf_LRU_get_free_block() but not
		added to buf_pool->page_hash yet.  Obviously,
		it cannot be relocated. */

		return(FALSE);
	}

	if (page_zip_get_size(&bpage->zip) != size) {
		/* The block is of different size.  We would
		have to relocate all blocks covered by src.
		For the sake of simplicity, give up. */
		ut_ad(page_zip_get_size(&bpage->zip) < size);

		return(FALSE);
	}

	/* The block must have been allocated, but it may
	contain uninitialized data. */
	UNIV_MEM_ASSERT_W(src, size);

	mutex = buf_page_get_mutex(bpage);

	mutex_enter(mutex);

	if (buf_page_can_relocate(bpage)) {
		/* Relocate the compressed page. */
		ut_a(bpage->zip.data == src);
		memcpy(dst, src, size);
		bpage->zip.data = dst;
		mutex_exit(mutex);
		UNIV_MEM_INVALID(src, size);
		{
			buf_buddy_stat_t*	buddy_stat
				= &buf_pool->buddy_stat[i];
			buddy_stat->relocated++;
			buddy_stat->relocated_usec
				+= ut_time_us(NULL) - usec;
		}
		return(TRUE);
	}

	mutex_exit(mutex);
	return(FALSE);
}
Ejemplo n.º 15
0
static void print_page(uchar *p)
{
  int type = mach_read_from_2(p + FIL_PAGE_TYPE);
  if (type == FIL_PAGE_TYPE_ALLOCATED) {
    return;
  }

  printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_OFFSET",
      mach_read_from_4(p + FIL_PAGE_OFFSET));
  printf(COLUMN_NAME_FMT " 0x%08lX\n", "FIL_PAGE_SPACE_OR_CHKSUM",
      mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM));
  printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_PREV",
      mach_read_from_4(p + FIL_PAGE_PREV));
  printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_NEXT",
      mach_read_from_4(p + FIL_PAGE_NEXT));
  printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_LSN",
      mach_read_from_4(p + FIL_PAGE_LSN));
  printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_TYPE",
      mach_read_from_2(p + FIL_PAGE_TYPE));
  dulint flush_lsn_tuple = mach_read_from_6(p + FIL_PAGE_FILE_FLUSH_LSN);
  uint64_t flush_lsn = (((uint64_t) flush_lsn_tuple.high) << 32) +
      flush_lsn_tuple.low;
  printf(COLUMN_NAME_FMT " %" PRIu64 "\n", "FIL_PAGE_FILE_FLUSH_LSN",
      flush_lsn);
  printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID",
      mach_read_from_4(p + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
  printf(COLUMN_NAME_FMT " 0x%08lX\n", "FIL_PAGE_END_LSN_OLD_CHKSUM",
      mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM));

  uchar *pd = p + FIL_PAGE_DATA;
  if (type == FIL_PAGE_TYPE_FSP_HDR) {
    printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_SPACE",
        mach_read_from_4(pd + FSEG_HDR_SPACE));
    printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_PAGE_NO",
        mach_read_from_4(pd + FSEG_HDR_PAGE_NO));
    printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_OFFSET",
        mach_read_from_4(pd + FSEG_HDR_OFFSET));
  } else if (type == FIL_PAGE_INDEX) {
    printf(COLUMN_NAME_FMT " 0x%lX\n", "PAGE_N_HEAP",
        mach_read_from_2(pd + PAGE_N_HEAP));
    printf(COLUMN_NAME_FMT " 0x%lX\n", "PAGE_FREE",
        mach_read_from_2(pd + PAGE_FREE));
    dulint index_id_tuple = mach_read_from_8(pd + PAGE_INDEX_ID);
    uint64_t index_id = (((uint64_t) index_id_tuple.high) << 32) +
        index_id_tuple.low;
    printf(COLUMN_NAME_FMT " %" PRIu64 "\n", "PAGE_INDEX_ID",
        index_id);
    printf(COLUMN_NAME_FMT " %ld\n", "PAGE_BTR_SEG_LEAF",
        mach_read_from_4(pd + PAGE_BTR_SEG_LEAF + FSEG_HDR_SPACE));
    printf(COLUMN_NAME_FMT " %ld\n", "PAGE_BTR_SEG_TOP",
        mach_read_from_4(pd + PAGE_BTR_SEG_TOP + FSEG_HDR_SPACE));
#if 0
    int i;
    for (i = 0; i < 80; i += 4) {
      if (i == PAGE_BTR_SEG_LEAF ||
          i == PAGE_N_HEAP ||
          i == PAGE_INDEX_ID ||
          i == PAGE_INDEX_ID + 4) {
        continue;
      }
      char column_name[256];
      snprintf(column_name, sizeof(column_name), "FIL_PAGE_DATA + %2d", i);
      printf(COLUMN_NAME_FMT " %ld\n", column_name,
             mach_read_from_4(p + FIL_PAGE_DATA + i));
    }
#endif
  }
  printf("\n");
}
Ejemplo n.º 16
0
int main(int argc, char **argv)
{
  FILE *f;                     /* our input file */
  FILE *b;                     /* our output file */
  byte *p;                     /* storage of pages read input file*/
  byte *op;                    /* storage of pages read output file*/
  int bytes;                   /* bytes read count */
  ulint ct;                    /* current page number (0 based) */
  int now;                     /* current time */
  int lastt;                   /* last time */
  ulint oldcsum, noldcsum, oldcsumfield, noldcsumfield, csum, ncsum, csumfield, ncsumfield, logseq, nlogseq, logseqfield, nlogseqfield; /* ulints for checksum storage */
  struct stat st;              /* for stat, if you couldn't guess */
  unsigned long long int size; /* size of file (has to be 64 bits) */
  ulint pages;                 /* number of pages in file */
  ulint start_page= 0, end_page= 0, use_end_page= 0; /* for starting and ending at certain pages */
  off_t offset= 0;
  off_t boffset= 0;
  int just_count= 0;          /* if true, just print page count */
  int verbose= 0;
  int debug= 0;
  int c;
  int fd;
  int fn;
  int posstat;
  off_t pos;


  /* make sure we have the right arguments */
  if (argc != 3)
  {
    printf("InnoDB offline file checksum utility.\n");
    printf("usage: %s  <backupfilename> <filename>\n", argv[0]);
    return 1;
  }

  /* stat the file to get size and page count */
  if (stat(argv[2], &st))
  {
    perror("error statting file");
    return 1;
  }
  size= st.st_size;
  pages= size / UNIV_PAGE_SIZE;
  if (just_count)
  {
    printf("%lu\n", pages);
    return 0;
  }
  else if (verbose)
  {
    printf("file %s= %llu bytes (%lu pages)...\n", argv[1], size, pages);
    printf("checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1));
  }


  /* open the file for reading */
  f= fopen(argv[2], "rb");
  if (!f)
  {
    perror("error opening ibdata file");
    return 1;
  }

  /* open the file for readwrite */
  b= fopen(argv[1], "rb+");
  if (!b)
  {
    perror("error opening backup file");
    return 1;
  }

printf("Comparing files %s and %s\n", argv[1], argv[2]);
  /* seek to the necessary position */
  if (start_page)
  {
    fd= fileno(f);
    fn= fileno(b);
    if (!fd || !fn)
    {
      perror("unable to obtain file descriptor number");
      return 1;
    }


    offset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE;
    boffset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE;


    if (lseek(fd, offset, SEEK_SET) != offset)
    {
      perror("unable to seek to necessary offset");
      return 1;
    }

    if (lseek(fn, boffset, SEEK_SET) != boffset)
    {
      perror("unable to seek to necessary offset");
      return 1;
    }

  }

  /* allocate buffer for reading (so we don't realloc every time) */
  p= (byte *)malloc(UNIV_PAGE_SIZE);
  op= (byte *)malloc(UNIV_PAGE_SIZE);

  /* main checksumming loop */
  ct= start_page;
  lastt= 0;
  while (!feof(f))
  {

    bytes= fread(p, 1, UNIV_PAGE_SIZE, f);

    if (!bytes && feof(f)) return 0;
    if (bytes != UNIV_PAGE_SIZE)
    {
      fprintf(stderr, "bytes read (%d) doesn't match universal page size (%d)\n", bytes, UNIV_PAGE_SIZE);
      return 1;
    }

    /* get position before reading page from backup file */
    pos = ftell(b);

    /* read page from backup file */
    bytes= fread(op, 1, UNIV_PAGE_SIZE, b);

    /* check the "stored log sequence numbers" */
    logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4);
    nlogseq= mach_read_from_4(op + FIL_PAGE_LSN + 4);
    logseqfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4);
    nlogseqfield= mach_read_from_4(op + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4);

    /* Make sure our page LSNs make sense */
    if (logseq != logseqfield || nlogseq != nlogseqfield)
    {
      fprintf(stderr, "page %lu invalid (fails log sequence number check)\n", ct);
      return 1;
    }

    /* get old method checksums */
    oldcsumfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
    noldcsumfield= mach_read_from_4(op + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);


    /* get new method checksums */
    csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM);
    ncsumfield= mach_read_from_4(op + FIL_PAGE_SPACE_OR_CHKSUM);
	/* If any of the LSNs or checksums dont make assume we need the newer page */
        if(logseq != nlogseq || oldcsumfield != noldcsumfield || csumfield != ncsumfield) {
                printf("%lu:%lu:%lu:%lu:%lu\t Is different from \t%lu:%lu:%lu:%lu:%lu\n", ct, logseq, logseqfield,oldcsumfield,csumfield,ct, nlogseq, nlogseqfield,noldcsumfield,ncsumfield);
                printf("Page start position is %lu\n", pos);
		/* seek back to the start of this page */
                fseek(b, -UNIV_PAGE_SIZE, SEEK_CUR);
                printf("Successfully rewound position to %lu\n", pos);
		/* write the page from the newer file to the backup */
                fwrite(p, 1, UNIV_PAGE_SIZE, b);
		/* we should now be back at the end of the page */
                pos = ftell(b);
                printf("Wrote new page and back at %lu\n", pos);
exit;
        }
    /* do counter increase and progress printing */
    ct++;
  }
  return 0;
}
Ejemplo n.º 17
0
/************************************************************************
Loads definitions for table indexes. Adds them to the data dictionary cache.
*/
static
void
dict_load_indexes(
/*==============*/
	dict_table_t*	table,	/* in: table */
	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
{
	dict_table_t*	sys_indexes;
	dict_index_t*	sys_index;
	dict_index_t*	index;
	btr_pcur_t	pcur;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	rec_t*		rec;
	byte*		field;
	ulint		len;
	ulint		name_len;
	char*		name_buf;
	ulint		type;
	ulint		space;
	ulint		page_no;
	ulint		n_fields;
	byte*		buf;
	ibool		is_sys_table;
	dulint		id;
	mtr_t		mtr;
	
	ut_ad(mutex_own(&(dict_sys->mutex)));

	if ((ut_dulint_get_high(table->id) == 0)
	    && (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID)) {
		is_sys_table = TRUE;
	} else {
		is_sys_table = FALSE;
	}
	
	mtr_start(&mtr);

	sys_indexes = dict_table_get_low("SYS_INDEXES");
	sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);

	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);

	buf = mem_heap_alloc(heap, 8);
	mach_write_to_8(buf, table->id);

	dfield_set_data(dfield, buf, 8);
	dict_index_copy_types(tuple, sys_index, 1);

	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
						BTR_SEARCH_LEAF, &pcur, &mtr);
   	for (;;) {
		if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {

			break;
		}

		rec = btr_pcur_get_rec(&pcur);
		
		field = rec_get_nth_field(rec, 0, &len);
		ut_ad(len == 8);

		if (ut_memcmp(buf, field, len) != 0) {
			break;
		}

		ut_a(!rec_get_deleted_flag(rec));

		field = rec_get_nth_field(rec, 1, &len);
		ut_ad(len == 8);
		id = mach_read_from_8(field);

		ut_a(0 == ut_strcmp("NAME",
			dict_field_get_col(
			dict_index_get_nth_field(
			dict_table_get_first_index(sys_indexes), 4))->name));
		
		field = rec_get_nth_field(rec, 4, &name_len);

		name_buf = mem_heap_alloc(heap, name_len + 1);
		ut_memcpy(name_buf, field, name_len);
		name_buf[name_len] = '\0';

		field = rec_get_nth_field(rec, 5, &len);
		n_fields = mach_read_from_4(field);

		field = rec_get_nth_field(rec, 6, &len);
		type = mach_read_from_4(field);

		field = rec_get_nth_field(rec, 7, &len);
		space = mach_read_from_4(field);

		ut_a(0 == ut_strcmp("PAGE_NO",
			dict_field_get_col(
			dict_index_get_nth_field(
			dict_table_get_first_index(sys_indexes), 8))->name));

		field = rec_get_nth_field(rec, 8, &len);
		page_no = mach_read_from_4(field);

		if (is_sys_table
		    && ((type & DICT_CLUSTERED)
		        || ((table == dict_sys->sys_tables)
		            && (name_len == ut_strlen("ID_IND"))
			    && (0 == ut_memcmp(name_buf, "ID_IND",
							name_len))))) {

			/* The index was created in memory already in
			booting */
		} else {
 			index = dict_mem_index_create(table->name, name_buf,
						space, type, n_fields);
			index->page_no = page_no;
			index->id = id;
		
			dict_load_fields(table, index, heap);

			dict_index_add_to_cache(table, index);
		}

		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
	} 

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);
}
Ejemplo n.º 18
0
/************************************************************************
Loads definitions for table columns. */
static
void
dict_load_columns(
/*==============*/
	dict_table_t*	table,	/* in: table */
	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
{
	dict_table_t*	sys_columns;
	dict_index_t*	sys_index;
	btr_pcur_t	pcur;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	rec_t*		rec;
	byte*		field;
	ulint		len;
	byte*		buf;
	char*		name_buf;
	char*		name;
	ulint		mtype;
	ulint		prtype;
	ulint		col_len;
	ulint		prec;
	ulint		i;
	mtr_t		mtr;
	
	ut_ad(mutex_own(&(dict_sys->mutex)));

	mtr_start(&mtr);

	sys_columns = dict_table_get_low("SYS_COLUMNS");
	sys_index = UT_LIST_GET_FIRST(sys_columns->indexes);

	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);

	buf = mem_heap_alloc(heap, 8);
	mach_write_to_8(buf, table->id);

	dfield_set_data(dfield, buf, 8);
	dict_index_copy_types(tuple, sys_index, 1);

	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
						BTR_SEARCH_LEAF, &pcur, &mtr);
   	for (i = 0; i < table->n_cols - DATA_N_SYS_COLS; i++) {

		rec = btr_pcur_get_rec(&pcur);

		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));

		ut_a(!rec_get_deleted_flag(rec));
		
		field = rec_get_nth_field(rec, 0, &len);
		ut_ad(len == 8);
		ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0);

		field = rec_get_nth_field(rec, 1, &len);
		ut_ad(len == 4);
		ut_a(i == mach_read_from_4(field));

		ut_a(0 == ut_strcmp("NAME",
			dict_field_get_col(
			dict_index_get_nth_field(
			dict_table_get_first_index(sys_columns), 4))->name));

		field = rec_get_nth_field(rec, 4, &len);

		name_buf = mem_heap_alloc(heap, len + 1);
		ut_memcpy(name_buf, field, len);
		name_buf[len] = '\0';

		name = name_buf;

		field = rec_get_nth_field(rec, 5, &len);
		mtype = mach_read_from_4(field);

		field = rec_get_nth_field(rec, 6, &len);
		prtype = mach_read_from_4(field);

		field = rec_get_nth_field(rec, 7, &len);
		col_len = mach_read_from_4(field);

		ut_a(0 == ut_strcmp("PREC",
			dict_field_get_col(
			dict_index_get_nth_field(
			dict_table_get_first_index(sys_columns), 8))->name));

		field = rec_get_nth_field(rec, 8, &len);
		prec = mach_read_from_4(field);

		dict_mem_table_add_col(table, name, mtype, prtype, col_len,
									prec);
		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
	} 

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);
}
Ejemplo n.º 19
0
dict_table_t*
dict_load_table(
/*============*/
			/* out: table, NULL if does not exist */
	char*	name)	/* in: table name */
{
	dict_table_t*	table;
	dict_table_t*	sys_tables;
	btr_pcur_t	pcur;
	dict_index_t*	sys_index;
	dtuple_t*	tuple;
	mem_heap_t*	heap;
	dfield_t*	dfield;
	rec_t*		rec;
	byte*		field;
	ulint		len;
	char*		buf;
	ulint		space;
	ulint		n_cols;
	mtr_t		mtr;
	
	ut_ad(mutex_own(&(dict_sys->mutex)));

	heap = mem_heap_create(1000);
	
	mtr_start(&mtr);

	sys_tables = dict_table_get_low("SYS_TABLES");
	sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);

	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);

	dfield_set_data(dfield, name, ut_strlen(name));
	dict_index_copy_types(tuple, sys_index, 1);

	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
					BTR_SEARCH_LEAF, &pcur, &mtr);
	rec = btr_pcur_get_rec(&pcur);

	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
					|| rec_get_deleted_flag(rec)) {
		/* Not found */

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);
		mem_heap_free(heap);
		
		return(NULL);
	}	

	field = rec_get_nth_field(rec, 0, &len);

	/* Check if the table name in record is the searched one */
	if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);
		mem_heap_free(heap);
		
		return(NULL);
	}

	ut_a(0 == ut_strcmp("SPACE",
		dict_field_get_col(
		dict_index_get_nth_field(
			dict_table_get_first_index(sys_tables), 9))->name));
	
	field = rec_get_nth_field(rec, 9, &len);
	space = mach_read_from_4(field);

	ut_a(0 == ut_strcmp("N_COLS",
		dict_field_get_col(
		dict_index_get_nth_field(
			dict_table_get_first_index(sys_tables), 4))->name));

	field = rec_get_nth_field(rec, 4, &len);
	n_cols = mach_read_from_4(field);

	table = dict_mem_table_create(name, space, n_cols);

	ut_a(0 == ut_strcmp("ID",
		dict_field_get_col(
		dict_index_get_nth_field(
			dict_table_get_first_index(sys_tables), 3))->name));

	field = rec_get_nth_field(rec, 3, &len);
	table->id = mach_read_from_8(field);

	field = rec_get_nth_field(rec, 5, &len);
	table->type = mach_read_from_4(field);

	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
		ut_a(0);
	
		field = rec_get_nth_field(rec, 6, &len);
		table->mix_id = mach_read_from_8(field);

		field = rec_get_nth_field(rec, 8, &len);
		buf = mem_heap_alloc(heap, len);
		ut_memcpy(buf, field, len);

		table->cluster_name = buf;
	}

	if ((table->type == DICT_TABLE_CLUSTER)
	    || (table->type == DICT_TABLE_CLUSTER_MEMBER)) {
		
		field = rec_get_nth_field(rec, 7, &len);
		table->mix_len = mach_read_from_4(field);
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	if (table->type == DICT_TABLE_CLUSTER_MEMBER) {
		/* Load the cluster table definition if not yet in
		memory cache */
		dict_table_get_low(table->cluster_name);
	}

	dict_load_columns(table, heap);

	dict_table_add_to_cache(table);

	dict_load_indexes(table, heap);
	
	ut_a(DB_SUCCESS == dict_load_foreigns(table->name));

	mem_heap_free(heap);

	return(table);
}
Ejemplo n.º 20
0
int main(int argc, char **argv)
{
  FILE *f;                     /* our input file */
  uchar *p;                     /* storage of pages read */
  int bytes;                   /* bytes read count */
  ulint ct;                    /* current page number (0 based) */
  int now;                     /* current time */
  int lastt;                   /* last time */
  ulint oldcsum, oldcsumfield, csum, csumfield, logseq, logseqfield; /* ulints for checksum storage */
  struct stat st;              /* for stat, if you couldn't guess */
  unsigned long long int size; /* size of file (has to be 64 bits) */
  ulint pages;                 /* number of pages in file */
  ulint start_page= 0, end_page= 0, use_end_page= 0; /* for starting and ending at certain pages */
  off_t offset= 0;
  int just_count= 0;          /* if true, just print page count */
  int verbose= 0;
  int debug= 0;
  int c;
  int fd;

  /* remove arguments */
  while ((c= getopt(argc, argv, "cvds:e:p:")) != -1)
  {
    switch (c)
    {
    case 'v':
      verbose= 1;
      break;
    case 'c':
      just_count= 1;
      break;
    case 's':
      start_page= atoi(optarg);
      break;
    case 'e':
      end_page= atoi(optarg);
      use_end_page= 1;
      break;
    case 'p':
      start_page= atoi(optarg);
      end_page= atoi(optarg);
      use_end_page= 1;
      break;
    case 'd':
      debug= 1;
      break;
    case ':':
      fprintf(stderr, "option -%c requires an argument\n", optopt);
      return 1;
      break;
    case '?':
      fprintf(stderr, "unrecognized option: -%c\n", optopt);
      return 1;
      break;
    }
  }

  /* debug implies verbose... */
  if (debug) verbose= 1;

  /* make sure we have the right arguments */
  if (optind >= argc)
  {
    printf("InnoDB offline file checksum utility.\n");
    printf("usage: %s [-c] [-s <start page>] [-e <end page>] [-p <page>] [-v] [-d] <filename>\n", argv[0]);
    printf("\t-c\tprint the count of pages in the file\n");
    printf("\t-s n\tstart on this page number (0 based)\n");
    printf("\t-e n\tend at this page number (0 based)\n");
    printf("\t-p n\tcheck only this page (0 based)\n");
    printf("\t-v\tverbose (prints progress every 5 seconds)\n");
    printf("\t-d\tdebug mode (prints checksums for each page)\n");
    return 1;
  }

  /* stat the file to get size and page count */
  if (stat(argv[optind], &st))
  {
    perror("error statting file");
    return 1;
  }
  size= st.st_size;
  pages= size / UNIV_PAGE_SIZE;
  if (just_count)
  {
    printf("%lu\n", pages);
    return 0;
  }
  else if (verbose)
  {
    printf("file %s = %llu bytes (%lu pages)...\n", argv[optind], size, pages);
    printf("checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1));
  }

  /* open the file for reading */
  f= fopen(argv[optind], "r");
  if (!f)
  {
    perror("error opening file");
    return 1;
  }

  /* seek to the necessary position */
  if (start_page)
  {
    fd= fileno(f);
    if (!fd)
    {
      perror("unable to obtain file descriptor number");
      return 1;
    }

    offset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE;

    if (lseek(fd, offset, SEEK_SET) != offset)
    {
      perror("unable to seek to necessary offset");
      return 1;
    }
  }

  /* allocate buffer for reading (so we don't realloc every time) */
  p= (uchar *)malloc(UNIV_PAGE_SIZE);

  /* main checksumming loop */
  ct= start_page;
  lastt= 0;
  while (!feof(f))
  {
    bytes= fread(p, 1, UNIV_PAGE_SIZE, f);
    if (!bytes && feof(f)) return 0;
    if (bytes != UNIV_PAGE_SIZE)
    {
      fprintf(stderr, "bytes read (%d) doesn't match universal page size (%d)\n", bytes, UNIV_PAGE_SIZE);
      return 1;
    }

    /* check the "stored log sequence numbers" */
    logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4);
    logseqfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4);
    if (debug)
      printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield);
    if (logseq != logseqfield)
    {
      fprintf(stderr, "page %lu invalid (fails log sequence number check)\n", ct);
      return 1;
    }

    /* check old method of checksumming */
    oldcsum= buf_calc_page_old_checksum(p);
    oldcsumfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
    if (debug)
      printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield);
    if (oldcsumfield != mach_read_from_4(p + FIL_PAGE_LSN) && oldcsumfield != oldcsum)
    {
      fprintf(stderr, "page %lu invalid (fails old style checksum)\n", ct);
      return 1;
    }

    /* now check the new method */
    csum= buf_calc_page_new_checksum(p);
    csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM);
    if (debug)
      printf("page %lu: new style: calculated = %lu; recorded = %lu\n", ct, csum, csumfield);
    if (csumfield != 0 && csum != csumfield)
    {
      fprintf(stderr, "page %lu invalid (fails new style checksum)\n", ct);
      return 1;
    }

    /* end if this was the last page we were supposed to check */
    if (use_end_page && (ct >= end_page))
      return 0;

    /* do counter increase and progress printing */
    ct++;
    if (verbose)
    {
      if (ct % 64 == 0)
      {
        now= time(0);
        if (!lastt) lastt= now;
        if (now - lastt >= 1)
        {
          printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100);
          lastt= now;
        }
      }
    }
  }
  return 0;
}
Ejemplo n.º 21
0
/************************************************************************
Loads definitions for index fields. */
static
void
dict_load_fields(
/*=============*/
	dict_table_t*	table,	/* in: table */
	dict_index_t*	index,	/* in: index whose fields to load */
	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
{
	dict_table_t*	sys_fields;
	dict_index_t*	sys_index;
	btr_pcur_t	pcur;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	char*		col_name;
	rec_t*		rec;
	byte*		field;
	ulint		len;
	byte*		buf;
	ulint		i;
	mtr_t		mtr;
	
	ut_ad(mutex_own(&(dict_sys->mutex)));

	UT_NOT_USED(table);

	mtr_start(&mtr);

	sys_fields = dict_table_get_low("SYS_FIELDS");
	sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);

	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);

	buf = mem_heap_alloc(heap, 8);
	mach_write_to_8(buf, index->id);

	dfield_set_data(dfield, buf, 8);
	dict_index_copy_types(tuple, sys_index, 1);

	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
						BTR_SEARCH_LEAF, &pcur, &mtr);
   	for (i = 0; i < index->n_fields; i++) {

		rec = btr_pcur_get_rec(&pcur);

		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
		ut_a(!rec_get_deleted_flag(rec));
		
		field = rec_get_nth_field(rec, 0, &len);
		ut_ad(len == 8);
		ut_a(ut_memcmp(buf, field, len) == 0);

		field = rec_get_nth_field(rec, 1, &len);
		ut_ad(len == 4);
		ut_a(i == mach_read_from_4(field));

		ut_a(0 == ut_strcmp("COL_NAME",
			dict_field_get_col(
			dict_index_get_nth_field(
			dict_table_get_first_index(sys_fields), 4))->name));

		field = rec_get_nth_field(rec, 4, &len);

		col_name = mem_heap_alloc(heap, len + 1);
		ut_memcpy(col_name, field, len);
		col_name[len] = '\0';
		
		dict_mem_index_add_field(index, col_name, 0);

		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
	} 

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);
}
Ejemplo n.º 22
0
/************************************************************************
Loads foreign key constraint col names (also for the referenced table). */
static
void
dict_load_foreign_cols(
/*===================*/
	char*		id,	/* in: foreign constraint id as a null-
				terminated string */
	dict_foreign_t*	foreign)/* in: foreign constraint object */
{
	dict_table_t*	sys_foreign_cols;
	dict_index_t*	sys_index;
	btr_pcur_t	pcur;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	char*		col_name;
	rec_t*		rec;
	byte*		field;
	ulint		len;
	ulint		i;
	mtr_t		mtr;
	
	ut_ad(mutex_own(&(dict_sys->mutex)));

	foreign->foreign_col_names = mem_heap_alloc(foreign->heap,
					foreign->n_fields * sizeof(void*));

	foreign->referenced_col_names = mem_heap_alloc(foreign->heap,
					foreign->n_fields * sizeof(void*));
	mtr_start(&mtr);

	sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
	sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes);

	tuple = dtuple_create(foreign->heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);

	dfield_set_data(dfield, id, ut_strlen(id));
	dict_index_copy_types(tuple, sys_index, 1);

	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
						BTR_SEARCH_LEAF, &pcur, &mtr);
   	for (i = 0; i < foreign->n_fields; i++) {

		rec = btr_pcur_get_rec(&pcur);

		ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr));
		ut_a(!rec_get_deleted_flag(rec));
		
		field = rec_get_nth_field(rec, 0, &len);
		ut_a(len == ut_strlen(id));
		ut_a(ut_memcmp(id, field, len) == 0);

		field = rec_get_nth_field(rec, 1, &len);
		ut_a(len == 4);
		ut_a(i == mach_read_from_4(field));

		field = rec_get_nth_field(rec, 4, &len);

		col_name = mem_heap_alloc(foreign->heap, len + 1);
		ut_memcpy(col_name, field, len);
		col_name[len] = '\0';

		foreign->foreign_col_names[i] = col_name;

		field = rec_get_nth_field(rec, 5, &len);

		col_name = mem_heap_alloc(foreign->heap, len + 1);
		ut_memcpy(col_name, field, len);
		col_name[len] = '\0';

		foreign->referenced_col_names[i] = col_name;
		
		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
	} 

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);
}
Ejemplo n.º 23
0
uint64_t mach_read_from_8(byte* p)
{
	    return ( (uint64_t) mach_read_from_4(p) << 32  |
		         (uint64_t) mach_read_from_4(p + 4) );
}
Ejemplo n.º 24
0
void process_ibpage(page_t *page) {
    ulint page_id;
	rec_t *origin;
	ulint offsets[MAX_TABLE_FIELDS + 2];
	ulint offset, i;
    int is_page_valid = 0;
    int comp;
    unsigned int expected_records = 0;
    unsigned int actual_records = 0;
    int16_t b, infimum, supremum;
	
	// Skip tables if filter used
    if (use_filter_id) {
        dulint index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
        if (index_id.low != filter_id.low || index_id.high != filter_id.high) {
            if (debug) {
            	page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
                printf("Skipped using index id filter: %lu!\n", page_id);
            }
            return;
        }
    }

	// Read page id
	page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
	if (debug) printf("Page id: %lu\n", page_id);
	fprintf(f_result, "-- Page id: %lu", page_id);
	// Check requested and actual formats
    if (!check_page_format(page)) return;
	if(table_definitions_cnt == 0){
		fprintf(stderr, "There are no table definitions. Please check  include/table_defs.h\n");
		exit(EXIT_FAILURE);
		}
    is_page_valid = check_page(page, &expected_records);
    
    // comp == 1 if page in COMPACT format and 0 if REDUNDANT
    comp = page_is_comp(page);
    fprintf(f_result, ", Format: %s", (comp ) ? "COMPACT": "REDUNDANT");
    infimum = (comp) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM;
    supremum = (comp) ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM;
	// Find possible data area start point (at least 5 bytes of utility data)
	if(is_page_valid){
        b = mach_read_from_2(page + infimum - 2);
        offset = (comp) ? infimum + b : b;
        }
    else{
        offset = 100 + record_extra_bytes;
        }
    	fprintf(f_result, ", Records list: %s", is_page_valid? "Valid": "Invalid");
    	fprintf(f_result, ", Expected records: (%u %lu)", expected_records, mach_read_from_2(page + PAGE_HEADER + PAGE_N_RECS));
    	fprintf(f_result, "\n");
	if (debug) printf("Starting offset: %lu (%lX). Checking %d table definitions.\n", offset, offset, table_definitions_cnt);
	
	// Walk through all possible positions to the end of page 
	// (start of directory - extra bytes of the last rec)
    //is_page_valid = 0;
	while (offset < UNIV_PAGE_SIZE - record_extra_bytes && ( (offset != supremum ) || !is_page_valid) ) {
		// Get record pointer
		origin = page + offset;
		if (debug) printf("\nChecking offset: 0x%lX: ", offset);
		
		// Check all tables
		for (i = 0; i < table_definitions_cnt; i++) {
			// Get table info
			table_def_t *table = &(table_definitions[i]);
			if (debug) printf(" (%s) ", table->name);

			// Check if origin points to a valid record
			if (check_for_a_record(page, origin, table, offsets) && check_constraints(origin, table, offsets)) {
				actual_records++;
				if (debug) printf("\n---------------------------------------------------\n"
			       			  "PAGE%lu: Found a table %s record: %p (offset = %lu)\n", \
						  page_id, table->name, origin, offset);
                		if(is_page_valid){
					process_ibrec(page, origin, table, offsets);
                    			b = mach_read_from_2(page + offset - 2);
					offset = (comp) ? offset + b : b;
                    			}
				else{
					offset += process_ibrec(page, origin, table, offsets);
                    			}
                		if (debug) printf("Next offset: 0x%lX", offset);
			   		break;
		        	}
            		else{
                		if(is_page_valid){
					b = mach_read_from_2(page + offset - 2);
					offset = (comp) ? offset + b : b;
                    			}
				else{
					offset++;
					}
                		if (debug) printf("\nNext offset: %lX", offset);
               			}
			}
		}
	fprintf(f_result, "-- Page id: %lu", page_id);
	fprintf(f_result, ", Found records: %u", actual_records);
	fprintf(f_result, ", Lost records: %s", (actual_records != expected_records) ? "YES": "NO");
    	fprintf(f_result, ", Leaf page: %s", (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0)? "YES": "NO");
	fprintf(f_result, "\n");
}
Ejemplo n.º 25
0
/***************************************************************************
Loads a foreign key constraint to the dictionary cache. */
static
ulint
dict_load_foreign(
/*==============*/
			/* out: DB_SUCCESS or error code */
	char*	id)	/* in: foreign constraint id as a null-terminated
			string */
{	
	dict_foreign_t*	foreign;
	dict_table_t*	sys_foreign;
	btr_pcur_t	pcur;
	dict_index_t*	sys_index;
	dtuple_t*	tuple;
	mem_heap_t*	heap2;
	dfield_t*	dfield;
	rec_t*		rec;
	byte*		field;
	ulint		len;
	ulint		err;
	mtr_t		mtr;
	
	ut_ad(mutex_own(&(dict_sys->mutex)));

	heap2 = mem_heap_create(1000);
	
	mtr_start(&mtr);

	sys_foreign = dict_table_get_low("SYS_FOREIGN");
	sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes);

	tuple = dtuple_create(heap2, 1);
	dfield = dtuple_get_nth_field(tuple, 0);

	dfield_set_data(dfield, id, ut_strlen(id));
	dict_index_copy_types(tuple, sys_index, 1);

	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
					BTR_SEARCH_LEAF, &pcur, &mtr);
	rec = btr_pcur_get_rec(&pcur);

	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)
					|| rec_get_deleted_flag(rec)) {
		/* Not found */

		fprintf(stderr,
		"InnoDB: Error A: cannot load foreign constraint %s\n", id);

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);
		mem_heap_free(heap2);
		
		return(DB_ERROR);
	}	

	field = rec_get_nth_field(rec, 0, &len);

	/* Check if the id in record is the searched one */
	if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) {

		fprintf(stderr,
		"InnoDB: Error B: cannot load foreign constraint %s\n", id);

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);
		mem_heap_free(heap2);
		
		return(DB_ERROR);
	}

	/* Read the table names and the number of columns associated
	with the constraint */

	mem_heap_free(heap2);
	
	foreign = dict_mem_foreign_create();

	foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len));

	ut_a(len == 4);
	
	foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(id) + 1);
				
	ut_memcpy(foreign->id, id, ut_strlen(id) + 1);

	field = rec_get_nth_field(rec, 3, &len);
							
	foreign->foreign_table_name = mem_heap_alloc(foreign->heap, 1 + len);
				
	ut_memcpy(foreign->foreign_table_name, field, len);
	foreign->foreign_table_name[len] = '\0';	
	
	field = rec_get_nth_field(rec, 4, &len);
							
	foreign->referenced_table_name = mem_heap_alloc(foreign->heap, 1 + len);
				
	ut_memcpy(foreign->referenced_table_name, field, len);
	foreign->referenced_table_name[len] = '\0';	

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	dict_load_foreign_cols(id, foreign);

	/* Note that there may already be a foreign constraint object in
	the dictionary cache for this constraint: then the following
	call only sets the pointers in it to point to the appropriate table
	and index objects and frees the newly created object foreign. */

	err = dict_foreign_add_to_cache(foreign);

	return(err); 
}
Ejemplo n.º 26
0
UNIV_INTERN
ulint
mem_field_header_get_check(byte* field)
{
	return(mach_read_from_4(field - sizeof(ulint)));
}
Ejemplo n.º 27
0
/*******************************************************************//**
Truncates the index tree associated with a row in SYS_INDEXES table.
@return	new root page number, or FIL_NULL on failure */
UNIV_INTERN
ulint
dict_truncate_index_tree(
    /*=====================*/
    dict_table_t*	table,	/*!< in: the table the index belongs to */
    ulint		space,	/*!< in: 0=truncate,
				nonzero=create the index tree in the
				given tablespace */
    btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
				record in the clustered index of
				SYS_INDEXES table. The cursor may be
				repositioned in this call. */
    mtr_t*		mtr)	/*!< in: mtr having the latch
				on the record page. The mtr may be
				committed and restarted in this call. */
{
    ulint		root_page_no;
    ibool		drop = !space;
    ulint		zip_size;
    ulint		type;
    index_id_t	index_id;
    rec_t*		rec;
    const byte*	ptr;
    ulint		len;
    dict_index_t*	index;

    ut_ad(mutex_own(&(dict_sys->mutex)));
    ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
    rec = btr_pcur_get_rec(pcur);
    ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);

    ut_ad(len == 4);

    root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

    if (drop && root_page_no == FIL_NULL) {
        /* The tree has been freed. */

        ut_print_timestamp(stderr);
        fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
                " a missing index of table %s!\n", table->name);
        drop = FALSE;
    }

    ptr = rec_get_nth_field_old(rec,
                                DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);

    ut_ad(len == 4);

    if (drop) {
        space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
    }

    zip_size = fil_space_get_zip_size(space);

    if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
        /* It is a single table tablespace and the .ibd file is
        missing: do nothing */

        ut_print_timestamp(stderr);
        fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
                " a missing .ibd file of table %s!\n", table->name);
        return(FIL_NULL);
    }

    ptr = rec_get_nth_field_old(rec,
                                DICT_SYS_INDEXES_TYPE_FIELD, &len);
    ut_ad(len == 4);
    type = mach_read_from_4(ptr);

    ptr = rec_get_nth_field_old(rec, 1, &len);
    ut_ad(len == 8);
    index_id = mach_read_from_8(ptr);

    if (!drop) {

        goto create;
    }

    /* We free all the pages but the root page first; this operation
    may span several mini-transactions */

    btr_free_but_not_root(space, zip_size, root_page_no);

    /* Then we free the root page in the same mini-transaction where
    we create the b-tree and write its new root page number to the
    appropriate field in the SYS_INDEXES record: this mini-transaction
    marks the B-tree totally truncated */

    btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr);

    btr_free_root(space, zip_size, root_page_no, mtr);
create:
    /* We will temporarily write FIL_NULL to the PAGE_NO field
    in SYS_INDEXES, so that the database will not get into an
    inconsistent state in case it crashes between the mtr_commit()
    below and the following mtr_commit() call. */
    page_rec_write_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
                         FIL_NULL, mtr);

    /* We will need to commit the mini-transaction in order to avoid
    deadlocks in the btr_create() call, because otherwise we would
    be freeing and allocating pages in the same mini-transaction. */
    btr_pcur_store_position(pcur, mtr);
    mtr_commit(mtr);

    mtr_start(mtr);
    btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

    /* Find the index corresponding to this SYS_INDEXES record. */
    for (index = UT_LIST_GET_FIRST(table->indexes);
            index;
            index = UT_LIST_GET_NEXT(indexes, index)) {
        if (index->id == index_id) {
            root_page_no = btr_create(type, space, zip_size,
                                      index_id, index, mtr);
            index->page = (unsigned int) root_page_no;
            return(root_page_no);
        }
    }

    ut_print_timestamp(stderr);
    fprintf(stderr,
            "  InnoDB: Index %llu of table %s is missing\n"
            "InnoDB: from the data dictionary during TRUNCATE!\n",
            (ullint) index_id,
            table->name);

    return(FIL_NULL);
}
Ejemplo n.º 28
0
ulint
dict_truncate_index_tree(
/*=====================*/
				/* out: new root page number, or
				FIL_NULL on failure */
	dict_table_t*	table,	/* in: the table the index belongs to */
	btr_pcur_t*	pcur,	/* in/out: persistent cursor pointing to
				record in the clustered index of
				SYS_INDEXES table. The cursor may be
				repositioned in this call. */
	mtr_t*		mtr)	/* in: mtr having the latch
				on the record page. The mtr may be
				committed and restarted in this call. */
{
	ulint		root_page_no;
	ulint		space;
	ulint		type;
	dulint		index_id;
	rec_t*		rec;
	byte*		ptr;
	ulint		len;
	ulint		comp;
	dict_index_t*	index;

	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
	rec = btr_pcur_get_rec(pcur);
	ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);

	ut_ad(len == 4);

	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (root_page_no == FIL_NULL) {
		/* The tree has been freed. */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing index of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);

	ut_ad(len == 4);

	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (!fil_tablespace_exists_in_mem(space)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing .ibd file of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_TYPE_FIELD, &len);
	ut_ad(len == 4);
	type = mach_read_from_4(ptr);

	ptr = rec_get_nth_field_old(rec, 1, &len);
	ut_ad(len == 8);
	index_id = mach_read_from_8(ptr);

	/* We free all the pages but the root page first; this operation
	may span several mini-transactions */

	btr_free_but_not_root(space, root_page_no);

	/* Then we free the root page in the same mini-transaction where
	we create the b-tree and write its new root page number to the
	appropriate field in the SYS_INDEXES record: this mini-transaction
	marks the B-tree totally truncated */

	comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH,
					 mtr));

	btr_free_root(space, root_page_no, mtr);
	/* We will temporarily write FIL_NULL to the PAGE_NO field
	in SYS_INDEXES, so that the database will not get into an
	inconsistent state in case it crashes between the mtr_commit()
	below and the following mtr_commit() call. */
	page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
				     FIL_NULL, mtr);

	/* We will need to commit the mini-transaction in order to avoid
	deadlocks in the btr_create() call, because otherwise we would
	be freeing and allocating pages in the same mini-transaction. */
	btr_pcur_store_position(pcur, mtr);
	mtr_commit(mtr);

	mtr_start(mtr);
	btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

	/* Find the index corresponding to this SYS_INDEXES record. */
	for (index = UT_LIST_GET_FIRST(table->indexes);
	     index;
	     index = UT_LIST_GET_NEXT(indexes, index)) {
		if (!ut_dulint_cmp(index->id, index_id)) {
			break;
		}
	}

	root_page_no = btr_create(type, space, index_id, comp, mtr);
	if (index) {
		index->page = (unsigned int) root_page_no;
	} else {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Index %lu %lu of table %s is missing\n"
			"InnoDB: from the data dictionary during TRUNCATE!\n",
			ut_dulint_get_high(index_id),
			ut_dulint_get_low(index_id),
			table->name);
	}

	return(root_page_no);
}
Ejemplo n.º 29
0
/********************************************************************//**
Flush pages from flash cache.
@return	number of pages have been flushed to tablespace */
UNIV_INTERN
ulint	
fc_flush_to_disk(
/*==================*/
	ibool do_full_io)	/*!< in: whether do full io capacity */
{
	ulint distance;
	byte* page;
	ulint ret;
	ulint space;
	ulint offset;
	ulint page_type;
	ulint i, j;
	ulint pos;
	ulint zip_size;
	ulint block_offset, byte_offset;
	ulint fc_size = fc_get_size();
	ulint fc_blk_size = fc_get_block_size_byte();
	ulint start_offset;
   	ulint data_size;
	fc_block_t *flush_block = NULL;
	ulint c_flush = 0;
    
	ut_ad(!mutex_own(&fc->mutex));
	ut_a(fc->flush_buf->free_pos == 0);

	/* step 1: get the number of blocks need to flush to tablespace */
	flash_cache_mutex_enter();

	distance = fc_get_distance();
	start_offset = fc->flush_off;
    
	if ( distance == 0 ) {
		flash_cache_mutex_exit();
		return 0;
	} else if ( recv_recovery_on ) {
		if ( distance < (( 1.0 * srv_flash_cache_write_cache_pct /100 ) * fc_size)) {
			fc->n_flush_cur = 0;
		} else if ( distance < ( ( 1.0*srv_flash_cache_do_full_io_pct /100 ) * fc_size)) {
			fc->n_flush_cur = ut_min(PCT_IO_FC(10), distance);
		} else {
			fc->n_flush_cur = ut_min(PCT_IO_FC(100), distance);
		}
	} else if ( distance < (( 1.0 * srv_flash_cache_write_cache_pct /100 ) * fc_size)
		&& !do_full_io ) {
		flash_cache_mutex_exit();
		return 0;
	} else if ( distance < (( 1.0 * srv_flash_cache_do_full_io_pct/100 ) * fc_size)
		&& !do_full_io ) {
		fc->n_flush_cur = PCT_IO_FC(srv_fc_write_cache_flush_pct);
	} else {
		ut_ad((distance > ( 1.0 * srv_flash_cache_do_full_io_pct/100 ) * fc_size) 
			|| do_full_io );
		fc->n_flush_cur = ut_min(PCT_IO_FC(srv_fc_full_flush_pct), distance);
	}

	flash_cache_mutex_exit();

	/* step 2: start to flush blocks use async io, set block io_fix IO_FIX_FLUSH */
	i = 0;
	while (i < fc->n_flush_cur) {
		ulint b_space;
		ulint b_offset;
		ulint raw_zip_size;
		ulint size;
		ulint fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		ulint is_v4_blk;
#endif
		byte* page_io;

		flash_cache_mutex_enter();
		pos = ( start_offset + i ) % fc_size;
		flush_block = fc_get_block(pos);

		if (flush_block == NULL) {
			i++;
			flash_cache_mutex_exit();
			continue;
		}

		/* we should get the mutex, as doublewrite may hit this block and invalid the block */
		flash_block_mutex_enter(flush_block->fil_offset);

		flash_cache_mutex_exit();
		
		data_size = fc_block_get_data_size(flush_block);

		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			/* if readonly or merge write or already flushed*/
			ut_a (flush_block->state == BLOCK_NOT_USED
				|| flush_block->state == BLOCK_READ_CACHE
				|| flush_block->state == BLOCK_FLUSHED);
			
			i += data_size;

			flash_block_mutex_exit(flush_block->fil_offset);
			if (flush_block->state == BLOCK_NOT_USED) {
				//fc_block_detach(FALSE, flush_block);
				fc_block_free(flush_block);
			}
			
			continue;
		}

		zip_size = fil_space_get_zip_size(flush_block->space);
		if (zip_size == ULINT_UNDEFINED) {
			/* table has been droped, just set it BLOCK_FLUSHED */
#ifdef UNIV_FLASH_CACHE_TRACE
			ut_print_timestamp(fc->f_debug);
			fprintf(fc->f_debug, "space:%lu is droped, the page(%lu, %lu) need not to be flushed.\n",
			(ulong)flush_block->space, (ulong)flush_block->space, (ulong)flush_block->offset);
#endif
			flush_block->state = BLOCK_FLUSHED;
			i += data_size;
			c_flush += data_size;
			flash_block_mutex_exit(flush_block->fil_offset);
			continue;
		}

#ifdef UNIV_FLASH_CACHE_TRACE
		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			fc_block_print(flush_block);
			ut_error;
		}
#endif

		flush_block->io_fix |= IO_FIX_FLUSH;

		/* 
		 * we should set block state BLOCK_FLUSHED,  if not, doublewrite may hit this block 
		 * and invalid this block and reduce the dirty count, but when finish flush ,we will 
		 * reduce the dirty count too, so it may reduce twice.
		 */
		flush_block->state = BLOCK_FLUSHED;
		
		/* save the block info, as the block may be invalided by doublewrite after release mutex */
		b_space = flush_block->space;
		b_offset = flush_block->offset;

		raw_zip_size = flush_block->raw_zip_size;
		size = flush_block->size;
		fil_offset = flush_block->fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		is_v4_blk = flush_block->is_v4_blk;
#endif
		/* release the block now, so read can hit in this blocks and read the data */
		flash_block_mutex_exit(flush_block->fil_offset);
		
		/*
		 * Only flush thread will update read_buf and flush_off/round. 
		 * there only single flush thread no need to lock read_buf
		 */
		page = fc->flush_buf->buf + fc->flush_buf->free_pos * fc_blk_size;

		if (raw_zip_size > 0) {
			ut_a((size * fc_blk_size) == UNIV_PAGE_SIZE);
			page_io = fc->flush_zip_read_buf;
		} else {
			page_io = page;
		}

		fc_io_offset(fil_offset, &block_offset, &byte_offset);
		ret = fil_io(OS_FILE_READ, TRUE, FLASH_CACHE_SPACE, 0,
				block_offset, byte_offset, data_size * fc_blk_size,
				page_io, NULL);
	
		if (ret != DB_SUCCESS) {
			ut_print_timestamp(stderr);
			fprintf(stderr, " InnoDB: Flash cache [Error]: unable to read page from flash cache.\n"
				"flash cache flush offset is:%lu.\n", (ulong)(start_offset + i));
			ut_error;
		}		

		if ((flush_block != NULL) && (flush_block->state == BLOCK_NOT_USED)) {
			goto skip;
		}

		/* decompress the compress data */
		if (raw_zip_size > 0) {
#ifdef UNIV_FLASH_CACHE_TRACE
			ulint blk_zip_size_byte;
			if (is_v4_blk) {
				blk_zip_size_byte = raw_zip_size * fc_get_block_size_byte();
			} else {
				blk_zip_size_byte = fc_block_compress_align(raw_zip_size) * fc_get_block_size_byte();
				ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_ZIP_RAW_SIZE) == raw_zip_size);				
			} 

			ut_a(page_io);
			ut_a(page);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_HEADER) == FC_ZIP_PAGE_CHECKSUM);
			ut_a((ulint)mach_read_from_4(page_io + blk_zip_size_byte - FC_ZIP_PAGE_TAILER)
				== FC_ZIP_PAGE_CHECKSUM);	
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_SIZE) == blk_zip_size_byte);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_ORIG_SIZE) == UNIV_PAGE_SIZE);		
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_SPACE) == b_space);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_OFFSET) == b_offset);	

			/* only qlz can do this check  */
			if (srv_flash_cache_compress_algorithm == FC_BLOCK_COMPRESS_QUICKLZ) {
				if (is_v4_blk) {
					ut_a(raw_zip_size * fc_get_block_size_byte()
						>= (ulint)fc_qlz_size_compressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
				} else {
					ut_a(raw_zip_size 
						== (ulint)fc_qlz_size_compressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
				}
				
				ut_a(UNIV_PAGE_SIZE == fc_qlz_size_decompressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
			}
#endif
			fc_block_do_decompress(DECOMPRESS_FLUSH, page_io, raw_zip_size, page);
		}

		space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
		offset = mach_read_from_4(page + FIL_PAGE_OFFSET);

		if ((space != b_space) || (offset != b_offset)) {
			ut_print_timestamp(stderr); 
			fc_block_print(flush_block);
			ut_error;
		}

		if (buf_page_is_corrupted(page, zip_size)) {
			buf_page_print(page, zip_size, BUF_PAGE_PRINT_NO_CRASH);
			ut_error;
		}		
		
		page_type = fil_page_get_type(page);
		if (page_type == FIL_PAGE_INDEX) {
			page_type = 1;
		}
		srv_flash_cache_flush_detail[page_type]++;
		
		ret = fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, FALSE, space, 
				zip_size, offset, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, page, NULL);
		if (ret != DB_SUCCESS && ret != DB_TABLESPACE_DELETED) {
			ut_print_timestamp(stderr); 
			fc_block_print(flush_block);
			ut_error;
		}

		/* add  UNIV_PAGE_SIZE / fc_blk_size for safe */
		fc->flush_buf->free_pos += UNIV_PAGE_SIZE / fc_blk_size;	

skip:
		i += data_size;
		c_flush += data_size;	

		if ((fc->flush_buf->free_pos + UNIV_PAGE_SIZE / fc_blk_size) >= fc->flush_buf->size) {
			/* FIXME: is it safe to change n_flush, as step 3 will use n_flush */
			fc->n_flush_cur = i;
			break;
		}	
	}

	/* ok, now flush all async io to disk */
	fc_flush_sync_dbfile();

	/* step 3: all the flush blocks have sync to disk,  update the state and io_fix */
	j = 0;
	while (j < fc->n_flush_cur) {

		flash_cache_mutex_enter();
		pos = (start_offset + j) % fc_size;
		flush_block = fc_get_block(pos);

		if (flush_block  == NULL) {
			j++;
			flash_cache_mutex_exit();
			continue;
		}
		/* block state and io_fix may be changed by doublewrite and lru move */
		flash_block_mutex_enter(flush_block->fil_offset);
		flash_cache_mutex_exit();
		if (flush_block->io_fix & IO_FIX_FLUSH) {
			/* the block is already in BLOCK_FLUSHED state */
			flush_block->io_fix &= ~IO_FIX_FLUSH;
		} 
		
		data_size = fc_block_get_data_size(flush_block);
		flash_block_mutex_exit(flush_block->fil_offset);	
		
		j += data_size;
	}

	
	/*
	 * i and j may be different, as the last been flushed block may be invalid by doublewrite,
	 * so maybe i > j
	 */
	
	/* add the actual flushed blocks */
	srv_flash_cache_flush = srv_flash_cache_flush + c_flush; 

	/* step 4: update fc status and flush_off, and wake up threads that are sleep for space  */
	if (i > 0) {
		ut_a(i >= c_flush);

		flash_cache_mutex_enter();
		
		/*
		 * it is safe to inc flush off and sub dirty blocks at this time,
		 * as fc_validate is not work
		 */
		fc_inc_flush_off(i);
		flash_cache_log_mutex_enter();
		fc_log->current_stat->flush_offset = fc->flush_off;
		fc_log->current_stat->flush_round = fc->flush_round;	
		flash_cache_log_mutex_exit();		
		
		ut_a(srv_flash_cache_dirty >= c_flush);		
		srv_flash_cache_dirty -= c_flush;
		
		srv_fc_flush_should_commit_log_flush++;
		os_event_set(fc->wait_space_event);	

		fc->n_flush_cur = 0;
		
		flash_cache_mutex_exit();		
	}

	fc->flush_buf->free_pos = 0;
 
	return c_flush;
}