Example #1: buf_LRU_print
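/********************************************************//**
Prints the buffer pool LRU list to stderr; acquires and releases
buf_pool->mutex internally. */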
void
buf_LRU_print(void)
/*===============*/
{
	buf_block_t*	block;
	buf_frame_t*	frame;
	ulint		len;

	ut_ad(buf_pool);
	mutex_enter(&(buf_pool->mutex));

	fprintf(stderr, "Pool ulint clock %lu\n",
		(ulong) buf_pool->ulint_clock);

	block = UT_LIST_GET_FIRST(buf_pool->LRU);

	len = 0;

	while (block != NULL) {

		fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);

		if (block->old) {
			fputs("old ", stderr);
		}

		if (block->buf_fix_count) {
			fprintf(stderr, "buffix count %lu ",
				(ulong) block->buf_fix_count);
		}

		if (block->io_fix) {
			fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
		}

		if (ut_dulint_cmp(block->oldest_modification,
				  ut_dulint_zero) > 0) {
			fputs("modif. ", stderr);
		}

		frame = buf_block_get_frame(block);

		fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
			(ulong) block->LRU_position,
			(ulong) fil_page_get_type(frame),
			(ulong) ut_dulint_get_low
			(btr_page_get_index_id(frame)));

		block = UT_LIST_GET_NEXT(LRU, block);
		if (++len == 10) {
			len = 0;
			putc('\n', stderr);
		}
	}

	mutex_exit(&(buf_pool->mutex));
}
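The traversal above is the usual InnoDB intrusive-list walk: take buf_pool->mutex, start from UT_LIST_GET_FIRST(buf_pool->LRU), follow UT_LIST_GET_NEXT(LRU, block), and break the output line after every 10 blocks. A minimal standalone analogue of that pattern, assuming nothing from InnoDB (the struct and names below are hypothetical):

#include <stdio.h>

/* simplified stand-in for buf_block_t: only what the walk needs */
struct node {
	unsigned long	offset;
	struct node*	next;	/* stand-in for UT_LIST_GET_NEXT(LRU, block) */
};

/* print one entry per block, wrapping the line every 10 entries,
like the ++len == 10 counter in buf_LRU_print */
static void
list_print(const struct node* head)
{
	unsigned long	len = 0;

	while (head != NULL) {
		fprintf(stderr, "BLOCK %lu ", head->offset);
		head = head->next;

		if (++len == 10) {
			len = 0;
			putc('\n', stderr);
		}
	}
	putc('\n', stderr);
}

int
main(void)
{
	struct node	n[25];
	int		i;

	for (i = 0; i < 25; i++) {
		n[i].offset = (unsigned long) i;
		n[i].next = (i + 1 < 25) ? &n[i + 1] : NULL;
	}

	list_print(&n[0]);
	return(0);
}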
Example #2: mlog_parse_string
/********************************************************//**
Parses a log record written by mlog_write_string.
@return	parsed record end, NULL if not a complete record */
UNIV_INTERN
byte*
mlog_parse_string(
/*==============*/
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	offset;
	ulint	len;

	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);

	if (end_ptr < ptr + 4) {

		return(NULL);
	}

	/* record body: a 2-byte offset within the page, a 2-byte length,
	then the string bytes themselves */
	offset = mach_read_from_2(ptr);
	ptr += 2;
	len = mach_read_from_2(ptr);
	ptr += 2;

	if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
	    || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (end_ptr < ptr + len) {

		return(NULL);
	}

	if (page) {
		if (UNIV_LIKELY_NULL(page_zip)) {
			memcpy(((page_zip_des_t*) page_zip)->data
				+ offset, ptr, len);
		}
		memcpy(page + offset, ptr, len);
	}

	return(ptr + len);
}
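The record body parsed above is just: a 2-byte big-endian offset within the page, a 2-byte big-endian length, then len bytes of payload. A self-contained sketch that builds such a body and re-parses it with the same bounds checks; the helpers (put2, get2, build_string_body, parse_string_body) are hypothetical illustrations of the layout, not the real mlog_write_string:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* hypothetical helpers mirroring mach_write_to_2/mach_read_from_2
(these fields are stored most-significant byte first) */
static void
put2(uint8_t* b, unsigned v)
{
	b[0] = (uint8_t) (v >> 8);
	b[1] = (uint8_t) (v & 0xFF);
}

static unsigned
get2(const uint8_t* b)
{
	return ((unsigned) b[0] << 8) | b[1];
}

/* build the record body: offset, length, payload; returns bytes written */
static size_t
build_string_body(uint8_t* dst, unsigned offset, const void* s, unsigned len)
{
	put2(dst, offset);
	put2(dst + 2, len);
	memcpy(dst + 4, s, len);
	return 4 + (size_t) len;
}

/* parse it back the way mlog_parse_string does, with the same checks;
returns the record end, or NULL if the record is incomplete or corrupt */
static const uint8_t*
parse_string_body(const uint8_t* ptr, const uint8_t* end_ptr,
		  uint8_t* page, size_t page_size)
{
	unsigned	offset;
	unsigned	len;

	if (end_ptr < ptr + 4) {
		return NULL;		/* not a complete record */
	}

	offset = get2(ptr);
	ptr += 2;
	len = get2(ptr);
	ptr += 2;

	if (offset >= page_size || len + offset > page_size) {
		return NULL;		/* corrupt record */
	}

	if (end_ptr < ptr + len) {
		return NULL;		/* payload not fully buffered */
	}

	if (page != NULL) {
		memcpy(page + offset, ptr, len);
	}

	return ptr + len;
}

int
main(void)
{
	uint8_t	body[64];
	uint8_t	page[16384] = {0};
	size_t	n;

	n = build_string_body(body, 100, "hello", 5);
	assert(parse_string_body(body, body + n, page, sizeof(page))
	       == body + n);
	assert(memcmp(page + 100, "hello", 5) == 0);
	return 0;
}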
Example #3: mlog_parse_nbytes
/********************************************************//**
Parses a log record written by mlog_write_ulint or mlog_write_dulint.
@return	parsed record end, NULL if not a complete record or a corrupt record */
UNIV_INTERN
byte*
mlog_parse_nbytes(
/*==============*/
	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	offset;
	ulint	val;
	dulint	dval;

	ut_a(type <= MLOG_8BYTES);
	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	ptr += 2;

	if (offset >= UNIV_PAGE_SIZE) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	/* MLOG_8BYTES carries its value as a compressed dulint; the
	1/2/4-byte types are handled further below via a compressed ulint */
	if (type == MLOG_8BYTES) {
		ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);

		if (ptr == NULL) {

			return(NULL);
		}

		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_8
					(((page_zip_des_t*) page_zip)->data
					 + offset, dval);
			}
			mach_write_to_8(page + offset, dval);
		}

		return(ptr);
	}

	ptr = mach_parse_compressed(ptr, end_ptr, &val);

	if (ptr == NULL) {

		return(NULL);
	}

	switch (type) {
	case MLOG_1BYTE:
		if (UNIV_UNLIKELY(val > 0xFFUL)) {
			goto corrupt;
		}
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_1
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_1(page + offset, val);
		}
		break;
	case MLOG_2BYTES:
		if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
			goto corrupt;
		}
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_2
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_2(page + offset, val);
		}
		break;
	case MLOG_4BYTES:
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_4
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_4(page + offset, val);
		}
		break;
	default:
	corrupt:
		recv_sys->found_corrupt_log = TRUE;
		ptr = NULL;
	}

	return(ptr);
}
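The switch above range-checks the parsed value against the record type and then stores it into the page (and, for compressed tables, into page_zip->data) with a big-endian write. A standalone sketch of that validate-and-apply step, assuming only that mach_write_to_1/2/4 store the most significant byte first; the names are hypothetical and the compressed-integer decoding performed by mach_parse_compressed is deliberately left out:

#include <stddef.h>
#include <stdint.h>

enum nbytes_type { NB_1BYTE = 1, NB_2BYTES = 2, NB_4BYTES = 4 };

/* big-endian store of the low nbytes bytes of val, mirroring
mach_write_to_1/2/4 */
static void
store_be(uint8_t* dst, unsigned long val, unsigned nbytes)
{
	unsigned	i;

	for (i = 0; i < nbytes; i++) {
		dst[i] = (uint8_t) (val >> (8 * (nbytes - 1 - i)));
	}
}

/* apply a parsed value at offset with the same range checks as the
switch in mlog_parse_nbytes; returns 0 on success, -1 for a value that
does not fit the record type (the "goto corrupt" path) */
static int
apply_nbytes(uint8_t* page, size_t page_size, size_t offset,
	     enum nbytes_type type, unsigned long val)
{
	if (offset >= page_size) {
		return -1;
	}
	if (type == NB_1BYTE && val > 0xFFUL) {
		return -1;
	}
	if (type == NB_2BYTES && val > 0xFFFFUL) {
		return -1;
	}
	if (page != NULL) {
		store_be(page + offset, val, (unsigned) type);
	}
	return 0;
}

int
main(void)
{
	uint8_t	page[16384] = {0};

	/* fits: writes 0x12 0x34 at offset 50 */
	if (apply_nbytes(page, sizeof(page), 50, NB_2BYTES, 0x1234UL) != 0) {
		return 1;
	}
	/* does not fit one byte: rejected, like "goto corrupt" */
	return apply_nbytes(page, sizeof(page), 60, NB_1BYTE, 0x1FFUL) == -1
		? 0 : 1;
}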
Example #4: fc_flush_to_disk
/********************************************************************//**
Flush pages from flash cache.
@return	number of pages flushed to the tablespace */
UNIV_INTERN
ulint
fc_flush_to_disk(
/*==================*/
	ibool do_full_io)	/*!< in: whether to flush at full io capacity */
{
	ulint distance;
	byte* page;
	ulint ret;
	ulint space;
	ulint offset;
	ulint page_type;
	ulint i, j;
	ulint pos;
	ulint zip_size;
	ulint block_offset, byte_offset;
	ulint fc_size = fc_get_size();
	ulint fc_blk_size = fc_get_block_size_byte();
	ulint start_offset;
	ulint data_size;
	fc_block_t *flush_block = NULL;
	ulint c_flush = 0;
    
	ut_ad(!mutex_own(&fc->mutex));
	ut_a(fc->flush_buf->free_pos == 0);

	/* step 1: get the number of blocks that need to be flushed to the tablespace */
	flash_cache_mutex_enter();

	distance = fc_get_distance();
	start_offset = fc->flush_off;
    
	if ( distance == 0 ) {
		flash_cache_mutex_exit();
		return 0;
	} else if ( recv_recovery_on ) {
		if ( distance < (( 1.0 * srv_flash_cache_write_cache_pct /100 ) * fc_size)) {
			fc->n_flush_cur = 0;
		} else if ( distance < ( ( 1.0*srv_flash_cache_do_full_io_pct /100 ) * fc_size)) {
			fc->n_flush_cur = ut_min(PCT_IO_FC(10), distance);
		} else {
			fc->n_flush_cur = ut_min(PCT_IO_FC(100), distance);
		}
	} else if ( distance < (( 1.0 * srv_flash_cache_write_cache_pct /100 ) * fc_size)
		&& !do_full_io ) {
		flash_cache_mutex_exit();
		return 0;
	} else if ( distance < (( 1.0 * srv_flash_cache_do_full_io_pct/100 ) * fc_size)
		&& !do_full_io ) {
		fc->n_flush_cur = PCT_IO_FC(srv_fc_write_cache_flush_pct);
	} else {
		ut_ad((distance > ( 1.0 * srv_flash_cache_do_full_io_pct/100 ) * fc_size) 
			|| do_full_io );
		fc->n_flush_cur = ut_min(PCT_IO_FC(srv_fc_full_flush_pct), distance);
	}

	flash_cache_mutex_exit();

	/* step 2: start flushing blocks using async io; set the block io_fix to IO_FIX_FLUSH */
	i = 0;
	while (i < fc->n_flush_cur) {
		ulint b_space;
		ulint b_offset;
		ulint raw_zip_size;
		ulint size;
		ulint fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		ulint is_v4_blk;
#endif
		byte* page_io;

		flash_cache_mutex_enter();
		pos = ( start_offset + i ) % fc_size;
		flush_block = fc_get_block(pos);

		if (flush_block == NULL) {
			i++;
			flash_cache_mutex_exit();
			continue;
		}

		/* we must take the block mutex, as doublewrite may hit this block and invalidate it */
		flash_block_mutex_enter(flush_block->fil_offset);

		flash_cache_mutex_exit();
		
		data_size = fc_block_get_data_size(flush_block);

		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			/* the block is a read cache block, was merged by a later write, or has already been flushed */
			ut_a (flush_block->state == BLOCK_NOT_USED
				|| flush_block->state == BLOCK_READ_CACHE
				|| flush_block->state == BLOCK_FLUSHED);
			
			i += data_size;

			flash_block_mutex_exit(flush_block->fil_offset);
			if (flush_block->state == BLOCK_NOT_USED) {
				//fc_block_detach(FALSE, flush_block);
				fc_block_free(flush_block);
			}
			
			continue;
		}

		zip_size = fil_space_get_zip_size(flush_block->space);
		if (zip_size == ULINT_UNDEFINED) {
			/* the table has been dropped, just mark the block BLOCK_FLUSHED */
#ifdef UNIV_FLASH_CACHE_TRACE
			ut_print_timestamp(fc->f_debug);
			fprintf(fc->f_debug, "space:%lu is dropped, the page(%lu, %lu) need not be flushed.\n",
				(ulong)flush_block->space, (ulong)flush_block->space, (ulong)flush_block->offset);
#endif
			flush_block->state = BLOCK_FLUSHED;
			i += data_size;
			c_flush += data_size;
			flash_block_mutex_exit(flush_block->fil_offset);
			continue;
		}

#ifdef UNIV_FLASH_CACHE_TRACE
		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			fc_block_print(flush_block);
			ut_error;
		}
#endif

		flush_block->io_fix |= IO_FIX_FLUSH;

		/*
		 * we must set the block state to BLOCK_FLUSHED here; if not,
		 * doublewrite may hit this block, invalidate it and decrement
		 * the dirty count, and when we finish the flush we would
		 * decrement the dirty count again, i.e. twice.
		 */
		flush_block->state = BLOCK_FLUSHED;
		
		/* save the block info, as the block may be invalidated by doublewrite after we release the mutex */
		b_space = flush_block->space;
		b_offset = flush_block->offset;

		raw_zip_size = flush_block->raw_zip_size;
		size = flush_block->size;
		fil_offset = flush_block->fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		is_v4_blk = flush_block->is_v4_blk;
#endif
		/* release the block mutex now, so reads can hit this block and read the data */
		flash_block_mutex_exit(flush_block->fil_offset);
		
		/*
		 * Only the flush thread updates read_buf and flush_off/round.
		 * As there is only a single flush thread, there is no need to
		 * lock read_buf.
		 */
		page = fc->flush_buf->buf + fc->flush_buf->free_pos * fc_blk_size;

		if (raw_zip_size > 0) {
			ut_a((size * fc_blk_size) == UNIV_PAGE_SIZE);
			page_io = fc->flush_zip_read_buf;
		} else {
			page_io = page;
		}

		fc_io_offset(fil_offset, &block_offset, &byte_offset);
		ret = fil_io(OS_FILE_READ, TRUE, FLASH_CACHE_SPACE, 0,
				block_offset, byte_offset, data_size * fc_blk_size,
				page_io, NULL);
	
		if (ret != DB_SUCCESS) {
			ut_print_timestamp(stderr);
			fprintf(stderr, " InnoDB: Flash cache [Error]: unable to read page from flash cache.\n"
				"flash cache flush offset is:%lu.\n", (ulong)(start_offset + i));
			ut_error;
		}		

		/* the block may have been invalidated (freed) by doublewrite
		while its mutex was released for the read above; in that case
		skip the write to the tablespace */
		if ((flush_block != NULL) && (flush_block->state == BLOCK_NOT_USED)) {
			goto skip;
		}

		/* decompress the compressed data */
		if (raw_zip_size > 0) {
#ifdef UNIV_FLASH_CACHE_TRACE
			ulint blk_zip_size_byte;
			if (is_v4_blk) {
				blk_zip_size_byte = raw_zip_size * fc_get_block_size_byte();
			} else {
				blk_zip_size_byte = fc_block_compress_align(raw_zip_size) * fc_get_block_size_byte();
				ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_ZIP_RAW_SIZE) == raw_zip_size);				
			} 

			ut_a(page_io);
			ut_a(page);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_HEADER) == FC_ZIP_PAGE_CHECKSUM);
			ut_a((ulint)mach_read_from_4(page_io + blk_zip_size_byte - FC_ZIP_PAGE_TAILER)
				== FC_ZIP_PAGE_CHECKSUM);	
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_SIZE) == blk_zip_size_byte);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_ORIG_SIZE) == UNIV_PAGE_SIZE);		
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_SPACE) == b_space);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_OFFSET) == b_offset);	

			/* only qlz can do this check  */
			if (srv_flash_cache_compress_algorithm == FC_BLOCK_COMPRESS_QUICKLZ) {
				if (is_v4_blk) {
					ut_a(raw_zip_size * fc_get_block_size_byte()
						>= (ulint)fc_qlz_size_compressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
				} else {
					ut_a(raw_zip_size 
						== (ulint)fc_qlz_size_compressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
				}
				
				ut_a(UNIV_PAGE_SIZE == fc_qlz_size_decompressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
			}
#endif
			fc_block_do_decompress(DECOMPRESS_FLUSH, page_io, raw_zip_size, page);
		}

		space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
		offset = mach_read_from_4(page + FIL_PAGE_OFFSET);

		if ((space != b_space) || (offset != b_offset)) {
			ut_print_timestamp(stderr); 
			fc_block_print(flush_block);
			ut_error;
		}

		if (buf_page_is_corrupted(page, zip_size)) {
			buf_page_print(page, zip_size, BUF_PAGE_PRINT_NO_CRASH);
			ut_error;
		}		
		
		page_type = fil_page_get_type(page);
		if (page_type == FIL_PAGE_INDEX) {
			page_type = 1;
		}
		srv_flash_cache_flush_detail[page_type]++;
		
		ret = fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, FALSE, space, 
				zip_size, offset, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, page, NULL);
		if (ret != DB_SUCCESS && ret != DB_TABLESPACE_DELETED) {
			ut_print_timestamp(stderr); 
			fc_block_print(flush_block);
			ut_error;
		}

		/* add UNIV_PAGE_SIZE / fc_blk_size for safety */
		fc->flush_buf->free_pos += UNIV_PAGE_SIZE / fc_blk_size;	

skip:
		i += data_size;
		c_flush += data_size;	

		if ((fc->flush_buf->free_pos + UNIV_PAGE_SIZE / fc_blk_size) >= fc->flush_buf->size) {
			/* FIXME: is it safe to change n_flush, as step 3 will use n_flush */
			fc->n_flush_cur = i;
			break;
		}	
	}

	/* ok, now flush all async io to disk */
	fc_flush_sync_dbfile();

	/* step 3: all the flushed blocks have been synced to disk; update the state and io_fix */
	j = 0;
	while (j < fc->n_flush_cur) {

		flash_cache_mutex_enter();
		pos = (start_offset + j) % fc_size;
		flush_block = fc_get_block(pos);

		if (flush_block  == NULL) {
			j++;
			flash_cache_mutex_exit();
			continue;
		}
		/* the block state and io_fix may be changed by doublewrite and by LRU moves */
		flash_block_mutex_enter(flush_block->fil_offset);
		flash_cache_mutex_exit();
		if (flush_block->io_fix & IO_FIX_FLUSH) {
			/* the block is already in BLOCK_FLUSHED state */
			flush_block->io_fix &= ~IO_FIX_FLUSH;
		} 
		
		data_size = fc_block_get_data_size(flush_block);
		flash_block_mutex_exit(flush_block->fil_offset);	
		
		j += data_size;
	}

	
	/*
	 * i and j may differ, as the last flushed blocks may have been
	 * invalidated by doublewrite, so we may end up with i > j
	 */

	/* account for the blocks actually flushed */
	srv_flash_cache_flush = srv_flash_cache_flush + c_flush;

	/* step 4: update the fc status and flush_off, and wake up threads that are sleeping for space */
	if (i > 0) {
		ut_a(i >= c_flush);

		flash_cache_mutex_enter();
		
		/*
		 * it is safe to increase flush_off and subtract the dirty
		 * blocks at this time, as fc_validate is not running
		 */
		fc_inc_flush_off(i);
		flash_cache_log_mutex_enter();
		fc_log->current_stat->flush_offset = fc->flush_off;
		fc_log->current_stat->flush_round = fc->flush_round;	
		flash_cache_log_mutex_exit();		
		
		ut_a(srv_flash_cache_dirty >= c_flush);		
		srv_flash_cache_dirty -= c_flush;
		
		srv_fc_flush_should_commit_log_flush++;
		os_event_set(fc->wait_space_event);	

		fc->n_flush_cur = 0;
		
		flash_cache_mutex_exit();		
	}

	fc->flush_buf->free_pos = 0;
 
	return c_flush;
}
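Step 1 above sizes the flush batch from distance (as returned by fc_get_distance) against two percentage thresholds of the cache size: below srv_flash_cache_write_cache_pct nothing is flushed unless do_full_io is set, between the two thresholds a moderate slice of io capacity is used, and beyond srv_flash_cache_do_full_io_pct (or when do_full_io is set) the batch grows to the full-flush slice capped by distance. A condensed standalone sketch of that decision, with the recovery branch omitted and hypothetical stand-ins for PCT_IO_FC and the srv_* settings:

typedef unsigned long ulint;

static ulint
ul_min(ulint a, ulint b)
{
	return a < b ? a : b;
}

/* decide the flush batch size the way step 1 does for normal running
(the recv_recovery_on branch is not modelled here):
  - below write_cache_pct% of the cache and no full io: flush nothing
  - below do_full_io_pct% and no full io: flush a moderate io slice
  - otherwise: flush up to the full io slice, capped by distance */
static ulint
flush_batch_size(ulint distance, ulint fc_size, int do_full_io,
		 double write_cache_pct, double do_full_io_pct,
		 ulint moderate_io, ulint full_io)
{
	if (distance == 0) {
		return 0;
	}
	if (distance < (write_cache_pct / 100.0) * fc_size && !do_full_io) {
		return 0;
	}
	if (distance < (do_full_io_pct / 100.0) * fc_size && !do_full_io) {
		return moderate_io;
	}
	return ul_min(full_io, distance);
}

int
main(void)
{
	/* e.g. 1000 dirty blocks in a 10000-block cache with 30%/80%
	thresholds: below the first threshold, so nothing is flushed */
	return flush_batch_size(1000, 10000, 0, 30.0, 80.0, 200, 2000) == 0
		? 0 : 1;
}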