void ArrayPage::initialize_volatile_page(
  Epoch initial_epoch,
  StorageId storage_id,
  VolatilePagePointer page_id,
  uint16_t payload_size,
  uint8_t level,
  const ArrayRange& array_range) {
  ASSERT_ND(initial_epoch.is_valid());
  std::memset(this, 0, kPageSize);
  header_.init_volatile(page_id, storage_id, kArrayPageType);
  payload_size_ = payload_size;
  level_ = level;
  array_range_ = array_range;
  if (is_leaf()) {
    uint16_t records = get_leaf_record_count();
    for (uint16_t i = 0; i < records; ++i) {
      get_leaf_record(i, payload_size)->owner_id_.xct_id_.set_epoch(initial_epoch);
    }
  }
}
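// --- Illustrative sketch (not FOEDUS code) ----------------------------------
// The function above zero-fills the page, sets header fields, and, for leaf
// pages, stamps every record's owner XctId with the initial epoch. The toy
// types below (ToyEpoch, ToyRecord, ToyLeafPage) are hypothetical
// simplifications that show the same initialization pattern in isolation.
#include <cstdint>
#include <cstring>

struct ToyEpoch {
  uint32_t value_;
};

struct ToyRecord {
  ToyEpoch owner_epoch_;  // stands in for owner_id_.xct_id_'s epoch
  char payload_[48];
};

struct ToyLeafPage {
  uint16_t payload_size_;
  uint16_t record_count_;
  ToyRecord records_[64];

  void initialize(ToyEpoch initial_epoch, uint16_t payload_size, uint16_t records) {
    std::memset(this, 0, sizeof(*this));  // analogous to std::memset(this, 0, kPageSize)
    payload_size_ = payload_size;
    record_count_ = records;
    for (uint16_t i = 0; i < records; ++i) {
      records_[i].owner_epoch_ = initial_epoch;  // analogous to set_epoch(initial_epoch)
    }
  }
};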
ErrorStack LogMapper::handle_process_buffer(const fs::DirectIoFile &file, IoBufStatus* status) {
  const Epoch base_epoch = parent_.get_base_epoch();  // only for assertions
  const Epoch until_epoch = parent_.get_valid_until_epoch();  // only for assertions

  // Many temporary memory regions are used only within this method and are
  // completely cleared out for every call.
  clear_storage_buckets();

  char* buffer = reinterpret_cast<char*>(io_buffer_.get_block());
  status->more_in_the_file_ = false;
  for (; status->cur_inbuf_ < status->end_inbuf_aligned_; ++processed_log_count_) {
    // Note: The loop here must be a VERY tight loop, iterated over every single log entry!
    // In most cases, we should be just calling bucket_log().
    const log::LogHeader* header
      = reinterpret_cast<const log::LogHeader*>(buffer + status->cur_inbuf_);
    ASSERT_ND(header->log_length_ > 0);
    ASSERT_ND(status->buf_infile_aligned_ != 0 || status->cur_inbuf_ != 0
      || header->get_type() == log::kLogCodeEpochMarker);  // file starts with marker
    // we must be starting from an epoch marker.
    ASSERT_ND(!status->first_read_ || header->get_type() == log::kLogCodeEpochMarker);
    ASSERT_ND(header->get_kind() == log::kRecordLogs
      || header->get_type() == log::kLogCodeEpochMarker
      || header->get_type() == log::kLogCodeFiller);

    if (UNLIKELY(header->log_length_ + status->cur_inbuf_ > status->end_inbuf_aligned_)) {
      // if a log goes beyond this read, stop processing here and read from that offset again.
      // this is simpler than gluing the fragment. This happens just once per 64MB read,
      // so not a big waste.
      if (status->to_infile(status->cur_inbuf_ + header->log_length_)
          > status->size_infile_aligned_) {
        // but it never spans two files. something is wrong.
        LOG(ERROR) << "inconsistent end of log entry. offset="
          << status->to_infile(status->cur_inbuf_)
          << ", file=" << file << ", log header=" << *header;
        return ERROR_STACK_MSG(kErrorCodeSnapshotInvalidLogEnd, file.get_path().c_str());
      }
      status->next_infile_ = status->to_infile(status->cur_inbuf_);
      status->more_in_the_file_ = true;
      break;
    } else if (UNLIKELY(header->get_type() == log::kLogCodeEpochMarker)) {
      // skip epoch marker
      const log::EpochMarkerLogType *marker =
        reinterpret_cast<const log::EpochMarkerLogType*>(header);
      ASSERT_ND(header->log_length_ == sizeof(log::EpochMarkerLogType));
      ASSERT_ND(marker->log_file_ordinal_ == status->cur_file_ordinal_);
      ASSERT_ND(marker->log_file_offset_ == status->to_infile(status->cur_inbuf_));
      ASSERT_ND(marker->new_epoch_ >= marker->old_epoch_);
      ASSERT_ND(!base_epoch.is_valid() || marker->new_epoch_ >= base_epoch);
      ASSERT_ND(marker->new_epoch_ <= until_epoch);
      if (status->first_read_) {
        ASSERT_ND(!base_epoch.is_valid()
          || marker->old_epoch_ <= base_epoch  // otherwise we skipped some logs
          || marker->old_epoch_ == marker->new_epoch_);  // the first marker (old==new) is ok
        status->first_read_ = false;
      } else {
        ASSERT_ND(!base_epoch.is_valid() || marker->old_epoch_ >= base_epoch);
      }
    } else if (UNLIKELY(header->get_type() == log::kLogCodeFiller)) {
      // skip filler log
    } else {
      bool bucketed = bucket_log(header->storage_id_, status->cur_inbuf_);
      if (UNLIKELY(!bucketed)) {
        // need to add a new bucket
        bool added = add_new_bucket(header->storage_id_);
        if (added) {
          bucketed = bucket_log(header->storage_id_, status->cur_inbuf_);
          ASSERT_ND(bucketed);
        } else {
          // ran out of bucket memory. have to flush now.
          flush_all_buckets();
          added = add_new_bucket(header->storage_id_);
          ASSERT_ND(added);
          bucketed = bucket_log(header->storage_id_, status->cur_inbuf_);
          ASSERT_ND(bucketed);
        }
      }
    }

    status->cur_inbuf_ += header->log_length_;
  }

  // This fixes Bug #100. When a full mapper buffer exactly ends with a complete log,
  // we must keep reading; this check was previously missing.
  if (status->cur_inbuf_ == status->end_inbuf_aligned_ &&
    status->end_infile_ > status->to_infile(status->cur_inbuf_)) {
    LOG(INFO) << "Hooray, a full mapper buffer exactly ends with a complete log record. rare!";
    status->next_infile_ = status->to_infile(status->cur_inbuf_);
    status->more_in_the_file_ = true;
  }

  // bucketized all logs. now let's send them out to reducers.
  flush_all_buckets();
  return kRetOk;
}
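// --- Illustrative sketch (not the FOEDUS LogMapper API) ---------------------
// The final "else" branch above uses a three-step fallback: try bucket_log();
// if the storage has no bucket yet, add_new_bucket(); if bucket memory is
// exhausted, flush_all_buckets() and retry. The ToyBucketizer below is a
// hypothetical, self-contained stand-in that shows only that control flow.
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

class ToyBucketizer {
 public:
  explicit ToyBucketizer(std::size_t max_buckets) : max_buckets_(max_buckets) {}

  // Mirrors the bucketing fallback in the main loop above.
  void add_log(uint32_t storage_id, uint64_t log_offset) {
    if (!bucket_log(storage_id, log_offset)) {   // no bucket for this storage yet
      if (!add_new_bucket(storage_id)) {         // ran out of bucket memory
        flush_all_buckets();                     // send everything out, then retry
        add_new_bucket(storage_id);              // must succeed now that buckets are empty
      }
      bucket_log(storage_id, log_offset);        // must succeed: the bucket now exists
    }
  }

 private:
  bool bucket_log(uint32_t storage_id, uint64_t log_offset) {
    auto it = buckets_.find(storage_id);
    if (it == buckets_.end()) {
      return false;
    }
    it->second.push_back(log_offset);
    return true;
  }
  bool add_new_bucket(uint32_t storage_id) {
    if (buckets_.size() >= max_buckets_) {
      return false;
    }
    buckets_[storage_id];  // create an empty bucket for this storage
    return true;
  }
  void flush_all_buckets() {
    buckets_.clear();  // the real mapper sends the buckets to reducers here
  }

  std::size_t max_buckets_;
  std::map<uint32_t, std::vector<uint64_t>> buckets_;
};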
ErrorStack MetaLogger::truncate_non_durable(Epoch saved_durable_epoch) {
  ASSERT_ND(saved_durable_epoch.is_valid());
  const uint64_t from_offset = control_block_->oldest_offset_;
  const uint64_t to_offset = control_block_->durable_offset_;
  ASSERT_ND(from_offset <= to_offset);
  LOG(INFO) << "Truncating non-durable meta logs, if any. Right now meta logger's"
    << " oldest_offset_=" << from_offset
    << ", (meta logger's local) durable_offset_=" << to_offset
    << ", global saved_durable_epoch=" << saved_durable_epoch;
  ASSERT_ND(current_file_->is_opened());

  // Currently, we need to read everything from oldest_offset_ to see from where
  // we might have non-durable logs.
  // TASK(Hideaki) We should change SavepointManager to emit globally_durable_offset_. later.
  const uint64_t read_size = to_offset - from_offset;
  if (read_size > 0) {
    memory::AlignedMemory buffer;
    buffer.alloc(read_size, 1U << 12, memory::AlignedMemory::kNumaAllocOnnode, 0);
    WRAP_ERROR_CODE(current_file_->seek(from_offset, fs::DirectIoFile::kDirectIoSeekSet));
    WRAP_ERROR_CODE(current_file_->read_raw(read_size, buffer.get_block()));

    char* buf = reinterpret_cast<char*>(buffer.get_block());
    uint64_t cur = 0;
    uint64_t first_non_durable_at = read_size;
    while (cur < read_size) {
      log::BaseLogType* entry = reinterpret_cast<log::BaseLogType*>(buf + cur);
      ASSERT_ND(entry->header_.get_kind() != log::kRecordLogs);
      const uint32_t log_length = entry->header_.log_length_;
      log::LogCode type = entry->header_.get_type();
      ASSERT_ND(type != log::kLogCodeInvalid);
      if (type == log::kLogCodeFiller || type == log::kLogCodeEpochMarker) {
        // Skip filler/marker. These don't have an XID.
      } else {
        Epoch epoch = entry->header_.xct_id_.get_epoch();
        if (epoch <= saved_durable_epoch) {
          // Mostly this case.
        } else {
          // Ok, found a non-durable entry!
          const uint64_t raw_offset = from_offset + cur;
          on_non_durable_meta_log_found(&entry->header_, saved_durable_epoch, raw_offset);
          ASSERT_ND(first_non_durable_at == read_size || first_non_durable_at < cur);
          first_non_durable_at = std::min(first_non_durable_at, cur);
          // We could break here, but let's read them all and warn about each.
          // The meta log should be tiny anyway.
        }
      }
      cur += log_length;
    }

    if (first_non_durable_at < read_size) {
      // NOTE: This happens. Although the meta logger itself immediately flushes all logs
      // to durable storage, the global durable_epoch is min(all_logger_durable_epoch).
      // Thus, when the user didn't invoke wait_on_commit, we might have to discard
      // some meta logs that are "durable by themselves" but "non-durable with regard to
      // the whole database".
      LOG(WARNING) << "Found some meta logs that are not in durable epoch ("
        << saved_durable_epoch << "). We will truncate non-durable regions."
        << " new durable_offset=" << first_non_durable_at;
      control_block_->durable_offset_ = first_non_durable_at;
      engine_->get_savepoint_manager()->change_meta_logger_durable_offset(first_non_durable_at);
    }
  } else {
    // Even if all locally-durable regions are globally durable,
    // there still could be locally-non-durable regions (=not yet fsynced).
    // Will truncate such regions.
    LOG(ERROR) << "Meta log file has a non-durable region. Probably there"
      << " was a crash. Will truncate";
  }

  const uint64_t new_offset = control_block_->durable_offset_;
  if (new_offset < current_file_->get_current_offset()) {
    LOG(WARNING) << "Truncating meta log file to " << new_offset
      << " from " << current_file_->get_current_offset();
    WRAP_ERROR_CODE(current_file_->truncate(new_offset, true));
  }
  WRAP_ERROR_CODE(current_file_->seek(new_offset, fs::DirectIoFile::kDirectIoSeekSet));
  return kRetOk;
}
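// --- Illustrative sketch (hypothetical log format, not FOEDUS's) ------------
// The scan above walks variable-length meta log entries, skips fillers and
// epoch markers, and records the offset of the first entry whose epoch is
// newer than the globally durable epoch; the file is then truncated there.
// ToyLogHeader and first_non_durable_offset() are simplified stand-ins; in
// this toy format an epoch of 0 marks a filler/marker entry with no XID.
#include <cstdint>
#include <cstring>

struct ToyLogHeader {
  uint32_t log_length_;  // total length of this entry in bytes
  uint32_t epoch_;       // 0 = filler/marker (no epoch attached)
};

// Returns read_size if every entry is durable; otherwise the byte offset at
// which the log should be truncated. Unlike the code above, this version
// stops at the first non-durable entry instead of warning about all of them.
uint64_t first_non_durable_offset(const char* buf, uint64_t read_size, uint32_t durable_epoch) {
  uint64_t cur = 0;
  while (cur < read_size) {
    ToyLogHeader header;
    std::memcpy(&header, buf + cur, sizeof(header));
    if (header.log_length_ == 0) {
      break;  // malformed entry; stop rather than loop forever
    }
    if (header.epoch_ != 0 && header.epoch_ > durable_epoch) {
      return cur;  // first non-durable entry found
    }
    cur += header.log_length_;
  }
  return read_size;
}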