void
TLogReaderInterface::SCONames(std::vector<SCO>& out)
{
    // Collect the SCO name of every cluster location, collapsing runs of
    // consecutive identical SCO names into a single entry.
    SCO prevContainer;
    const Entry* e;

    while ((e = nextLocation()))
    {
        const ClusterLocation l = e->clusterLocation();
        const SCO tmp = l.sco();
        if (not (tmp == prevContainer))
        {
            out.push_back(tmp);
            prevContainer = tmp;
        }
    }
}
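// Illustrative note (not from the original sources): only *consecutive*
// duplicates are collapsed above, so entries hitting SCOs in the order
// A, A, B, A yield {A, B, A}, not {A, B}. A caller needing a unique set
// would deduplicate itself, e.g. (sketch, assuming SCO defines operator<):
//
//   std::vector<SCO> names;
//   reader.SCONames(names);
//   std::sort(names.begin(), names.end());
//   names.erase(std::unique(names.begin(), names.end()), names.end());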
void
Scrubber::operator()()
{
    // Set up the FilePool
    LOG_INFO("Instantiating the FilePool");
    result_.snapshot_name = args_.snapshot_name;
    FilePool filepool(args_.scratch_dir);

    // Set up the backend
    SetupBackend();
    VERIFY(backend_interface_.get());

    LOG_INFO("Getting the sco access data from the backend");
    // Doesn't throw!
    NormalizedSCOAccessData access_data(*backend_interface_);

    youtils::wall_timer metadatascrubtime;

    try
    {
        // Get the snapshot from the backend and the tlogs to process
        LOG_INFO("Getting the snapshots file from the backend");
        fs::path snapshot = filepool.newFile("snapshots");
        backend_interface_->read(snapshot,
                                 snapshotFilename(),
                                 InsistOnLatestVersion::T);
        snapshot_persistor_.reset(new SnapshotPersistor(snapshot));
        snapshot_num_ = snapshot_persistor_->getSnapshotNum(args_.snapshot_name);
        snapshot_persistor_->getTLogsInSnapshot(snapshot_num_,
                                                result_.tlog_names_in);
    }
    catch (std::exception& e)
    {
        throw ScrubberException(e.what(), ScrubberException::NoCleanup);
    }
    catch (...)
    {
        throw ScrubberException("Unknown Exception", ScrubberException::NoCleanup);
    }

    boost::this_thread::interruption_point();

    LOG_INFO("Getting the tlogs to scrub from the snapshot file");
    VERIFY(not result_.tlog_names_in.empty());

    std::shared_ptr<TLogReaderInterface>
        combined_tlog_reader(makeCombinedTLogReader(filepool.directory(),
                                                    result_.tlog_names_in,
                                                    backend_interface_->clone()));

    // Split the TLogs into per-region parts and go through them
    LOG_INFO("Starting the TLog Splitter");
    scrubbing::ScrubbingSCODataVector scrubbing_data_vector;
    TLogSplitter tlog_splitter(combined_tlog_reader,
                               scrubbing_data_vector,
                               static_cast<RegionExponent>(args_.region_size_exponent),
                               filepool);

    boost::this_thread::interruption_point();
    tlog_splitter();
    boost::this_thread::interruption_point();

    LOG_INFO("Stopped the TLog Splitter");
    if (verbose_)
    {
        LOG_INFO("SCO information after the splitting\n" << scrubbing_data_vector);
    }

    const TLogSplitter::MapType& split_tlog_map = tlog_splitter.getMap();

    LOG_INFO("Metadata scrubbing the region tlogs");
    std::vector<fs::path> tlogs;

    for (TLogSplitter::MapType::const_iterator it = split_tlog_map.begin();
         it != split_tlog_map.end();
         ++it)
    {
        LOG_INFO("Handling tlog for region " << it->first);
        PartScrubber part_scrubber(it,
                                   scrubbing_data_vector,
                                   filepool,
                                   args_.region_size_exponent,
                                   args_.cluster_size_exponent);
        boost::this_thread::interruption_point();
        part_scrubber(tlogs);
        boost::this_thread::interruption_point();
    }

    LOG_INFO("Stopped the region metadata scrubs");
    if (verbose_)
    {
        LOG_INFO("SCO information after the metadata scrub\n" << scrubbing_data_vector);
    }

    // At this point `tlogs` contains backwards-ordered tlogs with the
    // metadata-scrubbed SCOs. We merge them into one forward tlog...
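    // Illustrative note (an assumption about the design, not taken from
    // the original sources): the per-region PartScrubber output is written
    // newest-entry-first, since superseded cluster writes are naturally
    // detected walking backwards through time. BackwardTLogReader below
    // consumes such a tlog back-to-front, so the merge restores forward
    // (oldest-first) order, and `last` ends up holding the most recent
    // ClusterLocation, whose SCO seeds the data scrub.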
LOG_INFO("Merging the metadatascrubbed tlogs"); BackwardTLogMerger backward_merger; for(std::vector<fs::path>::const_iterator i = tlogs.begin(); i != tlogs.end(); ++i) { backward_merger.addTLogReader(new BackwardTLogReader(*i)); } const fs::path metadata_scrubbed = filepool.newFile("metadatascrubbed_tlog"); ClusterLocation last; boost::this_thread::interruption_point(); backward_merger(metadata_scrubbed, &last); boost::this_thread::interruption_point(); #ifndef NDEBUG uint64_t size = 0; for(std::vector<fs::path>::const_iterator i = tlogs.begin(); i != tlogs.end(); ++i) { size += fs::file_size(*i); } VERIFY(size == fs::file_size(metadata_scrubbed)); #endif double metadata_scrubbed_time = metadatascrubtime.elapsed(); youtils::wall_timer datascrubtime; // No we scrub this LOG_INFO("Starting the DataScrub"); SCOPool scopool(scrubbing_data_vector, metadata_scrubbed, filepool, *backend_interface_, args_.cluster_size_exponent, args_.sco_size, minimum_number_of_used_entries, access_data, last.sco(), result_.new_sconames); std::pair<volumedriver::CheckSum, uint64_t> sp_result = scopool(); // variable 'ss0' set but not used [-Werror=unused-but-set-variable] // CheckSum ss0 = sp_result.first; uint64_t relocNum = sp_result.second; LOG_INFO("Stopped the DataScrub"); if(verbose_) { LOG_INFO("SCO information after datascrub\n" << scrubbing_data_vector); } double data_scrub_time = datascrubtime.elapsed(); LOG_INFO("Starting the forward merging of tlogs"); ForwardTLogMerger forward_merger; forward_merger.addTLogReader(new TLogReader(scopool.rewritten_tlog_path())); forward_merger.addTLogReader(new TLogReader(scopool.nonrewritten_tlog_path())); const fs::path result_tlog(filepool.newFile("completely_scrubbed_tlog")); // variable 'cs1' set but not used [-Werror=unused-but-set-variable] // CheckSum cs1 = forward_merger(result_tlog); boost::this_thread::interruption_point(); forward_merger(result_tlog); boost::this_thread::interruption_point(); LOG_INFO("Finished the forward merging of tlogs"); LOG_INFO("Starting cutting up the tlog in digestible chuncks"); // Now we cut up the tlog into pieces again TLogCutter t(backend_interface_.get(), result_tlog, filepool); boost::this_thread::interruption_point(); result_.tlogs_out = t(); boost::this_thread::interruption_point(); VERIFY(not result_.tlogs_out.empty()); LOG_INFO("Finished cutting up the tlog in digestible chuncks"); if(fs::file_size(scopool.relocations_tlog_path()) > 0) { LOG_INFO("Starting cutting up relocations tlog"); std::vector<std::string> cutup_relocations; TheSonOfTLogCutter t(backend_interface_.get(), scopool.relocations_tlog_path(), filepool, cutup_relocations); t(); VERIFY(not cutup_relocations.empty()); result_.relocs = cutup_relocations; LOG_INFO("Finished cutting up relocations tlog"); } result_.relocNum = relocNum; scopool.getSCONamesToBeDeleted(result_.sconames_to_be_deleted); scopool.get_access_data(result_.prefetch); if(not args_.apply_immediately) { UUID result_id; std::string result_name = std::string(scrub_result_string) + result_id.str(); VERIFY(isScrubbingResultString(result_name)); // No interruption points here fs::path result_file_name = filepool.newFile(result_name); youtils::Serialization::serializeAndFlush<boost::archive::text_oarchive>(result_file_name, result_); // work around ALBA uploads timing out but eventually succeeding in the // background, leading to overwrite on retry. 
TODO("AR: use OverwriteObject::F instead"); VERIFY(not backend_interface_->objectExists(result_name)); backend_interface_->write(result_file_name, result_name, OverwriteObject::T); LOG_INFO("Gotten " << result_.tlog_names_in.size() << " tlogs from the backend and written " << result_.tlogs_out.size() << " back"); LOG_INFO("Metadata scrub took " << metadata_scrubbed_time << " seconds"); LOG_INFO("Gotten " << scopool.numberOfSCOSReadFromBackend() << " scos from the backend and written " << scopool.numberOfSCOSWrittenToBackend() << " back"); LOG_INFO("Data scrub took " << data_scrub_time << " seconds "); LOG_INFO("Scrubbing result is in " << result_name); scrubbing_result_name = result_name; } else { LOG_INFO("Applying the scrubbing result immediately on the backend"); boost::this_thread::interruption_point(); snapshot_persistor_->replace(result_.tlog_names_in, result_.tlogs_out, snapshot_num_); snapshot_persistor_->setSnapshotScrubbed(snapshot_num_, true); fs::path new_snaps = filepool.newFile("new_snapshots.xml"); snapshot_persistor_->saveToFile(new_snaps, SyncAndRename::T); boost::this_thread::interruption_point(); backend_interface_->write(new_snaps, snapshotFilename(), OverwriteObject::T); LOG_INFO("Wrote the new snapshots.xml and its data to the backend, cleaning up"); boost::this_thread::interruption_point(); for (const SCO sconame : result_.sconames_to_be_deleted) { LOG_INFO("Deleting SCO " << sconame.str()); backend_interface_->remove(sconame.str()); } boost::this_thread::interruption_point(); for (const auto& tlog_id : result_.tlog_names_in) { const auto tlog_name(boost::lexical_cast<std::string>(tlog_id)); LOG_INFO("Deleting TLog " << tlog_name); backend_interface_->remove(tlog_name); } } }