// Merges every sorted run stored in `ready_` into one chunk (values are
// visited through kway_merge with TimeOrderMergePredicate in forward
// direction), re-indexes the chunk with
// CompressionUtil::convert_from_time_order and passes the result to
// `target->complete_chunk`.
//
// Precondition: `sequence_number_` is odd, i.e. the caller owns the
// sequencer "lock".
//
// Returns
//   AKU_EBUSY    - `sequence_number_` is even, nothing was done;
//   AKU_ENO_DATA - `ready_` is empty, nothing was done;
//   otherwise    - the status returned by `complete_chunk`
//                  (AKU_SUCCESS when the chunk was written).
//
// The two early returns leave `sequence_number_` untouched. Once the merge was
// attempted the sequence number is always incremented (made even again), on
// success and on failure alike.
aku_Status Sequencer::merge_and_compress(PageHeader* target) {
    bool owns_lock = sequence_number_.load() % 2;  // progress_flag_ must be odd to start
    if (!owns_lock) {
        return AKU_EBUSY;
    }
    if (ready_.size() == 0) {
        return AKU_ENO_DATA;
    }

    UncompressedChunk chunk_header;

    // Appends each merged value to the chunk; always asks for the next one.
    auto consumer = [&](TimeSeriesValue const& val) {
        val.add_to_header(&chunk_header);
        return true;
    };

    kway_merge<TimeOrderMergePredicate, AKU_CURSOR_DIR_FORWARD>(ready_, consumer);
    ready_.clear();

    UncompressedChunk reindexed_header;
    if (!CompressionUtil::convert_from_time_order(chunk_header, &reindexed_header)) {
        AKU_PANIC("Invalid chunk");
    }

    auto status = target->complete_chunk(reindexed_header);

    // Release the lock no matter how the write ended. `ready_` is already
    // drained here, so keeping the sequence number odd after a failed write
    // would not allow a retry - it would only leave the sequencer locked and
    // make the next make_checkpoint_ call observe an even value and panic.
    sequence_number_.fetch_add(1);  // progress_flag_ is even again
    return status;
}
// Builds a matcher: `table` is initialised with
// StringTools::create_table(0x1000) and `series_id` with `starting_id`.
//
// A `starting_id` equal to zero is rejected with AKU_PANIC("Bad series ID").
SeriesMatcher::SeriesMatcher(uint64_t starting_id)
    : table(StringTools::create_table(0x1000))
    , series_id(starting_id)
{
    const bool id_is_usable = starting_id != 0u;
    if (!id_is_usable) {
        AKU_PANIC("Bad series ID");
    }
}
uint64_t MetadataStorage::get_prev_largest_id() { auto query = "SELECT max(storage_id) FROM akumuli_series;"; try { auto results = select_query(query); auto row = results.at(0); if (row.empty()) { AKU_PANIC("Can't get max storage id"); } auto id = row.at(0); if (id == "") { // Table is empty return 1ul; } return boost::lexical_cast<uint64_t>(id); } catch(...) { (*logger_)(AKU_LOG_ERROR, boost::current_exception_diagnostic_information().c_str()); AKU_PANIC("Can't get max storage id"); } }
// move sorted runs to ready_ collection int Sequencer::make_checkpoint_(aku_Timestamp new_checkpoint) { int flag = sequence_number_.fetch_add(1) + 1; if (flag % 2 != 0) { auto old_top = get_timestamp_(checkpoint_); checkpoint_ = new_checkpoint; vector<PSortedRun> new_runs; for (auto& sorted_run: runs_) { auto it = lower_bound(sorted_run->begin(), sorted_run->end(), TimeSeriesValue(old_top, AKU_LIMITS_MAX_ID, 0)); // Check that compression threshold is reached if (it == sorted_run->begin()) { // all timestamps are newer than old_top, do nothing new_runs.push_back(move(sorted_run)); continue; } else if (it == sorted_run->end()) { // all timestamps are older than old_top, move them ready_.push_back(move(sorted_run)); } else { // it is in between of the sorted run - split PSortedRun run(new SortedRun()); copy(sorted_run->begin(), it, back_inserter(*run)); // copy old ready_.push_back(move(run)); run.reset(new SortedRun()); copy(it, sorted_run->end(), back_inserter(*run)); // copy new new_runs.push_back(move(run)); } } Lock guard(runs_resize_lock_); swap(runs_, new_runs); size_t ready_size = 0u; for (auto& sorted_run: ready_) { ready_size += sorted_run->size(); } if (ready_size < c_threshold_) { // If ready doesn't contains enough data compression wouldn't be efficient, // we need to wait for more data to come // We should make sorted runs in ready_ array searchable again for (auto& sorted_run: ready_) { runs_.push_back(sorted_run); } ready_.clear(); flag = sequence_number_.fetch_add(1) + 1; } } else { AKU_PANIC("macke_checkpoint_ should be called from one thread"); } return flag; }
// Creates an anomaly detector node from a property tree and stores `next` as
// the downstream node.
//
// The tree is first checked by validate_anomaly_detector_params. Values read:
//   "threshold" - double, read without a default;
//   "bits"      - uint32_t, default 10; sketch-based detectors get `1 << bits`;
//   "hashes"    - uint32_t, default 3;
//   "alpha", "beta", "gamma" - double, default 0.0, each passed through
//                 validate_coef with the bounds 0.0 and 1.0;
//   "period"    - int, default 0.
// The forecasting method comes from parse_anomaly_detector_type and selects
// the AnomalyDetectorUtil factory that builds `detector_`. An unknown method
// ends in AKU_PANIC("AnomalyDetector building error").
AnomalyDetector::AnomalyDetector(boost::property_tree::ptree const& ptree, std::shared_ptr<Node> next)
    : next_(next)
{
    validate_anomaly_detector_params(ptree);

    double   anomaly_threshold = ptree.get<double>("threshold");
    uint32_t sketch_bits       = ptree.get<uint32_t>("bits", 10u);
    uint32_t hash_count        = ptree.get<uint32_t>("hashes", 3u);
    AnomalyDetector::FcastMethod forecast = parse_anomaly_detector_type(ptree);
    double   alpha             = ptree.get<double>("alpha", 0.0);
    double   beta              = ptree.get<double>("beta", 0.0);
    double   gamma             = ptree.get<double>("gamma", 0.0);
    int      period            = ptree.get<int>("period", 0);

    validate_coef(alpha, 0.0, 1.0, "`alpha` should be in [0, 1] range");
    validate_coef(beta,  0.0, 1.0, "`beta` should be in [0, 1] range");
    validate_coef(gamma, 0.0, 1.0, "`gamma` should be in [0, 1] range");

    // Evaluated only by the sketch-based branches below.
    auto sketch_size = [&sketch_bits] { return 1 << sketch_bits; };

    switch(forecast) {
    case SMA:
        detector_ = AnomalyDetectorUtil::create_precise_sma(
                        anomaly_threshold, period);
        break;
    case SMA_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_sma(
                        hash_count, sketch_size(), anomaly_threshold, period);
        break;
    case EWMA:
        detector_ = AnomalyDetectorUtil::create_precise_ewma(
                        anomaly_threshold, alpha);
        break;
    case EWMA_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_ewma(
                        hash_count, sketch_size(), anomaly_threshold, alpha);
        break;
    case DOUBLE_EXP_SMOOTHING:
        detector_ = AnomalyDetectorUtil::create_precise_double_exp_smoothing(
                        anomaly_threshold, alpha, gamma);
        break;
    case DOUBLE_EXP_SMOOTHING_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_double_exp_smoothing(
                        hash_count, sketch_size(), anomaly_threshold, alpha, gamma);
        break;
    case HOLT_WINTERS:
        detector_ = AnomalyDetectorUtil::create_precise_holt_winters(
                        anomaly_threshold, alpha, beta, gamma, period);
        break;
    case HOLT_WINTERS_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_holt_winters(
                        hash_count, sketch_size(), anomaly_threshold, alpha, beta, gamma, period);
        break;
    default:
        AKU_PANIC("AnomalyDetector building error");
    }
}
// Writes the content of `ready_` to `target` in chunks of at most
// `c_threshold_` values.
//
// Precondition: `sequence_number_` is odd (the caller owns the sequencer
// "lock"); otherwise AKU_EBUSY is returned. An empty `ready_` yields
// AKU_ENO_DATA. Both early returns leave `sequence_number_` untouched.
//
// While `ready_` is not empty a chunk is filled through kway_merge and then
//   - written with `target->complete_chunk` when it holds at least
//     `c_threshold_` values or when `enforce_write` is set;
//   - otherwise kept back to wait for more data.
// A chunk that was not written successfully (or was kept back) is turned into
// a sorted run again and appended to `ready_`, and the loop stops. A kept-back
// chunk is not an error: the function reports AKU_SUCCESS for it.
//
// Before returning, whatever is still in `ready_` is appended to `runs_`
// (under `runs_resize_lock_`) and `sequence_number_` is incremented, which
// makes it even again.
//
// Returns AKU_SUCCESS or the failure status of `complete_chunk`.
aku_Status Sequencer::merge_and_compress(PageHeader* target, bool enforce_write) {
    // progress_flag_ must be odd to start
    if (sequence_number_.load() % 2 == 0) {
        return AKU_EBUSY;
    }
    if (ready_.empty()) {
        return AKU_ENO_DATA;
    }

    aku_Status result = AKU_SUCCESS;

    while (!ready_.empty()) {
        UncompressedChunk chunk;
        chunk.paramids.reserve(c_threshold_);
        chunk.timestamps.reserve(c_threshold_);
        chunk.values.reserve(c_threshold_);

        // Accepts values until the per-chunk budget is used up.
        int budget = static_cast<int>(c_threshold_);
        auto take_value = [&](TimeSeriesValue const& value) {
            const bool accepted = budget > 0;
            --budget;
            if (accepted) {
                value.add_to_header(&chunk);
            }
            return accepted;
        };
        kway_merge<TimeOrderMergePredicate, AKU_CURSOR_DIR_FORWARD>(ready_, take_value);

        const bool write_now = enforce_write || chunk.paramids.size() >= c_threshold_;
        if (write_now) {
            UncompressedChunk reindexed;
            if (!CompressionUtil::convert_from_time_order(chunk, &reindexed)) {
                AKU_PANIC("Invalid chunk");
            }
            result = target->complete_chunk(reindexed);
        } else {
            // Wait for more data
            result = AKU_ENO_DATA;
        }

        if (result == AKU_SUCCESS) {
            continue;
        }

        // The chunk was not stored - give its values back as a sorted run.
        PSortedRun returned_run(new SortedRun());
        const int value_count = static_cast<int>(chunk.paramids.size());
        for (int ix = 0; ix < value_count; ++ix) {
            returned_run->push_back(TimeSeriesValue(chunk.timestamps.at(ix),
                                                    chunk.paramids.at(ix),
                                                    chunk.values.at(ix)));
        }
        ready_.push_back(std::move(returned_run));

        if (result == AKU_ENO_DATA) {
            result = AKU_SUCCESS;
        }
        break;
    }

    if (!ready_.empty()) {
        Lock guard(runs_resize_lock_);
        for (const auto& leftover: ready_) {
            runs_.push_back(leftover);
        }
        ready_.clear();
    }

    sequence_number_.fetch_add(1);  // progress_flag_ is even again
    return result;
}
bool scan_compressed_entries(uint32_t current_index, aku_Entry const* probe_entry, bool binary_search=false) { aku_Status status = AKU_SUCCESS; std::shared_ptr<UncompressedChunk> chunk_header, header; auto npages = page_->get_numpages(); // This needed to prevent key collision auto nopens = page_->get_open_count(); // between old and new page data, when auto pageid = page_->get_page_id(); // page is reallocated. auto key = std::make_tuple(npages*nopens + pageid, current_index); if (cache_ && cache_->contains(key)) { // Fast path header = cache_->get(key); } else { chunk_header.reset(new UncompressedChunk()); header.reset(new UncompressedChunk()); auto pdesc = reinterpret_cast<CompressedChunkDesc const*>(&probe_entry->value[0]); auto pbegin = (const unsigned char*)page_->read_entry_data(pdesc->begin_offset); auto pend = (const unsigned char*)page_->read_entry_data(pdesc->end_offset); auto probe_length = pdesc->n_elements; boost::crc_32_type checksum; checksum.process_block(pbegin, pend); if (checksum.checksum() != pdesc->checksum) { AKU_PANIC("File damaged!"); } status = CompressionUtil::decode_chunk(chunk_header.get(), pbegin, pend, probe_length); if (status != AKU_SUCCESS) { AKU_PANIC("Can't decode chunk"); } // TODO: depending on a query type we can use chunk order or convert back to time-order. // If we extract evertyhing it is better to convert to time order. If we picking some // parameter ids it is better to check if this ids present in a chunk and extract values // in chunk order and only after that - convert results to time-order. 
// Convert from chunk order to time order if (!CompressionUtil::convert_from_chunk_order(*chunk_header, header.get())) { AKU_PANIC("Bad chunk"); } if (cache_) { cache_->put(key, header); } } int start_pos = 0; if (IS_BACKWARD_) { start_pos = static_cast<int>(header->timestamps.size() - 1); } bool probe_in_time_range = true; auto queryproc = query_; auto page = page_; auto put_entry = [&header, queryproc, page] (uint32_t i) { aku_PData pdata; pdata.type = AKU_PAYLOAD_FLOAT; pdata.float64 = header->values.at(i); pdata.size = sizeof(aku_Sample); aku_Sample result = { header->timestamps.at(i), header->paramids.at(i), pdata, }; return queryproc->put(result); }; if (IS_BACKWARD_) { for (int i = static_cast<int>(start_pos); i >= 0; i--) { probe_in_time_range = lowerbound_ <= header->timestamps[i] && upperbound_ >= header->timestamps[i]; if (probe_in_time_range) { if (!put_entry(i)) { probe_in_time_range = false; break; } } else { probe_in_time_range = lowerbound_ <= header->timestamps[i]; if (!probe_in_time_range) { break; } } } } else { auto end_pos = (int)header->timestamps.size(); for (auto i = start_pos; i != end_pos; i++) { probe_in_time_range = lowerbound_ <= header->timestamps[i] && upperbound_ >= header->timestamps[i]; if (probe_in_time_range) { if (!put_entry(i)) { probe_in_time_range = false; break; } } else { probe_in_time_range = upperbound_ >= header->timestamps[i]; if (!probe_in_time_range) { break; } } } } return probe_in_time_range; }