Example #1
0
// Merges every sorted run held in `ready_` into a single chunk and hands it to `target`.
//
// target - page that receives the chunk through `complete_chunk`.
//
// Returns AKU_EBUSY when `sequence_number_` is even, AKU_ENO_DATA when `ready_` holds
// no runs, otherwise whatever `PageHeader::complete_chunk` reports. The sequence number
// is bumped (made even again) only when the chunk was written successfully.
//
// NOTE(review): when `complete_chunk` fails the sequence number is left odd and `ready_`
// has already been cleared - confirm that callers expect this.
aku_Status Sequencer::merge_and_compress(PageHeader* target) {
    // The merge may only start while the sequence number is odd.
    if (sequence_number_.load() % 2 == 0) {
        return AKU_EBUSY;
    }
    const bool nothing_to_merge = (ready_.size() == 0);
    if (nothing_to_merge) {
        return AKU_ENO_DATA;
    }

    // kway_merge hands every value to the consumer; the consumer never stops the merge.
    UncompressedChunk time_ordered;
    auto collect = [&time_ordered](TimeSeriesValue const& sample) {
        sample.add_to_header(&time_ordered);
        return true;
    };
    kway_merge<TimeOrderMergePredicate, AKU_CURSOR_DIR_FORWARD>(ready_, collect);
    ready_.clear();

    // Re-index the merged data before it is written to the page.
    UncompressedChunk chunk_ordered;
    if (!CompressionUtil::convert_from_time_order(time_ordered, &chunk_ordered)) {
        AKU_PANIC("Invalid chunk");
    }

    auto write_status = target->complete_chunk(chunk_ordered);
    if (write_status == AKU_SUCCESS) {
        sequence_number_.fetch_add(1);  // sequence number is even again
    }
    return write_status;
}
Example #2
0
// Creates a matcher whose string table is built by `StringTools::create_table(0x1000)`
// and whose `series_id` member starts at `starting_id`.
//
// starting_id - initial value of `series_id`; a value of zero is rejected with a panic.
SeriesMatcher::SeriesMatcher(uint64_t starting_id)
    : table(StringTools::create_table(0x1000))
    , series_id(starting_id)
{
    const bool id_is_usable = (starting_id != 0u);
    if (!id_is_usable) {
        AKU_PANIC("Bad series ID");
    }
}
Example #3
0
// Queries `akumuli_series` for the largest `storage_id`.
//
// Returns 1 when the query yields an empty string (the table has no rows), otherwise the
// value parsed with `boost::lexical_cast<uint64_t>`.
// Any exception raised while querying or parsing (including the panic for an empty row)
// is logged through `logger_` and then reported with AKU_PANIC.
uint64_t MetadataStorage::get_prev_largest_id() {
    auto max_id_sql = "SELECT max(storage_id) FROM akumuli_series;";
    try {
        auto rows = select_query(max_id_sql);
        auto first_row = rows.at(0);
        if (first_row.empty()) {
            AKU_PANIC("Can't get max storage id");
        }
        auto raw_id = first_row.at(0);
        // An empty string means the table is empty.
        return raw_id == "" ? 1ul : boost::lexical_cast<uint64_t>(raw_id);
    } catch(...) {
        const auto details = boost::current_exception_diagnostic_information();
        (*logger_)(AKU_LOG_ERROR, details.c_str());
        AKU_PANIC("Can't get max storage id");
    }
}
Example #4
0
// Moves the data that precedes the previous checkpoint from `runs_` to the `ready_` collection.
//
// new_checkpoint - value stored into `checkpoint_`; the split point itself is derived
//                  from the previous `checkpoint_` via `get_timestamp_`.
//
// Returns the value of the sequence number after the last increment performed here:
// odd when `ready_` was left populated, even when the collected data was below
// `c_threshold_` and has been put back into `runs_`.
// Panics when the first increment yields an even number.
int Sequencer::make_checkpoint_(aku_Timestamp new_checkpoint) {
    int flag = sequence_number_.fetch_add(1) + 1;
    if (flag % 2 == 0) {
        AKU_PANIC("macke_checkpoint_ should be called from one thread");
        return flag;
    }

    auto prev_top = get_timestamp_(checkpoint_);
    checkpoint_ = new_checkpoint;

    vector<PSortedRun> kept_runs;
    for (auto& candidate: runs_) {
        const auto split_at = lower_bound(candidate->begin(), candidate->end(),
                                          TimeSeriesValue(prev_top, AKU_LIMITS_MAX_ID, 0));
        const bool nothing_old = (split_at == candidate->begin());
        const bool nothing_new = (split_at == candidate->end());
        if (nothing_old) {
            // every timestamp is newer than prev_top - keep the run as it is
            kept_runs.push_back(move(candidate));
        } else if (nothing_new) {
            // every timestamp is older than prev_top - the whole run is ready
            ready_.push_back(move(candidate));
        } else {
            // the boundary lies inside the run - split it in two
            PSortedRun older(new SortedRun());
            copy(candidate->begin(), split_at, back_inserter(*older));
            ready_.push_back(move(older));
            PSortedRun newer(new SortedRun());
            copy(split_at, candidate->end(), back_inserter(*newer));
            kept_runs.push_back(move(newer));
        }
    }

    Lock guard(runs_resize_lock_);
    swap(runs_, kept_runs);

    // Check that compression threshold is reached
    size_t ready_total = 0u;
    for (auto& ready_run: ready_) {
        ready_total += ready_run->size();
    }
    if (ready_total < c_threshold_) {
        // Not enough data for an efficient compression yet: put the runs back into
        // runs_ so they stay searchable, and wait for more data to come.
        for (auto& ready_run: ready_) {
            runs_.push_back(ready_run);
        }
        ready_.clear();
        flag = sequence_number_.fetch_add(1) + 1;
    }
    return flag;
}
Example #5
0
// Builds the detector described by `ptree` and stores it in `detector_`.
//
// ptree - configuration tree. Keys read here: "threshold" (no default supplied),
//         "bits" (default 10), "hashes" (default 3), "alpha", "beta", "gamma"
//         (default 0.0 each) and "period" (default 0). The forecasting method is
//         obtained from `parse_anomaly_detector_type`.
// next  - node stored in `next_`.
//
// `alpha`, `beta` and `gamma` are checked with `validate_coef` against [0, 1].
// Panics when the parsed method matches none of the known cases.
//
// NOTE(review): `1 << bits` is undefined for large `bits` - confirm that
// `validate_anomaly_detector_params` bounds the value.
AnomalyDetector::AnomalyDetector(boost::property_tree::ptree const& ptree, std::shared_ptr<Node> next)
    : next_(next)
{
    validate_anomaly_detector_params(ptree);

    // Raw configuration values, read in the same order as they are validated.
    double limit = ptree.get<double>("threshold");
    uint32_t sketch_bits = ptree.get<uint32_t>("bits", 10u);
    uint32_t hash_count = ptree.get<uint32_t>("hashes", 3u);
    AnomalyDetector::FcastMethod kind = parse_anomaly_detector_type(ptree);
    double coef_alpha = ptree.get<double>("alpha", 0.0);
    double coef_beta = ptree.get<double>("beta", 0.0);
    double coef_gamma = ptree.get<double>("gamma", 0.0);
    int period_len = ptree.get<int>("period", 0);

    validate_coef(coef_alpha, 0.0, 1.0, "`alpha` should be in [0, 1] range");
    validate_coef(coef_beta,  0.0, 1.0, "`beta` should be in [0, 1] range");
    validate_coef(coef_gamma, 0.0, 1.0, "`gamma` should be in [0, 1] range");

    // Each method comes in a precise flavour and a sketch flavour; the sketch
    // flavour additionally receives the hash count and `1 << bits`.
    switch(kind) {
    case SMA:
        detector_ = AnomalyDetectorUtil::create_precise_sma(limit, period_len);
        break;
    case SMA_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_sma(hash_count, 1 << sketch_bits, limit, period_len);
        break;
    case EWMA:
        detector_ = AnomalyDetectorUtil::create_precise_ewma(limit, coef_alpha);
        break;
    case EWMA_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_ewma(hash_count, 1 << sketch_bits, limit, coef_alpha);
        break;
    case DOUBLE_EXP_SMOOTHING:
        detector_ = AnomalyDetectorUtil::create_precise_double_exp_smoothing(limit, coef_alpha, coef_gamma);
        break;
    case DOUBLE_EXP_SMOOTHING_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_double_exp_smoothing(hash_count, 1 << sketch_bits, limit, coef_alpha, coef_gamma);
        break;
    case HOLT_WINTERS:
        detector_ = AnomalyDetectorUtil::create_precise_holt_winters(limit, coef_alpha, coef_beta, coef_gamma, period_len);
        break;
    case HOLT_WINTERS_SKETCH:
        detector_ = AnomalyDetectorUtil::create_approx_holt_winters(hash_count, 1 << sketch_bits, limit, coef_alpha, coef_beta, coef_gamma, period_len);
        break;
    default:
        AKU_PANIC("AnomalyDetector building error");
    }
}
Example #6
0
// Merges the runs in `ready_` and writes them to `target` in chunks of at most
// `c_threshold_` values.
//
// target        - page that receives each chunk through `complete_chunk`.
// enforce_write - when true, a chunk is written even if it holds fewer than
//                 `c_threshold_` values.
//
// Returns AKU_EBUSY when `sequence_number_` is even and AKU_ENO_DATA when `ready_` is
// empty on entry (the sequence number is not touched in either case). Otherwise returns
// AKU_SUCCESS, or the failure reported by `complete_chunk`. Values that were not written
// are gathered into a new run, everything left in `ready_` goes back to `runs_`, and the
// sequence number is incremented before returning.
aku_Status Sequencer::merge_and_compress(PageHeader* target, bool enforce_write) {
    // The merge may only start while the sequence number is odd.
    if (sequence_number_.load() % 2 == 0) {
        return AKU_EBUSY;
    }
    if (ready_.empty()) {
        return AKU_ENO_DATA;
    }

    aku_Status result = AKU_SUCCESS;

    while (!ready_.empty()) {
        UncompressedChunk merged;
        merged.paramids.reserve(c_threshold_);
        merged.timestamps.reserve(c_threshold_);
        merged.values.reserve(c_threshold_);

        // The consumer accepts values until the budget is used up, then refuses.
        int budget = static_cast<int>(c_threshold_);
        auto take_sample = [&](TimeSeriesValue const& sample) {
            const bool has_room = budget > 0;
            --budget;
            if (has_room) {
                sample.add_to_header(&merged);
            }
            return has_room;
        };
        kway_merge<TimeOrderMergePredicate, AKU_CURSOR_DIR_FORWARD>(ready_, take_sample);

        const bool should_write = enforce_write || merged.paramids.size() >= c_threshold_;
        if (!should_write) {
            // Wait for more data
            result = AKU_ENO_DATA;
        } else {
            UncompressedChunk reindexed;
            if (!CompressionUtil::convert_from_time_order(merged, &reindexed)) {
                AKU_PANIC("Invalid chunk");
            }
            result = target->complete_chunk(reindexed);
        }

        if (result == AKU_SUCCESS) {
            continue;
        }

        // The chunk was not written: keep its values as a fresh run in ready_.
        PSortedRun leftover(new SortedRun());
        const int leftover_count = static_cast<int>(merged.paramids.size());
        for (int ix = 0; ix < leftover_count; ++ix) {
            leftover->push_back(TimeSeriesValue(merged.timestamps.at(ix),
                                                merged.paramids.at(ix),
                                                merged.values.at(ix)));
        }
        ready_.push_back(std::move(leftover));
        if (result == AKU_ENO_DATA) {
            // Too little data is not an error for the caller.
            result = AKU_SUCCESS;
        }
        break;
    }

    // Whatever is still in ready_ goes back to runs_ under the resize lock.
    if (!ready_.empty()) {
        Lock guard(runs_resize_lock_);
        for (const auto& pending_run: ready_) {
            runs_.push_back(pending_run);
        }
        ready_.clear();
    }

    sequence_number_.fetch_add(1);  // sequence number is even again

    return result;
}
Example #7
0
    bool scan_compressed_entries(uint32_t current_index, aku_Entry const* probe_entry, bool binary_search=false) {
        aku_Status status = AKU_SUCCESS;
        std::shared_ptr<UncompressedChunk> chunk_header, header;

        auto npages = page_->get_numpages();    // This needed to prevent key collision
        auto nopens = page_->get_open_count();  // between old and new page data, when
        auto pageid = page_->get_page_id();     // page is reallocated.

        auto key = std::make_tuple(npages*nopens + pageid, current_index);

        if (cache_ && cache_->contains(key)) {
            // Fast path
            header = cache_->get(key);
        } else {
            chunk_header.reset(new UncompressedChunk());
            header.reset(new UncompressedChunk());
            auto pdesc  = reinterpret_cast<CompressedChunkDesc const*>(&probe_entry->value[0]);
            auto pbegin = (const unsigned char*)page_->read_entry_data(pdesc->begin_offset);
            auto pend   = (const unsigned char*)page_->read_entry_data(pdesc->end_offset);
            auto probe_length = pdesc->n_elements;

            boost::crc_32_type checksum;
            checksum.process_block(pbegin, pend);
            if (checksum.checksum() != pdesc->checksum) {
                AKU_PANIC("File damaged!");
            }

            status = CompressionUtil::decode_chunk(chunk_header.get(), pbegin, pend, probe_length);
            if (status != AKU_SUCCESS) {
                AKU_PANIC("Can't decode chunk");
            }

            // TODO: depending on a query type we can use chunk order or convert back to time-order.
            // If we extract evertyhing it is better to convert to time order. If we picking some
            // parameter ids it is better to check if this ids present in a chunk and extract values
            // in chunk order and only after that - convert results to time-order.

            // Convert from chunk order to time order
            if (!CompressionUtil::convert_from_chunk_order(*chunk_header, header.get())) {
                AKU_PANIC("Bad chunk");
            }

            if (cache_) {
                cache_->put(key, header);
            }
        }

        int start_pos = 0;
        if (IS_BACKWARD_) {
            start_pos = static_cast<int>(header->timestamps.size() - 1);
        }
        bool probe_in_time_range = true;

        auto queryproc = query_;
        auto page = page_;

        auto put_entry = [&header, queryproc, page] (uint32_t i) {
            aku_PData pdata;
            pdata.type = AKU_PAYLOAD_FLOAT;
            pdata.float64 = header->values.at(i);
            pdata.size = sizeof(aku_Sample);
            aku_Sample result = {
                header->timestamps.at(i),
                header->paramids.at(i),
                pdata,
            };
            return queryproc->put(result);
        };

        if (IS_BACKWARD_) {
            for (int i = static_cast<int>(start_pos); i >= 0; i--) {
                probe_in_time_range = lowerbound_ <= header->timestamps[i] &&
                                      upperbound_ >= header->timestamps[i];
                if (probe_in_time_range) {
                    if (!put_entry(i)) {
                        probe_in_time_range = false;
                        break;
                    }
                } else {
                    probe_in_time_range = lowerbound_ <= header->timestamps[i];
                    if (!probe_in_time_range) {
                        break;
                    }
                }
            }
        } else {
            auto end_pos = (int)header->timestamps.size();
            for (auto i = start_pos; i != end_pos; i++) {
                probe_in_time_range = lowerbound_ <= header->timestamps[i] &&
                                      upperbound_ >= header->timestamps[i];
                if (probe_in_time_range) {
                    if (!put_entry(i)) {
                        probe_in_time_range = false;
                        break;
                    }
                } else {
                    probe_in_time_range = upperbound_ >= header->timestamps[i];
                    if (!probe_in_time_range) {
                        break;
                    }
                }
            }
        }
        return probe_in_time_range;
    }