Example #1
0
File: metric.cpp  Project: 0x0all/madlib
// Find the k closest columns of inMatrix to inVector, where
// k = ioLast - ioFirst, under the distance function inMetric. On return,
// [ioFirst, ioLast) holds (column index, distance) tuples sorted by
// increasing distance.
// NOTE(review): the template header declaring DistanceFunction and
// RandomAccessIterator lies above this excerpt and is not visible here.
void
closestColumnsAndDistances(
    const MappedMatrix& inMatrix,
    const MappedColumnVector& inVector,
    DistanceFunction& inMetric,
    RandomAccessIterator ioFirst,
    RandomAccessIterator ioLast) {

    // Presumably compares tuples starting from their last component, so the
    // heap below is ordered by distance (element 1 of the tuple), not by
    // column index -- the name suggests this; confirm against its definition.
    ReverseLexicographicComparator<
        typename std::iterator_traits<RandomAccessIterator>::value_type>
            comparator;

    // Seed the range with k sentinel entries at infinite distance so that
    // any real distance displaces them.
    std::fill(ioFirst, ioLast,
        std::make_tuple(0, std::numeric_limits<double>::infinity()));
    for (Index i = 0; i < inMatrix.cols(); ++i) {
        double currentDist
            = AnyType_cast<double>(
                inMetric(MappedColumnVector(inMatrix.col(i)), inVector)
            );

        // outIndicesAndDistances is a heap, so the first element is maximal
        if (currentDist < std::get<1>(*ioFirst)) {
            // Unfortunately, the STL does not have a decrease-key function,
            // so we are wasting a bit of performance here
            std::pop_heap(ioFirst, ioLast, comparator);
            *(ioLast - 1) = std::make_tuple(i, currentDist);
            std::push_heap(ioFirst, ioLast, comparator);
        }
    }
    // Turn the max-heap into a sequence sorted by ascending distance.
    std::sort_heap(ioFirst, ioLast, comparator);
}
Example #2
0
AnyType
get_row_from_2d_array::run(AnyType & args) {
    // Extract one "row" of a dense 2-d array, addressed with 1-based (SQL)
    // indexing. The array is mapped column-first, so the requested row is a
    // column of the mapped matrix -- presumably; see the transposition note
    // in the sibling matrix functions.
    MappedMatrix input = args[0].getAs<MappedMatrix>();
    int index = args[1].getAs<int>() - 1; // database index starts from 1
    if (index < 0 or index >= input.cols()) {
        // Report the 1-based index the caller actually passed, and state the
        // valid range. (The previous message printed the internal 0-based
        // index and claimed "index >= cols" even when the index was < 1.)
        std::stringstream err_msg;
        err_msg << "Out-of-bound index: " << (index + 1)
                << ", expected a value in [1, " << input.cols() << "]";
        throw std::runtime_error(err_msg.str());
    }
    // Copy the selected column into a freshly allocated result vector.
    MutableNativeColumnVector ret(this->allocateArray<double>(input.rows()));
    ret = input.col(static_cast<Index>(index));

    return ret;
}
Example #3
0
AnyType matrix_vec_mult_in_mem_2d::run(AnyType & args){
    // Multiply a matrix (args[1]) by a vector (args[0]).
    MappedColumnVector vec = args[0].getAs<MappedColumnVector>();
    MappedMatrix mat = args[1].getAs<MappedMatrix>();

    // The matrix arrives in column-first order, i.e. it is stored
    // transposed; its column count therefore equals the logical row count.
    if (mat.cols() != vec.size()) {
        throw std::invalid_argument(
            "dimensions mismatch: vec.size() != matrix.rows()");
    }

    // Since mat is the transpose, mat * vec computes trans(vec) * trans(mat),
    // which is the desired product.
    Matrix product = mat * vec;
    ColumnVector result = product.col(0);
    return result;
}
Example #4
0
AnyType matrix_vec_mult_in_mem_1d::run(AnyType & args){
    // Multiply a matrix (args[1], flattened into a 1-d array) by a vector
    // (args[0]).
    MappedColumnVector vec1 = args[0].getAs<MappedColumnVector>();
    // matrix stored as a 1-d array
    MappedColumnVector vec2 = args[1].getAs<MappedColumnVector>();

    // The flat array must split evenly into columns of length vec1.size().
    if (vec2.size() % vec1.size() != 0) {
        throw std::invalid_argument(
            "dimensions mismatch: matrix.size() is not multiples of vec.size()");
    }

    // Reinterpret the flat array as a vec1.size() x (vec2.size()/vec1.size())
    // matrix; the rebinding happens in column-major order, so this view holds
    // the matrix transposed.
    MappedMatrix mat;
    mat.rebind(vec2.memoryHandle(), vec1.size(), vec2.size()/vec1.size());

    // Undo the transposition to get the actual matrix-vector product.
    Matrix product = trans(mat) * vec1;
    ColumnVector result = product.col(0);
    return result;
}
Example #5
0
File: metric.cpp  Project: haradh1/madlib
std::tuple<Index, double>
closestColumnAndDistance(
    const MappedMatrix& inMatrix,
    const MappedColumnVector& inVector,
    DistanceFunction& inMetric) {

    Index closestColumn = 0;
    double minDist = std::numeric_limits<double>::infinity();

    for (Index i = 0; i < inMatrix.cols(); ++i) {
        double currentDist
            = AnyType_cast<double>(
                inMetric(MappedColumnVector(inMatrix.col(i)), inVector)
            );
        if (currentDist < minDist) {
            closestColumn = i;
            minDist = currentDist;
        }
    }

    return std::tuple<Index, double>(closestColumn, minDist);
}
Example #6
0
/*
 * Permute each categorical variable and predict
 */
AnyType
rf_cat_imp_score::run(AnyType &args) {
    // Permutation-importance scoring for categorical features: for each
    // categorical feature, replace its value with a random draw from that
    // feature's level distribution (args[7]), score the tree's prediction
    // against the true response, and average over n_permutations.
    //
    // args[0]: serialized decision tree
    // args[1]: categorical feature values (mutable; permuted in place)
    // args[2]: continuous feature values
    // args[3]: number of levels of each categorical feature
    // args[4]: number of permutations
    // args[5]: true response y
    // args[6]: true for a classification tree, false for regression
    // args[7]: sampling distributions, presumably one column per cat
    //          feature -- confirm against the caller
    if (args[0].isNull() || args[7].isNull()) { return Null(); }
    Tree dt = args[0].getAs<ByteString>();
    MutableNativeIntegerVector cat_features;
    NativeColumnVector con_features;
    try {
        if (args[1].isNull()){
            // no cat features
            return Null();
        }
        else {
            MutableNativeIntegerVector xx_cat = args[1].getAs<MutableNativeIntegerVector>();
            cat_features.rebind(xx_cat.memoryHandle(), xx_cat.size());
        }
        if (args[2].isNull()){
            // no con features: bind an empty vector so predict_response
            // still receives a valid argument
            con_features.rebind(this->allocateArray<double>(0));
        }
        else {
            NativeColumnVector xx_con = args[2].getAs<NativeColumnVector>();
            con_features.rebind(xx_con.memoryHandle(), xx_con.size());
        }
    } catch (const ArrayWithNullException &e) {
        // not expect to reach here
        // if max_surr = 0, nulls are filtered
        // otherwise, mapped to -1 or NaN
        return Null();
    }

    MappedIntegerVector cat_n_levels = args[3].getAs<MappedIntegerVector>();

    int n_permutations = args[4].getAs<int>();
    double y = args[5].getAs<double>();
    bool is_classification = args[6].getAs<bool>();
    MappedMatrix distributions = args[7].getAs<MappedMatrix>();

    // returning: one accumulated score per categorical feature
    MutableNativeColumnVector permuted_predictions(
            this->allocateArray<double>(cat_n_levels.size()));

    // permute each and predict
    NativeRandomNumberGenerator generator;
    for (int p = 0; p < n_permutations; p ++) {
        for (Index i = 0; i < cat_n_levels.size(); i ++) {
            // Save the original value so it can be restored after scoring.
            int orig_i = cat_features(i);
            // Draw a replacement level from this feature's distribution. The
            // extra weight (cat_n_levels(i) + 1) presumably covers the
            // "missing" level that rvt() - 1 maps to -1 -- TODO confirm.
            discrete_distribution<> ddist(distributions.col(i).data(),
                    distributions.col(i).data() + cat_n_levels(i) + 1);
            variate_generator<NativeRandomNumberGenerator, discrete_distribution<> >
                    rvt(generator, ddist);

            cat_features(i) = rvt() - 1;

            // calling NativeIntegerVector for a const cast
            // see EigenIntegration_impl.hpp in ports for details
            double prediction = dt.predict_response(
                NativeIntegerVector(cat_features.memoryHandle()), con_features);
            double score = 0.;
            if (is_classification) {
                // NOTE(review): this counts a match whenever
                // y - prediction < 1e-3, including prediction >> y; an
                // absolute-difference check may have been intended -- verify.
                score = y - prediction < 1e-3 ? 1. : 0.;
            } else {
                // Negative squared error: values closer to 0 are better.
                score = - (y - prediction) * (y - prediction);
            }
            permuted_predictions(i) += score;

            // Restore the original value for the next iteration.
            cat_features(i) = orig_i;
        }
    }
    // Average the accumulated scores over all permutations.
    permuted_predictions /= n_permutations;
    return permuted_predictions;
}
Example #7
0
/*
 * Permute each continuous variable and predict
 */
AnyType
rf_con_imp_score::run(AnyType &args) {
    // Permutation-importance scoring for continuous features: for each
    // continuous feature, replace its value with a random draw mapped back
    // through the feature's split points (args[3]), score the tree's
    // prediction against the true response, and average over n_permutations.
    //
    // args[0]: serialized decision tree
    // args[1]: categorical feature values
    // args[2]: continuous feature values (mutable; permuted in place)
    // args[3]: serialized split points, num_con_features x num_bins
    // args[4]: number of permutations
    // args[5]: true response y
    // args[6]: true for a classification tree, false for regression
    // args[7]: sampling distributions, presumably one column per con
    //          feature -- confirm against the caller
    if (args[0].isNull() || args[7].isNull()) { return Null(); }
    Tree dt = args[0].getAs<ByteString>();
    NativeIntegerVector cat_features;
    MutableNativeColumnVector con_features;
    try {
        if (args[1].isNull()){
            // no cat features: bind an empty vector so predict_response
            // still receives a valid argument
            cat_features.rebind(this->allocateArray<int>(0));
        }
        else {
            NativeIntegerVector xx_cat = args[1].getAs<NativeIntegerVector>();
            cat_features.rebind(xx_cat.memoryHandle(), xx_cat.size());
        }
        if (args[2].isNull()){
            //no con features
            return Null();
        }
        else {
            MutableNativeColumnVector xx_con = args[2].getAs<MutableNativeColumnVector>();
            con_features.rebind(xx_con.memoryHandle(), xx_con.size());
        }
    } catch (const ArrayWithNullException &e) {
        // not expect to reach here
        // if max_surr = 0, nulls are filtered
        // otherwise, mapped to -1 or NaN
        return Null();
    }

    // con_splits size = num_con_features x num_bins
    // When num_con_features = 0, the input will be an empty string that is read
    // as a ByteString
    ConSplitsResult<RootContainer> splits_results = args[3].getAs<ByteString>();

    int n_permutations = args[4].getAs<int>();
    double y = args[5].getAs<double>();
    bool is_classification = args[6].getAs<bool>();
    MappedMatrix distributions = args[7].getAs<MappedMatrix>();

    // returning: one accumulated score per continuous feature
    MutableNativeColumnVector permuted_predictions(
            this->allocateArray<double>(con_features.size()));

    // permute each and predict
    NativeRandomNumberGenerator generator;
    for (int p = 0; p < n_permutations; p ++) {
        for (Index i = 0; i < con_features.size(); i ++) {
            // Save the original value so it can be restored after scoring.
            double orig_i = con_features(i);
            // Draw a bin index from this feature's distribution; one weight
            // per row of the distributions matrix.
            discrete_distribution<> ddist(distributions.col(i).data(),
                    distributions.col(i).data() + distributions.rows());
            variate_generator<NativeRandomNumberGenerator, discrete_distribution<> >
                    rvt(generator, ddist);

            // Map the sampled bin back to a representative feature value:
            // bin 0 stands for a missing value (NaN); the last bin is past
            // the final split point; any other bin maps to its split value.
            int outcome = rvt();
            if (outcome == 0) {
                con_features(i) = std::numeric_limits<double>::quiet_NaN();
            } else if (outcome == static_cast<int>(distributions.rows()) - 1) {
                // bin value that is larger than the last separator (last value in con_splits)
                con_features(i) = splits_results.con_splits(i, outcome-2) + 1.;
            } else {
                con_features(i) = splits_results.con_splits(i, outcome-1);
            }

            // calling NativeColumnVector for a const cast
            // see EigenIntegration_impl.hpp in ports for details
            double prediction = dt.predict_response(
                cat_features, NativeColumnVector(con_features.memoryHandle()));
            double score = 0.;
            if (is_classification) {
                // NOTE(review): this counts a match whenever
                // y - prediction < 1e-3, including prediction >> y; an
                // absolute-difference check may have been intended -- verify.
                score = y - prediction < 1e-3 ? 1. : 0.;
            } else {
                // Negative squared error: values closer to 0 are better.
                score = - (y - prediction) * (y - prediction);
            }
            permuted_predictions(i) += score;

            // Restore the original value for the next iteration.
            con_features(i) = orig_i;
        }
    }
    // Average the accumulated scores over all permutations.
    permuted_predictions /= n_permutations;
    return permuted_predictions;
}