Ejemplo n.º 1
0
/**
 * @brief Tanimoto distance between two vectors
 *
 * Computes
 *     (|x|^2 + |y|^2 - 2 <x,y>) / (|x|^2 + |y|^2 - <x,y>),
 * which equals one minus the Tanimoto similarity
 *     <x,y> / (|x|^2 + |y|^2 - <x,y>).
 *
 * @param inX First vector
 * @param inY Second vector
 * @return The Tanimoto distance of inX and inY. If the denominator is zero
 *     (both vectors are zero vectors), 0 is returned instead of NaN.
 */
double
distTanimoto(
    const MappedColumnVector& inX,
    const MappedColumnVector& inY) {

    // Note that this is not a metric in general!
    const double dotProduct = dot(inX, inY);
    const double normSum = inX.squaredNorm() + inY.squaredNorm();
    const double denominator = normSum - dotProduct;

    // Since 2 <x,y> <= |x|^2 + |y|^2, the denominator is at least normSum / 2
    // in exact arithmetic. It therefore vanishes only when both squared norms
    // are zero, in which case the original quotient would be 0 / 0 = NaN.
    // Two zero vectors are identical, so their distance is defined as 0.
    if (denominator == 0.)
        return 0.;

    return (normSum - 2 * dotProduct) / denominator;
}
Ejemplo n.º 2
0
/**
 * @brief Rescale a vector so that its elements add up to a target sum
 *
 * args[0] is the vector to rescale and args[1] is the desired sum. Each
 * element is multiplied by the target and divided by the current sum of the
 * vector. A current sum below 1e-6 is replaced by 1e-6 before dividing.
 */
AnyType
normalize_sum_array::run(AnyType &args){
    // Lower bound for the divisor, so that we never divide by (almost) zero
    const double kSmallestDivisor = 1e-6;

    const MappedColumnVector values = args[0].getAs<MappedColumnVector>();
    const double target = args[1].getAs<double>();

    const double currentSum = values.sum();
    const double divisor =
        (currentSum < kSmallestDivisor) ? kSmallestDivisor : currentSum;

    ColumnVector rescaled = values * target / divisor;
    return rescaled;
}
Ejemplo n.º 3
0
/**
 * @brief Squared angle between two vectors
 *
 * @param inX First vector
 * @param inY Second vector
 * @return The square of the angle (in radians) between inX and inY. If either
 *     vector has zero norm the angle is undefined; it is then taken to be pi
 *     (the same convention distAngle uses), so pi^2 is returned instead of
 *     NaN.
 */
double
squaredAngle(
    const MappedColumnVector& inX,
    const MappedColumnVector& inY) {

    const double xnorm = inX.norm();
    const double ynorm = inY.norm();

    // Deal with the undefined case where one of the norms is zero: dividing
    // by the product of the norms would yield NaN, which would silently
    // propagate through the clamping below and through acos().
    if (xnorm == 0. || ynorm == 0.) {
        const double pi = std::acos(-1.0);
        return pi * pi;
    }

    double cosine = dot(inX, inY) / (xnorm * ynorm);
    // Rounding can push the quotient slightly outside the domain of acos()
    if (cosine > 1)
        cosine = 1;
    else if (cosine < -1)
        cosine = -1;

    const double angle = std::acos(cosine);
    return angle * angle;
}
Ejemplo n.º 4
0
/**
 * @brief Fold a vector into per-segment products
 *
 * args[0] is the input vector and args[1] is an integer pattern of segment
 * lengths. The input is cut into consecutive segments of those lengths, and
 * element i of the result is the product of the entries of segment i.
 *
 * @throws std::invalid_argument if the pattern does not sum to the length of
 *     the input vector, or if the pattern contains a negative length.
 */
AnyType row_fold::run(AnyType & args){
    MappedColumnVector vec = args[0].getAs<MappedColumnVector>();
    MappedIntegerVector pat = args[1].getAs<MappedIntegerVector>();

    if (vec.size() != pat.sum()) {
        throw std::invalid_argument(
            "dimensions mismatch: row_in.size() != pattern.sum()");
    }

    // A matching sum is not sufficient: a pattern such as (5, -2) sums to 3
    // but would request a segment of length 5 from a vector of length 3.
    // Once every length is known to be non-negative, the matching sum
    // guarantees that all segments lie within the input vector.
    for (int i = 0; i < pat.size(); ++i) {
        if (pat[i] < 0) {
            throw std::invalid_argument(
                "invalid pattern: segment lengths must be non-negative");
        }
    }

    ColumnVector r(pat.size());
    int offset = 0;  // index in vec at which the current segment starts
    for (int i = 0; i < pat.size(); ++i) {
        r[i] = vec.segment(offset, pat[i]).prod();
        offset += pat[i];
    }

    return r;
}
Ejemplo n.º 5
0
/**
 * @brief Multiply an in-memory matrix with a vector
 *
 * args[0] is the vector and args[1] is the matrix. The result is the
 * product mat * vec, returned as a column vector.
 *
 * @throws std::invalid_argument if the length of the vector differs from
 *     the number of columns of the mapped matrix.
 */
AnyType matrix_vec_mult_in_mem_2d::run(AnyType & args){
    MappedColumnVector vec = args[0].getAs<MappedColumnVector>();
    MappedMatrix mat = args[1].getAs<MappedMatrix>();

    // Note that mat is constructed in column-first order, i.e. what we see
    // here is the transpose of the matrix the caller passed in. Hence the
    // caller's row count corresponds to mat.cols().
    const bool sizesAgree = (vec.size() == mat.cols());
    if (!sizesAgree) {
        throw std::invalid_argument(
            "dimensions mismatch: vec.size() != matrix.rows()");
    }

    // trans(vec) * trans(mat) = mat * vec
    ColumnVector product = mat * vec;
    return product;
}
Ejemplo n.º 6
0
// -----------------------------------------------------------------------
// Linear regression
// -----------------------------------------------------------------------
/**
 * @brief Transition step: fold one (x, y) observation into the state
 *
 * args[0] is the transition state, args[1] the dependent variable y and
 * args[2] the vector of independent variables x. Rows where y or x is NULL,
 * or where reading x raises ArrayWithNullException, are skipped: the
 * incoming state is handed back unchanged.
 */
AnyType
linregr_transition::run(AnyType& args) {
    MutableLinRegrState state = args[0].getAs<MutableByteString>();

    const bool rowHasNull = args[1].isNull() || args[2].isNull();
    if (rowHasNull)
        return args[0];

    double y = args[1].getAs<double>();

    // Bind x to the memory of the incoming array
    MappedColumnVector x;
    try {
        MappedColumnVector incoming = args[2].getAs<MappedColumnVector>();
        x.rebind(incoming.memoryHandle(), incoming.size());
    } catch (const ArrayWithNullException &) {
        // Skip this row and keep the state as it was
        return args[0];
    }

    state << MutableLinRegrState::tuple_type(x, y);
    return state.storage();
}
Ejemplo n.º 7
0
double
distAngle(
    const MappedColumnVector& inX,
    const MappedColumnVector& inY) {

	// Deal with the undefined case where one of the norm is zero
	// Angle is not defined. Just return \pi.
	double xnorm = inX.norm(), ynorm = inY.norm();
	if (xnorm < std::numeric_limits<double>::denorm_min()
		|| ynorm < std::numeric_limits<double>::denorm_min())
		return std::acos(-1);
	
    double cosine = dot(inX, inY) / (xnorm * ynorm);
    if (cosine > 1)
        cosine = 1;
    else if (cosine < -1)
        cosine = -1;
    return std::acos(cosine);
}
/**
 * @brief Transition step of the multinomial-logit multi-response GLM
 *
 * args[0] is the transition state, args[1] the response y, args[2] the vector
 * of independent variables x, args[3] an optional previous state and args[4]
 * the number of categories. Rows where y or x is NULL, or where reading x
 * raises ArrayWithNullException, are skipped, as is every row once the state
 * is marked as terminated: the incoming state is returned unchanged.
 *
 * On the first row (empty state) the state is dimensioned from x and args[4]
 * and, if a previous state is given, initialized from it.
 *
 * @throws std::runtime_error if the model is too large for the state to be
 *     stored (see the size check below).
 */
AnyType
multi_response_glm_multinom_logit_transition::run(AnyType& args) {
    MutableMultiResponseGLMState state = args[0].getAs<MutableByteString>();
    if (state.terminated || args[1].isNull() || args[2].isNull()) {
        return args[0];
    }
    double y = args[1].getAs<double>();
    MappedColumnVector x;
    try {
        MappedColumnVector xx = args[2].getAs<MappedColumnVector>();
        x.rebind(xx.memoryHandle(), xx.size());
    } catch (const ArrayWithNullException &) {
        return args[0];
    }
    if (state.empty()) {
        // All size arithmetic is done in 64-bit signed integers and checked
        // BEFORE anything is narrowed to uint16_t. Narrowing first would let
        // an oversized model wrap around to a small value and slip through
        // the check, and squaring a promoted uint16_t can overflow int.
        const int64_t max_uint16 = 65535;
        const int64_t num_features = static_cast<int64_t>(x.size());
        const uint16_t num_categories = args[4].getAs<uint16_t>();
        // Negative iff num_categories == 0 (and there is at least 1 feature)
        const int64_t num_coef =
                num_features * (static_cast<int64_t>(num_categories) - 1);

        // MADLIB-667: GPDB limits the single array size to be 1GB, which means
        // that the size of a double array cannot be larger than 134217727
        // because (134217727 * 8) / (1024 * 1024) = 1023. And solving
        // state_size = x^2 + 2x + 6 <= 134217727 will give x <= 11584.
        const int64_t max_state_size = 134217727;
        const bool representable = num_features <= max_uint16
                && num_coef >= 0 && num_coef <= max_uint16;
        // num_coef <= 65535 here, so the square cannot overflow int64_t
        if (!representable
                || 6 + num_coef * num_coef + 2 * num_coef > max_state_size) {
            throw std::runtime_error(
                "The product of number of independent variables and number of "
                "categories cannot be larger than 11584.");
        }

        // The values are now known to fit, so the narrowing casts are safe
        state.num_features = static_cast<uint16_t>(num_features);
        state.num_categories = num_categories;
        state.optimizer.num_coef = static_cast<uint16_t>(num_coef);

        state.resize();
        if (!args[3].isNull()) {
            MultiResponseGLMState prev_state = args[3].getAs<ByteString>();
            state = prev_state;
            state.reset();
        }
    }
    state << MutableMultiResponseGLMState::tuple_type(x, y);
    return state.storage();
}