double distTanimoto( const MappedColumnVector& inX, const MappedColumnVector& inY) { // Note that this is not a metric in general! double dotProduct = dot(inX, inY); double tanimoto = inX.squaredNorm() + inY.squaredNorm(); return (tanimoto - 2 * dotProduct) / (tanimoto - dotProduct); }
/**
 * @brief Rescale a vector so that its elements add up to a target sum.
 *
 * @param args args[0] is the input vector, args[1] is the target sum.
 * @return The input vector multiplied by the target sum and divided by the
 *     (possibly clamped) sum of its elements.
 */
AnyType
normalize_sum_array::run(AnyType &args) {
    const MappedColumnVector input_vector = args[0].getAs<MappedColumnVector>();
    const double sum_target = args[1].getAs<double>();

    // Avoid dividing by zero: any element sum below this threshold
    // (which includes negative sums) is replaced by the threshold itself.
    const double kSmallestDivisor = 1e-6;
    const double element_sum = input_vector.sum();
    const double divisor =
        (element_sum < kSmallestDivisor) ? kSmallestDivisor : element_sum;

    ColumnVector output_vector = input_vector * sum_target / divisor;
    return output_vector;
}
/**
 * @brief Squared angle (in radians) between two vectors.
 *
 * @param inX First vector.
 * @param inY Second vector.
 * @return acos(<x,y> / (|x| |y|))^2. When either vector has zero norm the
 *     angle is undefined and pi^2 is returned, which is consistent with
 *     distAngle() returning pi in that situation.
 */
double
squaredAngle(
    const MappedColumnVector& inX,
    const MappedColumnVector& inY) {

    const double xnorm = inX.norm();
    const double ynorm = inY.norm();

    // A zero norm would make the quotient below 0/0 = NaN, which passes
    // through both clamping comparisons and std::acos unchanged. Norms are
    // never negative, so "<= 0" is exactly the zero-norm case.
    if (xnorm <= 0.0 || ynorm <= 0.0) {
        const double pi = std::acos(-1.0);
        return pi * pi;
    }

    double cosine = dot(inX, inY) / (xnorm * ynorm);

    // Rounding can push the quotient marginally outside of [-1, 1], where
    // std::acos is not defined.
    if (cosine > 1)
        cosine = 1;
    else if (cosine < -1)
        cosine = -1;

    const double angle = std::acos(cosine);
    return angle * angle;
}
/**
 * @brief Fold a row into per-segment products.
 *
 * The pattern vector gives the length of each consecutive segment of the
 * input row; element i of the result is the product of the entries of the
 * i-th segment.
 *
 * @param args args[0] is the row (vector of doubles), args[1] is the
 *     pattern (vector of integer segment lengths).
 * @return A vector with one product per pattern entry.
 * @throws std::invalid_argument if the segment lengths do not add up to the
 *     length of the row, or if any segment length is negative.
 */
AnyType
row_fold::run(AnyType & args) {
    MappedColumnVector vec = args[0].getAs<MappedColumnVector>();
    MappedIntegerVector pat = args[1].getAs<MappedIntegerVector>();

    if (vec.size() != pat.sum()) {
        throw std::invalid_argument(
            "dimensions mismatch: row_in.size() != pattern.sum()");
    }

    // The sum check alone does not make the segments valid: a pattern such
    // as {3, -1} sums to 2 but would request a segment that runs past the
    // end of a 2-element row. With all lengths non-negative and the sum
    // matching, every segment below lies within the row.
    for (int i = 0; i < pat.size(); ++i) {
        if (pat[i] < 0) {
            throw std::invalid_argument(
                "invalid pattern: segment lengths must be non-negative");
        }
    }

    ColumnVector r(pat.size());
    int offset = 0;
    for (int i = 0; i < pat.size(); ++i) {
        r[i] = vec.segment(offset, pat[i]).prod();
        offset += pat[i];
    }
    return r;
}
/**
 * @brief Multiply an in-memory 2-D matrix by a vector.
 *
 * @param args args[0] is the vector, args[1] is the matrix.
 * @return The product mat * vec as a column vector.
 * @throws std::invalid_argument if the vector length differs from the
 *     number of columns of the mapped matrix.
 */
AnyType
matrix_vec_mult_in_mem_2d::run(AnyType & args) {
    MappedColumnVector vec = args[0].getAs<MappedColumnVector>();
    MappedMatrix mat = args[1].getAs<MappedMatrix>();

    // Note: mat is constructed in column-first order, which means that mat
    // is actually transposed.
    const bool conformable = (vec.size() == mat.cols());
    if (!conformable) {
        throw std::invalid_argument(
            "dimensions mismatch: vec.size() != matrix.rows()");
    }

    // trans(vec) * trans(mat) = mat * vec
    Matrix product = mat * vec;
    ColumnVector result = product.col(0);
    return result;
}
// ----------------------------------------------------------------------- // Linear regression // ----------------------------------------------------------------------- AnyType linregr_transition::run(AnyType& args) { MutableLinRegrState state = args[0].getAs<MutableByteString>(); if (args[1].isNull() || args[2].isNull()) { return args[0]; } double y = args[1].getAs<double>(); MappedColumnVector x; try { MappedColumnVector xx = args[2].getAs<MappedColumnVector>(); x.rebind(xx.memoryHandle(), xx.size()); } catch (const ArrayWithNullException &e) { return args[0]; } state << MutableLinRegrState::tuple_type(x, y); return state.storage(); }
double distAngle( const MappedColumnVector& inX, const MappedColumnVector& inY) { // Deal with the undefined case where one of the norm is zero // Angle is not defined. Just return \pi. double xnorm = inX.norm(), ynorm = inY.norm(); if (xnorm < std::numeric_limits<double>::denorm_min() || ynorm < std::numeric_limits<double>::denorm_min()) return std::acos(-1); double cosine = dot(inX, inY) / (xnorm * ynorm); if (cosine > 1) cosine = 1; else if (cosine < -1) cosine = -1; return std::acos(cosine); }
/**
 * @brief Transition step of the multinomial-logit multi-response GLM
 *     aggregate: feed one (x, y) observation into the state.
 *
 * On the first observation the state is dimensioned from the length of x
 * and the number of categories (args[4]); if a previous state is supplied
 * (args[3]) it is copied in and reset.
 *
 * @param args args[0] serialized state, args[1] response, args[2] vector of
 *     independent variables, args[3] previous state (may be NULL),
 *     args[4] number of categories.
 * @return The updated state storage, or args[0] unchanged when the state is
 *     already terminated, the response or vector is NULL, or the vector
 *     contains NULLs.
 * @throws std::runtime_error if the state would not fit the size limits
 *     checked below.
 */
AnyType
multi_response_glm_multinom_logit_transition::run(AnyType& args) {
    MutableMultiResponseGLMState state = args[0].getAs<MutableByteString>();
    if (state.terminated || args[1].isNull() || args[2].isNull()) {
        return args[0];
    }

    double y = args[1].getAs<double>();
    MappedColumnVector x;
    try {
        MappedColumnVector xx = args[2].getAs<MappedColumnVector>();
        x.rebind(xx.memoryHandle(), xx.size());
    } catch (const ArrayWithNullException&) {
        return args[0];
    }

    if (state.empty()) {
        // All dimension arithmetic is done in 64 bits and validated BEFORE
        // narrowing to the 16-bit state fields. Narrowing first would let a
        // wrapped-around product pass the size check, and squaring 16-bit
        // operands is carried out in (signed) int, which can overflow.
        const uint64_t max_uint16 = 65535;
        const uint64_t max_state_size = 134217727;

        const uint64_t num_features = static_cast<uint64_t>(x.size());
        const uint64_t num_categories = args[4].getAs<uint16_t>();
        const uint64_t num_coef = num_features * (num_categories - 1);

        // MADLIB-667: GPDB limits the single array size to be 1GB, which means
        // that the size of a double array cannot be larger than 134217727
        // because (134217727 * 8) / (1024 * 1024) = 1023. And solving
        // state_size = x^2 + 2x + 6 <= 134217727 gives x <= 11584.
        bool too_large = num_coef > max_uint16;
        if (!too_large) {
            // num_coef <= 65535 here, so this cannot overflow 64 bits.
            const uint64_t state_size =
                6 + num_coef * num_coef + 2 * num_coef;
            too_large = state_size > max_state_size;
        }
        if (too_large) {
            throw std::runtime_error(
                "The product of number of independent variables and number of "
                "categories cannot be larger than 11584.");
        }
        // Only reachable with a single category (num_coef == 0); the feature
        // count itself must still fit the 16-bit state field.
        if (num_features > max_uint16) {
            throw std::runtime_error(
                "The number of independent variables cannot be larger than "
                "65535.");
        }

        state.num_features = static_cast<uint16_t>(num_features);
        state.num_categories = static_cast<uint16_t>(num_categories);
        state.optimizer.num_coef = static_cast<uint16_t>(num_coef);

        state.resize();

        if (!args[3].isNull()) {
            MultiResponseGLMState prev_state = args[3].getAs<ByteString>();
            state = prev_state;
            state.reset();
        }
    }

    state << MutableMultiResponseGLMState::tuple_type(x, y);
    return state.storage();
}