/**
 * Searches for the threshold on a continuous feature that maximises the
 * Gini gain.
 *
 * Samples are visited in ascending order of `values`. Each visited sample is
 * moved from the "upper" contingency table to the "lower" one, so after
 * visiting sorted position i the lower partition holds the i + 1 samples with
 * the smallest values (i.e. those with value <= values[indices[i]]) and the
 * upper partition holds the remaining ones. The gain of that split is the
 * Gini impurity of the whole set minus the size-weighted impurity of the two
 * partitions.
 *
 * @param values  feature value of every sample
 * @param classes class label of every sample, indexed like `values`
 * @return (best gain, threshold at which it was reached); (0, 0) when a single
 *         class is present or when no split has a strictly positive gain
 */
GainPair continuous_gini_gain(const VectorXd &values, const VectorXi &classes) {
    IntsSet unique_classes(classes.data(), classes.data() + classes.size());
    if (unique_classes.size() == 1)
        return std::make_pair(0, 0);

    VectorXi lower_contingency_table = VectorXi::Zero(unique_classes.size());
    // upper_contingency_table starts as the counts for each class.
    // NOTE(review): this relies on get_cross_table() laying out its counts in
    // the iteration order of an IntsSet, which is also how `position` is
    // derived below -- confirm if either side changes.
    VectorXi upper_contingency_table = get_cross_table(classes);
    const double total_gini = gini(upper_contingency_table.cast<double>());

    double best_gain = 0;
    double best_threshold = 0;
    Ints indices = argsort<double>(values.data(), values.size());
    IntsSet::iterator begin = unique_classes.begin();
    const unsigned int n_samples = classes.size();

    // Scan the classes in the order obtained from sorting the values.
    // The last sorted position is skipped: it would put every sample in the
    // lower partition, a split whose gain is zero by construction and which
    // would require the impurity of an empty table.
    for (unsigned int i = 0; i + 1 < n_samples; ++i) {
        const int current_class = classes[indices[i]];
        IntsSet::iterator it = unique_classes.find(current_class);
        const unsigned int position = std::distance(begin, it);
        lower_contingency_table(position) += 1;
        upper_contingency_table(position) -= 1;

        // Sample i has already been moved, so the lower partition now has
        // i + 1 members; the weights must match the tables they multiply.
        const unsigned int n_low = i + 1;
        const double P_low = 1. * n_low / n_samples;
        const double G_low = gini(lower_contingency_table.cast<double>());
        const double P_upp = 1. * (n_samples - n_low) / n_samples;
        const double G_upp = gini(upper_contingency_table.cast<double>());

        const double G = P_low * G_low + P_upp * G_upp;
        const double g = total_gini - G;
        if (g > best_gain) {
            best_gain = g;
            best_threshold = values[indices[i]];
        }
    }
    return std::make_pair(best_gain, best_threshold);
}
// Builds a starting subset of h row indices of `xi` and passes it to cov_CStep.
//
// Steps, as performed below:
//   1. if `intercept` is non-zero, subtract a per-column location estimate
//      (qCalc with qFo[calcM]) from the working copy x;
//   2. divide every column of x by a per-column scale estimate
//      (pCalc with pFo[calcM]);
//   3. fill the symmetric p x p matrix U (unit diagonal) with
//      0.25 * (s(x_i + x_j)^2 - s(x_i - x_j)^2) for every column pair,
//      s being the same scale estimate -- presumably the
//      Gnanadesikan-Kettenring pairwise estimate, confirm against the caller;
//   4. project x on the right singular vectors of U, rescale the projected
//      columns and store each row's sum of squares in dY;
//   5. reset dIn to 0..n-1, partially order it with std::nth_element and
//      IdLess(dY) around position h, then call cov_CStep on x.
//
// Parameters:
//   xi        input matrix (n rows, p columns); not modified, a copy is used.
//   calcM     index into pFo / qFo; only 0 and 1 are valid entries.
//   intercept non-zero enables the centring of step 1.
//   warn      receives one 0/1 flag per column (1 when the column scale is
//             below tol); it is indexed 0..p-1, so it must hold at least p
//             entries.
//   h         subset size forwarded to cov_CStep and used as the nth_element
//             pivot position.
//   dIn       output; overwritten only if no scale check fails.
//   w3        forwarded by value to cov_CStep.
//
// The function returns early -- leaving dIn untouched -- as soon as any column
// scale (before or after the rotation) is smaller than tol.
void FFOgkBasis(
			const MatrixXd& xi,
			const int& calcM,
			const int& intercept,
			VectorXi& warn,
			const int& h,
			VectorXi& dIn,
			int w3
		){
	// Scale estimators selectable through calcM.
	double (*pFo[])(Ref<VectorXd>,int)={&qn,&scaleTau2};
	// Location estimators selectable through calcM (note entry 1 is scaleTau2
	// here as well; presumably it can return a location -- TODO confirm).
	double (*qFo[])(Ref<VectorXd>,int)={&Fmedian,&scaleTau2};
	// h0 is computed but not used anywhere in this function.
	const int p=xi.cols(),n=xi.rows(),h0=(n+1)/2;
	double b1=0.0,b2=0.0;
	const double tol=1e-8;
	int i,j;
	MatrixXd x=xi;
	RowVectorXd lamba(p);
	// x2 is copied before any centring, so the location and scale estimates
	// below are both computed on the raw columns.
	MatrixXd x2=x;
	if(intercept){
		for(i=0;i<p;i++)	lamba(i)=qCalc(x2.col(i),1,qFo[calcM]);
		x.rowwise()-=lamba;
	}
	for(i=0;i<p;i++)		lamba(i)=pCalc(x2.col(i),0,pFo[calcM]);
	// Flag columns whose scale is (numerically) zero and bail out if any.
	for(i=0;i<p;i++)		warn(i)=(lamba(i)<tol)?1:0;
	i=warn.sum();
	if(i>0)				return;
	for(i=0;i<p;i++)		x.col(i).array()/=lamba(i);
	// U starts as the identity; only the off-diagonal entries are filled below.
	VectorXd dvec1=VectorXd::Ones(p);
	MatrixXd U=dvec1.asDiagonal();
	VectorXd sYi(n);
	VectorXd sYj(n);
	VectorXd dY(n);
	for(i=0;i<p;++i){
		sYi=x.col(i);
		for(j=0;j<i;++j){
			sYj=x.col(j);
			// squared scale of the sum of the two columns
			dY=sYi+sYj;
			b1=pCalc(dY,0,pFo[calcM]);
			b1*=b1;
			// squared scale of their difference
			dY=sYi-sYj;
			b2=pCalc(dY,0,pFo[calcM]);
			b2*=b2;
			U(i,j)=0.25*(b1-b2);
			U(j,i)=U(i,j);
		}
	}
	// Rotate the standardised data with the right singular vectors of U.
	JacobiSVD<MatrixXd> svd(U,ComputeThinV);
	x2=x*svd.matrixV();
	// Same scale check as above, now on the rotated columns.
	for(i=0;i<p;i++)		lamba(i)=pCalc(x2.col(i),0,pFo[calcM]);
	for(i=0;i<p;i++)		warn(i)=(lamba(i)<tol)?1:0;
	i=warn.sum();
	if(i>0)				return;
	for(i=0;i<p;i++)		x2.col(i).array()/=lamba(i);
	// dY(k) = sum over columns of the squared, rescaled, rotated row k.
	dY=x2.array().abs2().rowwise().sum();
	dIn.setLinSpaced(n,0,n-1);
	// IdLess presumably orders indices by the dY value they point to, which
	// would put the h smallest-dY rows first -- verify against IdLess.
	std::nth_element(dIn.data(),dIn.data()+h,dIn.data()+dIn.size(),IdLess(dY.data()));
	// Note: the refinement runs on x (standardised, not rotated), with h0 == h.
	cov_CStep(dIn,x,h,h,w3);
	return;
}
void cov_CStep( VectorXi& dIn, MatrixXd& x, const int h, const int h0, int w3 ){ const int n=x.rows(),p=x.cols(); double w1,w0; int w2=1,i; MatrixXd xSub(h,p); for(i=0;i<h0;i++) xSub.row(i)=x.row(dIn(i)); RowVectorXd xSub_mean(p); xSub_mean=xSub.topRows(h0).colwise().mean(); xSub.topRows(h0).rowwise()-=xSub_mean; x.rowwise()-=xSub_mean; MatrixXd Sig(p,p); //Sig=xSub.topRows(h0).adjoint()*xSub.topRows(h0); Sig.setZero().selfadjointView<Lower>().rankUpdate(xSub.topRows(h0).transpose()); Sig.array()/=(double)(h0-1); LDLT<MatrixXd> chol=Sig.ldlt(); MatrixXd b=MatrixXd::Identity(p,p); chol.solveInPlace(b); w1=chol.vectorD().array().minCoeff(); VectorXd dP(n); if(w1>1e-6){ w1=std::numeric_limits<double>::max(); dP=((x*b).cwiseProduct(x)).rowwise().sum(); } else { w2=0; w3=0; w1=chol.vectorD().array().log().sum()*2.00; } while(w2){ dIn.setLinSpaced(n,0,n-1); std::nth_element(dIn.data(),dIn.data()+h,dIn.data()+dIn.size(),IdLess(dP.data())); for(i=0;i<h;i++) xSub.row(i)=x.row(dIn(i)); xSub_mean=xSub.colwise().mean(); xSub.rowwise()-=xSub_mean; x.rowwise()-=xSub_mean; //Sig=xSub.adjoint()*xSub; Sig.setZero().selfadjointView<Lower>().rankUpdate(xSub.transpose()); Sig.array()/=(double)(h-1); chol=Sig.ldlt(); b=MatrixXd::Identity(p,p); chol.solveInPlace(b); if(chol.vectorD().array().minCoeff()>1e-6){ w0=w1; w1=chol.vectorD().array().log().sum()*2.00; dP=((x*b).cwiseProduct(x)).rowwise().sum(); (w0-w1<1e-3)?(w2=0):(w2=1); } else { w2=0; w3=0; } } }
/**
 * Counts how many times each distinct entry of `values` occurs.
 *
 * @param values vector of integer labels
 * @return vector with one count per distinct label; slot k belongs to the
 *         k-th label in the iteration order of an IntsSet built from `values`
 */
VectorXi get_cross_table(const VectorXi &values) {
    IntsSet distinct(values.data(), values.data() + values.size());
    VectorXi counts = VectorXi::Zero(distinct.size());

    const IntsSet::const_iterator first = distinct.begin();
    for (int sample = 0; sample < values.size(); ++sample) {
        // The slot of a label is its rank inside the set.
        const IntsSet::const_iterator hit = distinct.find(values(sample));
        const unsigned int slot = std::distance(first, hit);
        counts(slot) += 1;
    }
    return counts;
}
/**
 * Returns a copy of the data set with its samples in random order.
 *
 * One random permutation of 0..X.cols()-1 is drawn and applied to the columns
 * of both X and y, so sample/target pairs stay aligned.
 *
 * @param X data matrix with one sample per column
 * @param y targets; it is right-multiplied by an X.cols() x X.cols()
 *          permutation, so it presumably has X.cols() columns (row-vector
 *          layout) -- confirm against VectorType
 * @return tuple (shuffled X, shuffled y)
 */
tuple<MatrixType, VectorType> shuffle_data_set(const MatrixType & X, const VectorType & y)
{
    const int n_samples = static_cast<int>(X.cols());

    // LinSpaced takes an inclusive upper bound: the valid column indices are
    // 0 .. n_samples - 1. Passing n_samples as the bound yields an
    // out-of-range index for small inputs (e.g. {0, 2} for two columns).
    VectorXi indices = VectorXi::LinSpaced(n_samples, 0, n_samples - 1);

    // NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
    // C++17; moving to std::shuffle needs <random> and an explicit generator.
    std::random_shuffle(indices.data(), indices.data() + n_samples);

    // Column j of the result is column indices[j] of the input.
    MatrixType shuffled_X = X * indices.asPermutation();
    VectorType shuffled_y = y * indices.asPermutation();
    return make_tuple(shuffled_X, shuffled_y);
}
// Seeds one counter per entry of parent_ with the flags in `s` (true -> 1,
// counters beyond s.size() stay 0), then walks the entries in increasing index
// order and adds each counter to the counter of its parent. Entries whose
// parent index is negative are not propagated.
// NOTE(review): the result is a full subtree sum only if every entry appears
// before its parent in index order -- confirm how parent_ is built.
VectorXi IOUSet::computeTree(const VectorXb & s) const {
	VectorXi counts = VectorXi::Zero(parent_.size());

	const bool * const flags_begin = s.data();
	const bool * const flags_end = flags_begin + s.size();
	std::copy( flags_begin, flags_end, counts.data() );

	const int n_entries = counts.size();
	for( int node = 0; node < n_entries; ++node ) {
		if( parent_[node] < 0 )
			continue;
		counts[ parent_[node] ] += counts[node];
	}
	return counts;
}
/**
 * Builds the contingency table of two label vectors.
 *
 * @param values  first label of every sample
 * @param classes second label of every sample
 * @return matrix whose entry (j, k) is the number of samples carrying the j-th
 *         distinct value and the k-th distinct class, both ranked in the
 *         iteration order of an IntsSet
 * @throws ValueError when the two vectors do not have the same number of rows
 */
MatrixXi get_cross_table(const VectorXi &values, const VectorXi &classes) {
    if (values.rows() != classes.rows())
        throw ValueError("The vector of classes and values do not have same size");

    IntsSet distinct_values(values.data(), values.data() + values.size());
    IntsSet distinct_classes(classes.data(), classes.data() + classes.size());
    MatrixXi counts = MatrixXi::Zero(distinct_values.size(), distinct_classes.size());

    const IntsSet::const_iterator first_value = distinct_values.begin();
    const IntsSet::const_iterator first_class = distinct_classes.begin();
    for (int sample = 0; sample < classes.size(); ++sample) {
        const IntsSet::const_iterator class_pos = distinct_classes.find(classes(sample));
        const IntsSet::const_iterator value_pos = distinct_values.find(values(sample));
        // Row and column are the ranks of the labels inside their sets.
        const unsigned int row = std::distance(first_value, value_pos);
        const unsigned int col = std::distance(first_class, class_pos);
        counts(row, col) += 1;
    }
    return counts;
}
void ordering(const int & _first_ordered_node) { t_ordering_ = clock(); // full problem ordering if (_first_ordered_node == 0) { // ordering ordering constraints node_ordering_restrictions_.resize(n_nodes_); node_ordering_restrictions_ = A_nodes_.bottomRows(1).transpose(); // computing nodes partial ordering_ A_nodes_.makeCompressed(); PermutationMatrix<Dynamic, Dynamic, int> incr_permutation_nodes(n_nodes_); orderer_(A_nodes_, incr_permutation_nodes, node_ordering_restrictions_.data()); // node ordering to variable ordering PermutationMatrix<Dynamic, Dynamic, int> incr_permutation(A_.cols()); nodePermutation2VariablesPermutation(incr_permutation_nodes, incr_permutation); // apply partial_ordering orderings A_nodes_ = (A_nodes_ * incr_permutation_nodes.transpose()).sparseView(); A_ = (A_ * incr_permutation.transpose()).sparseView(); // ACCUMULATING PERMUTATIONS accumulatePermutation(incr_permutation_nodes); } // partial ordering else { int ordered_nodes = n_nodes_ - _first_ordered_node; int unordered_nodes = n_nodes_ - ordered_nodes; if (ordered_nodes > 2) // only reordering when involved nodes in the measurement are not the two last ones { // SUBPROBLEM ORDERING (from first node variable to last one) //std::cout << "ordering partial_ordering problem: " << _first_ordered_node << " to "<< n_nodes_ - 1 << std::endl; SparseMatrix<int> sub_A_nodes_ = A_nodes_.rightCols(ordered_nodes); // _partial_ordering ordering_ constraints node_ordering_restrictions_.resize(ordered_nodes); node_ordering_restrictions_ = sub_A_nodes_.bottomRows(1).transpose(); // computing nodes partial ordering_ sub_A_nodes_.makeCompressed(); PermutationMatrix<Dynamic, Dynamic, int> partial_permutation_nodes(ordered_nodes); orderer_(sub_A_nodes_, partial_permutation_nodes, node_ordering_restrictions_.data()); // node ordering to variable ordering PermutationMatrix<Dynamic, Dynamic, int> partial_permutation(A_.cols()); nodePermutation2VariablesPermutation(partial_permutation_nodes, 
partial_permutation); // apply partial_ordering orderings int ordered_variables = A_.cols() - nodes_.at(_first_ordered_node).location; A_nodes_.rightCols(ordered_nodes) = (A_nodes_.rightCols(ordered_nodes) * partial_permutation_nodes.transpose()).sparseView(); A_.rightCols(ordered_variables) = (A_.rightCols(ordered_variables) * partial_permutation.transpose()).sparseView(); R_.rightCols(ordered_variables) = (R_.rightCols(ordered_variables) * partial_permutation.transpose()).sparseView(); // ACCUMULATING PERMUTATIONS accumulatePermutation(partial_permutation_nodes); } } time_ordering_ += ((double) clock() - t_ordering_) / CLOCKS_PER_SEC; }