예제 #1
0
/**
 * Searches for the split of a continuous attribute that maximizes the Gini gain.
 *
 * Samples are visited in ascending order of `values` (order given by argsort).
 * Each visited sample is moved from the "upper" partition to the "lower" one,
 * and the impurity of the whole set is compared with the size-weighted
 * impurity of the two partitions.
 *
 * @param values   attribute value of every sample.
 * @param classes  class label of every sample, indexed like `values`.
 * @return pair (best gain, threshold), where the threshold is the value of the
 *         last sample placed in the lower partition of the best split.
 *         (0, 0) is returned when there are fewer than two distinct classes or
 *         when no split has a strictly positive gain.
 *
 * NOTE(review): splits are also evaluated between samples sharing the same
 * value; confirm whether callers expect ties to be kept together.
 */
GainPair continuous_gini_gain(const VectorXd &values, const VectorXi &classes) {
  IntsSet unique_classes(classes.data(), classes.data() + classes.size());
  // No samples or a single class: there is nothing to separate.
  if (unique_classes.size() <= 1) return std::make_pair(0., 0.);

  // Per-class counts of the samples already moved to the lower partition.
  VectorXi lower_contingency_table = VectorXi::Zero(unique_classes.size());
  // Per-class counts of the samples still in the upper partition
  // (initially every sample).
  VectorXi upper_contingency_table = get_cross_table(classes);
  const double total_gini = gini(upper_contingency_table.cast<double>());
  double best_gain = 0;
  double best_threshold = 0;

  Ints indices = argsort<double>(values.data(), values.size());
  const IntsSet::iterator begin = unique_classes.begin();
  const unsigned int n_samples = classes.size();
  // Scan the classes in the order obtained from sorting the values. The last
  // sample is never moved: doing so would leave the upper partition empty,
  // which is not a split and cannot yield a positive gain.
  for (unsigned int i = 0; i + 1 < n_samples; ++i) {
    const int current_class = classes[indices[i]];
    // Row of the contingency tables = rank of the class among the distinct classes.
    const IntsSet::iterator it = unique_classes.find(current_class);
    const unsigned int position = std::distance(begin, it);
    lower_contingency_table(position) += 1;
    upper_contingency_table(position) -= 1;

    // After the move the lower partition holds i + 1 samples; the weights
    // must reflect that count so that P_low + P_upp == 1 matches the tables.
    const unsigned int n_low = i + 1;
    const double P_low = 1. * n_low / n_samples;
    const double G_low = gini(lower_contingency_table.cast<double>());
    const double P_upp = 1. * (n_samples - n_low) / n_samples;
    const double G_upp = gini(upper_contingency_table.cast<double>());
    const double G = P_low * G_low + P_upp * G_upp;
    const double g = total_gini - G;
    if (g > best_gain) {
      best_gain = g;
      best_threshold = values[indices[i]];
    }
  }
  return std::make_pair(best_gain, best_threshold);
}
예제 #2
0
파일: DetR.cpp 프로젝트: cran/DetR
// Builds a starting subset of h row indices in dIn and passes it to cov_CStep.
//
// Steps, as visible in this function:
//   1. optionally centre each column of the data (when `intercept` is non-zero)
//      and divide each column by a scale estimate;
//   2. fill a symmetric p x p matrix U (unit diagonal) from scale estimates of
//      pairwise column sums and differences;
//   3. project the scaled data on the V factor of the SVD of U, rescale the
//      projected columns, and rank rows by their squared norm;
//   4. keep h row indices selected by nth_element and run cov_CStep on them.
// (The name suggests the OGK estimator; presumably this is its orthogonalized
// basis construction -- confirm against the package documentation.)
//
// Parameters:
//   xi        input data, n rows by p columns (copied, never modified here).
//   calcM     index into the estimator tables below (0 or 1).
//   intercept non-zero to centre the columns before scaling.
//   warn      output flags, one per column: 1 when the column scale is below
//             tol. The function returns early if any flag is set.
//   h         size of the subset to select.
//   dIn       output: overwritten with row indices; after nth_element the first
//             h entries are the selected subset.
//   w3        forwarded by value to cov_CStep.
void FFOgkBasis(
		const MatrixXd& xi,
		const int& calcM,
		const int& intercept,
		VectorXi& warn,
		const int& h,
		VectorXi& dIn,
		int w3
	){
	// Estimator tables indexed by calcM: pFo is used through pCalc, qFo through qCalc.
	// Index 0 selects qn / Fmedian, index 1 selects scaleTau2 in both tables.
	double (*pFo[])(Ref<VectorXd>,int)={&qn,&scaleTau2}; 
	double (*qFo[])(Ref<VectorXd>,int)={&Fmedian,&scaleTau2}; 
	// NOTE(review): h0 is computed but never used in this function.
	const int p=xi.cols(),n=xi.rows(),h0=(n+1)/2;	
	double b1=0.0,b2=0.0;
	const double tol=1e-8;	// columns with a scale estimate below this are flagged in warn
	int i,j;
	MatrixXd x=xi;
	RowVectorXd lamba(p);
	// x2 is a second copy of the input; the per-column estimates below are taken
	// from x2, not from x. NOTE(review): presumably because pCalc/qCalc may
	// reorder the data they receive -- confirm against their definitions.
	MatrixXd x2=x;
	if(intercept){
		// Per-column estimate via qCalc, subtracted from every row of x.
		for(i=0;i<p;i++)	lamba(i)=qCalc(x2.col(i),1,qFo[calcM]);
		x.rowwise()-=lamba;	
	}
	// Per-column scale via pCalc; lamba is reused to hold the scales.
	for(i=0;i<p;i++)		lamba(i)=pCalc(x2.col(i),0,pFo[calcM]);
	for(i=0;i<p;i++)		warn(i)=(lamba(i)<tol)?1:0;
	i=warn.sum();
	// At least one column has a (near-)zero scale: give up, warn tells which.
	if(i>0)				return;
	for(i=0;i<p;i++)		x.col(i).array()/=lamba(i);
	// U starts as the p x p identity; only the off-diagonal entries are filled below.
	VectorXd dvec1=VectorXd::Ones(p);
	MatrixXd U=dvec1.asDiagonal();
	VectorXd sYi(n);
	VectorXd sYj(n);
	VectorXd dY(n);
	for(i=0;i<p;++i){
		sYi=x.col(i);
		for(j=0;j<i;++j){
			sYj=x.col(j);
			// U(i,j) = ( scale(x_i + x_j)^2 - scale(x_i - x_j)^2 ) / 4
			dY=sYi+sYj;
			b1=pCalc(dY,0,pFo[calcM]);
			b1*=b1;
			dY=sYi-sYj;
			b2=pCalc(dY,0,pFo[calcM]);
			b2*=b2;
			U(i,j)=0.25*(b1-b2);
			U(j,i)=U(i,j);	
		}		
	}
	// Project the scaled data on the V factor of the SVD of U.
	JacobiSVD<MatrixXd> svd(U,ComputeThinV);
	x2=x*svd.matrixV();
	// Same scale / warn / early-return sequence as above, now on the projected columns.
	for(i=0;i<p;i++)		lamba(i)=pCalc(x2.col(i),0,pFo[calcM]);
	for(i=0;i<p;i++)		warn(i)=(lamba(i)<tol)?1:0;
	i=warn.sum();
	if(i>0)				return;
	for(i=0;i<p;i++)		x2.col(i).array()/=lamba(i);
	// dY(r) = sum of squared entries of row r of the rescaled projection.
	dY=x2.array().abs2().rowwise().sum();
	// dIn = 0..n-1, then partially ordered around position h with the IdLess
	// comparator built on dY (presumably compares indices by their dY value --
	// confirm against IdLess).
	dIn.setLinSpaced(n,0,n-1);
	std::nth_element(dIn.data(),dIn.data()+h,dIn.data()+dIn.size(),IdLess(dY.data()));
	// Refine the subset; note that x (centred/scaled, not projected) is passed,
	// and h is given for both the h and h0 arguments.
	cov_CStep(dIn,x,h,h,w3);
	return;
}
예제 #3
0
파일: DetR.cpp 프로젝트: cran/DetR
// Iteratively refines a subset of rows of x.
//
// Starting from the first h0 indices in dIn, the function computes the mean and
// the scatter matrix of the subset, scores every row of x with the quadratic
// form x_r * inverse(Sig) * x_r', reselects h row indices by nth_element on
// that score, and repeats until the criterion (2 * sum of log of the LDLT
// diagonal) decreases by less than 1e-3 or the factorization has a diagonal
// entry <= 1e-6.
//
// Parameters:
//   dIn  in: initial subset in its first h0 entries; out: overwritten on every
//        pass with 0..n-1 partially ordered around position h.
//   x    data, n rows by p columns. Modified in place: re-centred by the subset
//        mean on every pass.
//   h    subset size used inside the loop (and the row count of the work buffer).
//   h0   size of the initial subset read from dIn.
//   w3   NOTE(review): received by value, so the `w3=0` assignments below are
//        not visible to the caller -- confirm whether a reference was intended.
void cov_CStep(
		VectorXi& dIn,
		MatrixXd& x,
		const int h,
		const int h0,
		int w3
	){
	const int n=x.rows(),p=x.cols();
	double w1,w0;	// w1: current criterion value, w0: value from the previous pass
	int w2=1,i;	// w2: loop flag, 1 = keep iterating
	MatrixXd xSub(h,p);
	// Gather the initial subset (h0 rows) into the top of the work buffer.
	for(i=0;i<h0;i++) 	xSub.row(i)=x.row(dIn(i));
	RowVectorXd xSub_mean(p);
	xSub_mean=xSub.topRows(h0).colwise().mean();	
	xSub.topRows(h0).rowwise()-=xSub_mean;
	// The full data set is centred with the subset mean as well.
	x.rowwise()-=xSub_mean;
	MatrixXd Sig(p,p);
	//Sig=xSub.topRows(h0).adjoint()*xSub.topRows(h0);
	// Scatter matrix of the centred subset, written through the Lower
	// self-adjoint view, then divided by (h0-1).
	Sig.setZero().selfadjointView<Lower>().rankUpdate(xSub.topRows(h0).transpose());
	Sig.array()/=(double)(h0-1);
	LDLT<MatrixXd> chol=Sig.ldlt();
	// b = solution of Sig * b = I, i.e. the inverse of Sig.
	MatrixXd b=MatrixXd::Identity(p,p);
	chol.solveInPlace(b);
	w1=chol.vectorD().array().minCoeff();
	VectorXd dP(n);	
	if(w1>1e-6){
		// Start from the largest double so the first pass of the loop never
		// satisfies the convergence test.
		w1=std::numeric_limits<double>::max();
		// dP(r) = x_r * b * x_r' for every row r.
		dP=((x*b).cwiseProduct(x)).rowwise().sum();
	} else {
		// Smallest LDLT diagonal entry too small: skip the loop entirely.
		w2=0;
		w3=0;
		// NOTE(review): this value of w1 is not read afterwards.
		w1=chol.vectorD().array().log().sum()*2.00;
	}
	while(w2){	
		// Reselect: indices 0..n-1 partially ordered around position h with the
		// IdLess comparator built on dP (presumably by ascending dP -- confirm
		// against IdLess).
		dIn.setLinSpaced(n,0,n-1);
		std::nth_element(dIn.data(),dIn.data()+h,dIn.data()+dIn.size(),IdLess(dP.data()));
		for(i=0;i<h;i++) 	xSub.row(i)=x.row(dIn(i));
		xSub_mean=xSub.colwise().mean();	
		xSub.rowwise()-=xSub_mean;
		// x is re-centred again on every pass (cumulative shift).
		x.rowwise()-=xSub_mean;
		//Sig=xSub.adjoint()*xSub;
		Sig.setZero().selfadjointView<Lower>().rankUpdate(xSub.transpose());
		Sig.array()/=(double)(h-1);
		chol=Sig.ldlt();
		b=MatrixXd::Identity(p,p);
		chol.solveInPlace(b);
		if(chol.vectorD().array().minCoeff()>1e-6){
			w0=w1;
			w1=chol.vectorD().array().log().sum()*2.00;
			dP=((x*b).cwiseProduct(x)).rowwise().sum();
			// Stop once the criterion decreased by less than 1e-3.
			(w0-w1<1e-3)?(w2=0):(w2=1);
		} else {
			// Degenerate scatter matrix: stop, keeping the subset just selected.
			w2=0;
			w3=0;
		}
	}
} 
예제 #4
0
/**
 * Counts the occurrences of every distinct entry of `values`.
 *
 * @param values  vector of integer labels.
 * @return vector with one count per distinct label; slot k holds the count of
 *         the label found at position k when iterating the IntsSet built from
 *         `values`.
 */
VectorXi get_cross_table(const VectorXi &values) {
  const IntsSet distinct(values.data(), values.data() + values.size());
  VectorXi counts = VectorXi::Zero(distinct.size());
  for (int idx = 0; idx < values.size(); ++idx) {
    // Slot of a label = its position inside the set of distinct labels.
    const unsigned int slot =
        std::distance(distinct.begin(), distinct.find(values(idx)));
    counts(slot) += 1;
  }
  return counts;
}
예제 #5
0
/**
 * Returns a randomly shuffled copy of a data set.
 *
 * The same random permutation is applied to the columns of X and to y.
 *
 * @param X  data matrix; the permutation has X.cols() entries and is applied
 *           on the right, i.e. to the columns.
 * @param y  targets. NOTE(review): y is right-multiplied by the same
 *           permutation, which requires it to have X.cols() columns -- confirm
 *           that VectorType is a row vector.
 * @return tuple (shuffled X, shuffled y); the inputs are left untouched.
 */
tuple<MatrixType, VectorType> shuffle_data_set(const MatrixType & X,
        const VectorType & y)
{

    // Identity permutation 0, 1, ..., cols-1. Both bounds of LinSpaced are
    // inclusive, so the upper bound must be cols-1: passing cols can produce
    // an index that is out of range for the permutation.
    VectorXi indices = VectorXi::LinSpaced(X.cols(), 0, X.cols() - 1);
    // NOTE(review): std::random_shuffle was removed in C++17; switch to
    // std::shuffle with an explicit engine when the file's includes are updated.
    std::random_shuffle(indices.data(), indices.data() + X.cols());
    // Right-multiplying by the permutation reorders the columns into new objects.
    MatrixType shuffled_X = X * indices.asPermutation();
    VectorType shuffled_y = y * indices.asPermutation();

    return make_tuple(shuffled_X, shuffled_y);
}
/**
 * Accumulates the flags in `s` along the parent links stored in parent_.
 *
 * Every entry starts as the corresponding flag of `s` (converted to int, zero
 * where `s` has no entry); each entry is then added to the entry of its
 * parent. Entries whose parent index is negative have nothing to add to.
 *
 * NOTE(review): a single forward pass yields complete totals only if every
 * node is visited before its parent -- confirm the ordering of parent_.
 *
 * @param s  one flag per node; at most parent_.size() entries are expected.
 * @return vector of parent_.size() accumulated counts.
 */
VectorXi IOUSet::computeTree(const VectorXb & s) const {
    VectorXi totals = VectorXi::Zero(parent_.size());
    std::copy( s.data(), s.data()+s.size(), totals.data() );

    for( int node=0; node<totals.size(); node++ ) {
        const int up = parent_[node];
        if( up < 0 )
            continue;
        totals[ up ] += totals[node];
    }
    return totals;
}
예제 #7
0
/**
 * Builds the contingency table of two label vectors of equal length.
 *
 * @param values   first label of every sample (selects the row).
 * @param classes  second label of every sample (selects the column).
 * @return matrix with one row per distinct entry of `values` and one column
 *         per distinct entry of `classes`; cell (r, c) counts the samples whose
 *         labels sit at positions r and c of the respective IntsSet.
 * @throws ValueError when the two vectors do not have the same number of rows.
 */
MatrixXi get_cross_table(const VectorXi &values, const VectorXi &classes) {
  if (values.rows() != classes.rows()) {
    throw ValueError("The vector of classes and values do not have same size");
  }
  const IntsSet distinct_values(values.data(), values.data() + values.size());
  const IntsSet distinct_classes(classes.data(), classes.data() + classes.size());
  MatrixXi table = MatrixXi::Zero(distinct_values.size(), distinct_classes.size());
  for (int sample = 0; sample < classes.size(); ++sample) {
    // Row / column = position of the label inside its set of distinct labels.
    const unsigned int row =
        std::distance(distinct_values.begin(), distinct_values.find(values(sample)));
    const unsigned int col =
        std::distance(distinct_classes.begin(), distinct_classes.find(classes(sample)));
    table(row, col) += 1;
  }
  return table;
}
예제 #8
0
        // Reorders the node columns (and the matching variable columns) of the
        // problem matrices, either for the whole problem or only for the nodes
        // from _first_ordered_node to the last one.
        //
        // _first_ordered_node: 0 reorders every node; any other value reorders
        //   only the trailing n_nodes_ - _first_ordered_node nodes, and only
        //   when more than two nodes are involved.
        //
        // Side effects visible here: A_nodes_ and A_ (and R_ in the partial
        // case) have their columns permuted, node_ordering_restrictions_ is
        // overwritten, the permutation is passed to accumulatePermutation, and
        // the elapsed time is added to time_ordering_.
        void ordering(const int & _first_ordered_node)
        {
            // Start of the timed section (see the accumulation at the end).
            t_ordering_ = clock();

            // full problem ordering
            if (_first_ordered_node == 0)
            {
                // ordering constraints: the transposed last row of A_nodes_
                node_ordering_restrictions_.resize(n_nodes_);
                node_ordering_restrictions_ = A_nodes_.bottomRows(1).transpose();

                // computing the node ordering; orderer_ fills incr_permutation_nodes
                A_nodes_.makeCompressed();
                PermutationMatrix<Dynamic, Dynamic, int> incr_permutation_nodes(n_nodes_);
                orderer_(A_nodes_, incr_permutation_nodes, node_ordering_restrictions_.data());

                // node ordering to variable ordering
                PermutationMatrix<Dynamic, Dynamic, int> incr_permutation(A_.cols());
                nodePermutation2VariablesPermutation(incr_permutation_nodes, incr_permutation);

                // apply the orderings to the columns of A_nodes_ and A_
                A_nodes_ = (A_nodes_ * incr_permutation_nodes.transpose()).sparseView();
                A_ = (A_ * incr_permutation.transpose()).sparseView();

                // ACCUMULATING PERMUTATIONS
                accumulatePermutation(incr_permutation_nodes);
            }

            // partial ordering
            else
            {
                int ordered_nodes = n_nodes_ - _first_ordered_node;
                // NOTE(review): unordered_nodes is computed but never used.
                int unordered_nodes = n_nodes_ - ordered_nodes;
                if (ordered_nodes > 2) // only reordering when involved nodes in the measurement are not the two last ones
                {
                    // SUBPROBLEM ORDERING (from first node variable to last one)
                    //std::cout << "ordering partial_ordering problem: " << _first_ordered_node << " to "<< n_nodes_ - 1 << std::endl;
                    // Copy of the trailing ordered_nodes columns of A_nodes_.
                    SparseMatrix<int> sub_A_nodes_ = A_nodes_.rightCols(ordered_nodes);

                    // ordering constraints of the subproblem: the transposed last row of the sub-matrix
                    node_ordering_restrictions_.resize(ordered_nodes);
                    node_ordering_restrictions_ = sub_A_nodes_.bottomRows(1).transpose();

                    // computing the node ordering of the subproblem
                    sub_A_nodes_.makeCompressed();
                    PermutationMatrix<Dynamic, Dynamic, int> partial_permutation_nodes(ordered_nodes);
                    orderer_(sub_A_nodes_, partial_permutation_nodes, node_ordering_restrictions_.data());

                    // node ordering to variable ordering
                    // NOTE(review): partial_permutation is created with A_.cols()
                    // entries but applied below to ordered_variables columns --
                    // presumably nodePermutation2VariablesPermutation resizes it; confirm.
                    PermutationMatrix<Dynamic, Dynamic, int> partial_permutation(A_.cols());
                    nodePermutation2VariablesPermutation(partial_permutation_nodes, partial_permutation);

                    // apply the partial orderings to the trailing columns only;
                    // unlike the full-problem branch, R_ is permuted here as well
                    int ordered_variables = A_.cols() - nodes_.at(_first_ordered_node).location;
                    A_nodes_.rightCols(ordered_nodes) = (A_nodes_.rightCols(ordered_nodes) * partial_permutation_nodes.transpose()).sparseView();
                    A_.rightCols(ordered_variables) = (A_.rightCols(ordered_variables) * partial_permutation.transpose()).sparseView();
                    R_.rightCols(ordered_variables) = (R_.rightCols(ordered_variables) * partial_permutation.transpose()).sparseView();

                    // ACCUMULATING PERMUTATIONS
                    accumulatePermutation(partial_permutation_nodes);
                }
            }
            // Add the time spent in this call, in seconds, to the running total.
            time_ordering_ += ((double) clock() - t_ordering_) / CLOCKS_PER_SEC;
        }