Ejemplo n.º 1
0
int main()
{
	// Path to the Iris data set file (CSV-style, as distributed with the UCI repository).
	// NOTE: a string literal must bind to const char* — binding to plain char* is
	// ill-formed since C++11 (and was deprecated before that).
	const char * SampleSet = "iris.data.txt";
	// Number of nearest neighbours used by the kNN classifier.
	const int k = 3;
	CKNN cknn(k);
	cknn.Load(SampleSet);    // read the sample set from disk
	cknn.get_accuracy();     // evaluate and report classification accuracy
	system("pause");         // Windows-only: keep the console window open
	return 0;
}
Ejemplo n.º 2
0
/*! \section example51 Example 51: (DOS) Result regularization using secondary criterion.

It is known that feature selection may over-fit. As in the case of over-trained classifiers,
over-selected feature subsets may generalize poorly. This unwanted effect can lead to
serious degradation of generalization ability, i.e., model or decision-rule behavior 
on previously unknown data. It has been suggested (Raudys: Feature Over-Selection, LNCS 4109, 2006, 
or Somol et al., ICPR 2010) that preferring a subset with slightly-worse-than-maximal criterion
value can actually improve generalization. FST3 makes this possible through result tracking
and subsequent selection of alternative solution by means of secondary criterion maximization.
In this example we show a 3-Nearest Neighbor Wrapper based feature selection process, where
the final result is eventually chosen among a group of solutions close enough to the achieved
maximum, so as to optimize the secondary criterion. The group of solutions to select from is defined 
by means of a user-selected margin value (permitted primary criterion value difference from the known
maximum). In this case we show that even the simplest secondary criterion (mere preference of 
smaller subsets) can improve classification accuracy on previously unknown data.
*/
int main()
{
	try{
	// Numeric and index types shared by all FST template instantiations below.
	typedef double RETURNTYPE; 	typedef double DATATYPE;  typedef double REALTYPE;
	typedef unsigned int IDXTYPE;  typedef unsigned int DIMTYPE;  typedef short BINTYPE;
	typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET;
	typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER;
	typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER;
	typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV;
	typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050;
	typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format
	//typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format
	typedef FST::Distance_Euclid<DATATYPE,DIMTYPE,SUBSET> DISTANCE;
	typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN;
	typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN;
	typedef FST::Criterion_Subset_Size<RETURNTYPE,SUBSET> CRITSUBSIZE;
	typedef FST::Criterion_Negative<CRITSUBSIZE,RETURNTYPE,SUBSET> NEGATIVECRIT;
	typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> EVALUATOR;
	typedef FST::Result_Tracker_Regularizer<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET,NEGATIVECRIT> TRACKER;

		std::cout << "Starting Example 51: (DOS) Result regularization using secondary criterion..." << std::endl;
	// keep second half of data for independent testing of final classification performance
		PSPLITTER dsp_outer(new SPLITTER5050());
	// in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation
		PSPLITTER dsp_inner(new SPLITTERCV(3));
	// do not scale data
		boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>());
	// set-up data access (splitter order matters: depth 0 = outer 50/50 split, depth 1 = inner CV split)
		boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); 
		splitters->push_back(dsp_outer); splitters->push_back(dsp_inner);
		boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/waveform_40.trn",splitters,dsc));
		da->initialize();
	// initiate access to split data parts
		da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed.");
		da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure.");
	// initiate the storage for subset to-be-selected (starts empty; search adds features)
		boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures()));  sub->deselect_all();
	// set-up 3-Nearest Neighbor classifier based on Euclidean distances
		boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(3);
	// wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.)
		boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN);
		wknn->initialize(cknn,da);
	// set-up the standard sequential search step object (option: hybrid, ensemble)
		boost::shared_ptr<EVALUATOR> eval(new EVALUATOR);
	// set-up Dynamic Oscillating Search procedure
		FST::Search_DOS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN,EVALUATOR> srch(eval);
		srch.set_delta(3);
	// set-up the regularizing result tracker (records candidate subsets seen during the search)
		boost::shared_ptr<TRACKER> tracker(new TRACKER);
	// register the result tracker with the used search step object
		eval->enable_result_tracking(tracker);
	// run the search
		std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << *tracker << std::endl << std::endl;
		RETURNTYPE critval_train, critval_test;
		srch.set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default)
		if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished.");
	// (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data
		da->setSplittingDepth(0);
		cknn->train(da,sub);
		cknn->test(critval_test,da);
		std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl;
	// set-up the secondary criterion (regularization criterion); in this case to minimize subset size
	// (Criterion_Negative turns the subset-size criterion into one to be maximized)
		boost::shared_ptr<CRITSUBSIZE> critsubsiz(new CRITSUBSIZE); //Criterion_Subset_Size does not need to be initialized
		boost::shared_ptr<NEGATIVECRIT> regulcrit(new NEGATIVECRIT(critsubsiz)); //Criterion_Negative does not need to be initialized
	// select final solution among those recorded by tracker (show more alternatives for various margins)
		tracker->set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default)
		for(unsigned int i=1; i<10; i++) 
		{
			RETURNTYPE margin=(double)i*0.001; // permitted primary criterion difference from the known maximum
			da->setSplittingDepth(1); // necessary with criteria that need access to training data
			if(!tracker->optimize_within_margin(margin,critval_train,critval_test,sub,regulcrit)) throw FST::fst_error("tracker->optimize_within_margin() failed.");
			std::cout << std::endl << "Regularized (margin="<<margin<<") result: " << std::endl << *sub << "Criterion value=" << critval_train << std::endl;
		// (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data
			da->setSplittingDepth(0);
			cknn->train(da,sub);
			cknn->test(critval_test,da);
			std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl;
		}
	}
	catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;}
	catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;}
	return 0;
}
Ejemplo n.º 3
0
/*! \section example40 Example 40: Exhaustive (optimal) feature selection.

Selects features exhaustively, i.e., evaluates all possible feature combinations. This approach
is guaranteed to find optimum with respect to the chosen criterion, but its exponential time
complexity renders it prohibitive for even moderately dimensioned tasks. Here it is demonstrated
on 15-dimensional data with 3-NN (based on L1.5 distance) wrapper classification accuracy as FS criterion - note how
time consuming the computation is even for relatively low-dimensional case. Classification accuracy 
(i.e, FS wrapper criterion value) is estimated on the first 50% of data samples by means of 3-fold cross-validation. 
The final classification performance on the selected subspace is eventually validated on the second 50% of data. 
Exhaustive search is called here in d-optimizing setting, invoked by parameter 0 in search(0,...), which is 
otherwise used to specify the required subset size. Optional result tracking is employed here to reveal
duplicate solutions yielding the same maximum criterion value (see also \ref example60).
*/
int main()
{
	try{
	// Numeric and index types shared by all FST template instantiations below.
	typedef double RETURNTYPE; 	typedef double DATATYPE;  typedef double REALTYPE;
	typedef unsigned int IDXTYPE;  typedef unsigned int DIMTYPE;  typedef short BINTYPE;
	typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET;
	typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER;
	typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER;
	typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV;
	typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050;
	typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format
	//typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format
	typedef FST::Distance_Lp<DATATYPE,REALTYPE,DIMTYPE,SUBSET,3,2> DISTANCE; // Minkowski L_p with p=3/2, i.e., the L1.5 distance
	typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN;
	typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN;
	typedef FST::Result_Tracker_Dupless<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET> TRACKER;

		std::cout << "Starting Example 40: Exhaustive (optimal) feature selection..." << std::endl;
	// keep second half of data for independent testing of final classification performance
		PSPLITTER dsp_outer(new SPLITTER5050());
	// in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation
		PSPLITTER dsp_inner(new SPLITTERCV(3));
	// do not scale data
		boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>());
	// set-up data access (splitter order matters: depth 0 = outer 50/50 split, depth 1 = inner CV split)
		boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); 
		splitters->push_back(dsp_outer); splitters->push_back(dsp_inner);
		boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/speech_15.trn",splitters,dsc));
		da->initialize();
	// initiate access to split data parts
		da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed.");
		da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure.");
	// initiate the storage for subset to-be-selected (starts empty; search adds features)
		boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures()));  sub->deselect_all();
	// set-up 3-Nearest Neighbor classifier based on the L1.5 (Minkowski) distance defined above
		boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(3);
	// wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.)
		boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN);
		wknn->initialize(cknn,da);
	// set-up Exhaustive Search procedure (evaluates every feature combination; optimal but exponential)
		FST::Search_Exhaustive<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> srch;
	// set-up result tracker to enable logging of candidate solutions, ordered descending by value 
	// (optionally limit the number of kept records to 50000 highest valued to prevent memory exhaustion due to possibly excessive number of candidates)
		boost::shared_ptr<TRACKER> tracker(new TRACKER(50000));
	// let the tracker register only solution no worse than "the best known criterion value minus 0.05"
		tracker->set_inclusion_margin(0.05);
	// register the result tracker with the used search object
		srch.enable_result_tracking(tracker);
	// run the search
		std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << std::endl;
		RETURNTYPE critval_train, critval_test;
		srch.set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default)
		if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished.");
	// (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data
		da->setSplittingDepth(0);
		cknn->train(da,sub);
		cknn->test(critval_test,da);
		std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl;
	// report tracker contents: how many recorded solutions lie within various margins below the maximum
		std::cout << "Result tracker records " << tracker->size(0.0) << " solutions with criterion value equal to " << critval_train << "." << std::endl << std::endl;
		for(unsigned int i=1;i<10;i++) std::cout << "Result tracker records " << tracker->size((double)i*0.005) << " solutions with criterion value greater or equal to " << critval_train-(double)i*0.005 << "." << std::endl << std::endl;
		TRACKER::PResultRec result;
		if(tracker->get_first(result) && tracker->size(0.0)>1) 
		{
			// list all duplicate optima, i.e., every recorded subset that ties the best value
			RETURNTYPE firstvalue=result->value;
			std::cout << "All recorded feature subsets yielding the same best known criterion value " << firstvalue << ":" << std::endl;
			while(tracker->get_next(result) && result->value==firstvalue) std::cout << *(result->sub) << std::endl;
		}
	}
	catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;}
	catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;}
	return 0;
}
Ejemplo n.º 4
0
/*! \section example61 Example 61: Feature selection that respects pre-specified feature weights.

In many applications it is desirable to optimize feature subsets not only with respect
to the primary objective (e.g., decision rule accuracy), but also with respect
to additional factors like known feature acquisition cost. In many cases there might be
only negligible difference in discriminatory ability among several features, while
the cost of measuring their value may differ a lot. In such a case it is certainly
better to select the cheaper feature. In other scenarios it might be even advantageous
to trade a minor degradation of classification accuracy for substantial saving in
measurement acquisition cost. For such cases FST3 implements a mechanism that
allows to control the feature accuracy vs. feature cost trade-off. It is made possible 
through result tracking and subsequent selection of alternative solution so as to minimize
the sum of pre-specified feature weights. The lower-weight solution is selected
from the pool of all known solutions that differ from the best one by less than
a user-specified margin (permitted primary criterion value difference from the known
maximum value). In this example we illustrate how to add the respective mechanism to 
standard wrapper based feature selection. Here we select features so as to maximize 
3-Nearest Neighbor accuracy; then several lower-weight solutions are identified
and validated, for various margin values.
*/
int main()
{
	try{
	// Numeric and index types shared by all FST template instantiations below.
	typedef double RETURNTYPE; 	typedef double DATATYPE;  typedef double REALTYPE;
	typedef unsigned int IDXTYPE;  typedef unsigned int DIMTYPE;  typedef short BINTYPE;
	typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET;
	typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER;
	typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER;
	typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV;
	typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050;
	typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format
	//typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format
	typedef FST::Distance_Euclid<DATATYPE,DIMTYPE,SUBSET> DISTANCE;
	typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN;
	typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN;
	typedef FST::Criterion_Sum_Of_Weights<RETURNTYPE,DIMTYPE,SUBSET> WEIGHCRIT;
	typedef FST::Criterion_Negative<WEIGHCRIT,RETURNTYPE,SUBSET> NEGATIVECRIT;
	typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> EVALUATOR;
	typedef FST::Result_Tracker_Regularizer<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET,NEGATIVECRIT> TRACKER;

		std::cout << "Starting Example 61: Feature selection that respects pre-specified feature weights..." << std::endl;
	// keep second half of data for independent testing of final classification performance
		PSPLITTER dsp_outer(new SPLITTER5050());
	// in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation
		PSPLITTER dsp_inner(new SPLITTERCV(3));
	// do not scale data
		boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>());
	// set-up data access (splitter order matters: depth 0 = outer 50/50 split, depth 1 = inner CV split)
		boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); 
		splitters->push_back(dsp_outer); splitters->push_back(dsp_inner);
		boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/speech_15.trn",splitters,dsc));
		da->initialize();
	// initiate access to split data parts
		da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed.");
		da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure.");
	// initiate the storage for subset to-be-selected (starts empty; search adds features)
		boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures()));  sub->deselect_all();
	// set-up 3-Nearest Neighbor classifier based on Euclidean distances
		boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(3);
	// wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.)
		boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN);
		wknn->initialize(cknn,da);
	// set-up the standard sequential search step object (option: hybrid, ensemble, threaded, etc.)
		boost::shared_ptr<EVALUATOR> eval(new EVALUATOR);
	// set-up Sequential Forward Floating Selection search procedure
		FST::Search_SFFS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN,EVALUATOR> srch(eval);
		srch.set_search_direction(FST::FORWARD);
	// set-up tracker of intermediate results
		boost::shared_ptr<TRACKER> tracker(new TRACKER);
	// register the result tracker with the used search step object
		eval->enable_result_tracking(tracker);
	// run the search
		std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << *tracker << std::endl << std::endl;
		RETURNTYPE critval_train, critval_test;
		if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished.");
	// (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data
		da->setSplittingDepth(0);
		cknn->train(da,sub);
		cknn->test(critval_test,da);
		if(!wknn->evaluate(critval_train,sub)) throw FST::fst_error("crit call failure.");
		std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << ", crit value="<< critval_train << std::endl << std::endl;
	// set-up the secondary criterion to minimize the sum of feature weights
	// (note that the concrete weight values shown here are sample only; one weight per feature is required)
		RETURNTYPE feature_cost[]={1, 1.2, 1, 1.3, 1.02, 2.4, 3.9, 1.2, 7.1, 22, 9.52, 1.08, 3.27, 1.44, 1.04};
		assert(sizeof(feature_cost)/sizeof(RETURNTYPE)==da->getNoOfFeatures());
		boost::shared_ptr<WEIGHCRIT> weightsumcrit(new WEIGHCRIT);
		weightsumcrit->initialize(da->getNoOfFeatures(),feature_cost);
	// Criterion_Negative turns weight-sum minimization into a maximization problem as required by the tracker
		boost::shared_ptr<NEGATIVECRIT> critminw(new NEGATIVECRIT(weightsumcrit));
	// select final solution among those recorded by tracker (show more alternatives for various margins)
		for(unsigned int i=0; i<10; i++) 
		{
			const RETURNTYPE margin=(double)i*0.005; // permitted primary criterion difference from the known maximum
			// error message fixed: the tracker variable here is 'tracker', not 'tracker2'
			if(!tracker->optimize_within_margin(margin,critval_train,critval_test,sub,critminw)) throw FST::fst_error("tracker->optimize_within_margin() failed.");
			std::cout << std::endl << "Weight-optimized result (primary criterion margin="<<margin<<"): " << std::endl << *sub << "Criterion value=" << critval_train << std::endl << "Sum of weights=" << -critval_test << std::endl;
		// (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data
			da->setSplittingDepth(0);
			cknn->train(da,sub);
			cknn->test(critval_test,da);
			std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl;
		}
	}
	catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;}
	catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;}
	return 0;
}
Ejemplo n.º 5
0
/*! \section example21 Example 21: Generalized sequential feature subset search.

All sequential search algorithms (SFS, SFFS, OS, DOS, SFRS) can be extended to operate in "generalized" setting
(term coined in Devijver, Kittler book). In each step of a generalized sequential
search algorithm not only one best feature is added to current subset nor one worst
feature is removed from current subset; instead, g-tuples of features are considered.
Searching for such group of g features that improves the current subset the most when added
(or such that degrades the current subset the least when removed) is more computationally
complex but increases the chance of finding the optimum or a result closer to optimum 
(nevertheless, improvement is not guaranteed and in some cases the result can actually degrade). 
The value g is to be set by user; the higher the value g, the slower the search 
(time complexity increases exponentially with increasing g). Note that setting g equal 
to the number of all features would effectively emulate the operation of exhaustive search.
In this example features are selected using the generalized (G)SFFS algorithm (G=2) and 3-NN wrapper 
classification accuracy as FS criterion. Classification accuracy (i.e, FS wrapper criterion value) is 
estimated on the first 50% of data samples by means of 3-fold cross-validation. The final classification 
performance on the selected subspace is eventually validated on the second 50% of data. (G)SFFS is called
here in d-optimizing setting, invoked by parameter 0 in search(0,...), which is otherwise
used to specify the required subset size.

\note Note that in this context the term generalization does not! relate to classification performance 
on independent data.

*/
int main()
{
	try{
	// Numeric and index types shared by all FST template instantiations below.
	typedef double RETURNTYPE; 	typedef double DATATYPE;  typedef double REALTYPE;
	typedef unsigned int IDXTYPE;  typedef unsigned int DIMTYPE;  typedef short BINTYPE;
	typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET;
	typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER;
	typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER;
	typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV;
	typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050;
	typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format
	//typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format
	typedef FST::Distance_Euclid<DATATYPE,DIMTYPE,SUBSET> DISTANCE;
	typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN;
	typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN;
	typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> EVALUATOR;

		std::cout << "Starting Example 21: Generalized sequential feature subset search..." << std::endl;
	// keep second half of data for independent testing of final classification performance
		PSPLITTER dsp_outer(new SPLITTER5050());
	// in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation
		PSPLITTER dsp_inner(new SPLITTERCV(3));
	// do not scale data
		boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>());
	// set-up data access (splitter order matters: depth 0 = outer 50/50 split, depth 1 = inner CV split)
		boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); 
		splitters->push_back(dsp_outer); splitters->push_back(dsp_inner);
		boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/speech_15.trn",splitters,dsc));
		da->initialize();
	// initiate access to split data parts
		da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed.");
		da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure.");
	// initiate the storage for subset to-be-selected (starts empty; search adds features)
		boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures()));  sub->deselect_all();
	// set-up 3-Nearest Neighbor classifier based on Euclidean distances
	// (k=3 as described in the example header; the original code set k=5 here, contradicting
	//  the surrounding documentation and comments)
		boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(3);
	// wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.)
		boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN);
		wknn->initialize(cknn,da);
	// set-up the standard sequential search step object (option: hybrid, ensemble, etc.)
		boost::shared_ptr<EVALUATOR> eval(new EVALUATOR);
	// set-up Sequential Forward Floating Selection search procedure
		FST::Search_SFFS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN,EVALUATOR> srch(eval);
		srch.set_search_direction(FST::FORWARD); // try FST::BACKWARD
	// set the size of feature groups to be evaluated for inclusion/removal in each sequential step (can be applied to SFS, SFFS, OS, DOS, SFRS)
		srch.set_generalization_level(2);
	// run the search
		std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << std::endl;
		RETURNTYPE critval_train, critval_test;
		srch.set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default)
		if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished.");
	// (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data
		da->setSplittingDepth(0);
		cknn->train(da,sub);
		cknn->test(critval_test,da);
		std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl;
	// (optionally) list the best known solutions for each cardinality as recorded throughout the course of search
		std::cout << "Best recorded solution for subset size:" << std::endl;
		for(DIMTYPE d=1;d<=sub->get_n();d++) 
		if(srch.get_result(d,critval_train,sub)) std::cout << d << ": val="<< critval_train << ", "<<*sub << std::endl;
	}
	catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;}
	catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;}
	return 0;
}