/*
 * Load dsound.dll at run time and create the DirectSound object.
 *
 * The DLL is bound dynamically (LoadLibrary + GetProcAddress) rather than
 * linked, so a missing DLL is reported as an initialization failure here.
 *
 * options - not used by this function.
 *
 * Returns 0 on success and 1 on any failure. On success the file-level
 * variables hDLL and ds are set, and attenuation / new_sound_data are reset.
 * On failure after the DLL was loaded, the DLL is released again and hDLL is
 * set back to NULL so the handle does not leak.
 */
static int DirectSound_init(options_type *options)
{
    HRESULT  hr;
    UINT     previous_error_mode;
    dsc_proc create_proc;

    /*
     * Suppress the system error dialogs for this call only. Passing 0 to
     * SetErrorMode selects the system default, which DISPLAYS all error
     * dialogs, so the suppression flags must be given explicitly.
     */
    previous_error_mode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
    hDLL = LoadLibrary("dsound.dll");
    SetErrorMode(previous_error_mode);

    if (hDLL == NULL)
        return 1;

    create_proc = (dsc_proc)GetProcAddress(hDLL, "DirectSoundCreate");
    if (create_proc == NULL)
    {
        /* Entry point missing: do not keep the useless DLL mapped. */
        FreeLibrary(hDLL);
        hDLL = NULL;
        return 1;
    }

    hr = create_proc(NULL, &ds, NULL);
    if (FAILED(hr))
    {
        ErrorMsg("Unable to initialize DirectSound");
        FreeLibrary(hDLL);
        hDLL = NULL;
        return 1;
    }

    attenuation    = 0;
    new_sound_data = FALSE;

    return 0;
}
/// Builds an OpenableDescription from this feature's state.
///
/// If @p cmp is an Openable, only the fields whose values differ from the
/// corresponding fields of @p cmp are written to the description.
/// If @p cmp is not an Openable (including a null pointer), every field is
/// written.
DescriptionPtr Openable::toDescriptionStruct(ActorFeaturePtr cmp)
{
  OpenableDescriptionPtr description(new OpenableDescription);
  OpenablePtr reference = std::dynamic_pointer_cast<Openable>(cmp);

  // With nothing to diff against, all fields are exported unconditionally.
  // The short-circuit below guarantees `reference` is only dereferenced
  // when it is non-null.
  const bool exportAll = !reference;

  if ( exportAll || _lockId != reference->_lockId )
    description->lockId = _lockId;

  if ( exportAll || _locked != reference->_locked )
    description->locked = _locked;

  if ( exportAll || _scriptId != reference->_scriptId )
    description->scriptId = _scriptId;

  if ( exportAll || _closed != reference->_closed )
    description->closed = _closed;

  if ( exportAll || _lockLevel != reference->_lockLevel )
    description->lockLevel = _lockLevel;

  if ( exportAll || _openedState != reference->_openedState )
    description->openedState = _openedState;

  if ( exportAll || _closedState != reference->_closedState )
    description->closedState = _closedState;

  return description;
}
/*! \section example33 Example 33: Oscillating Search in very high-dimensional feature selection. Very high-dimensional feature selection in text categorization, with dimensionality in the order of 10000 or 100000. The standard approach is BIF, yet we show here that a non-trivial search procedure (OS) can be feasible. Here OS is applied in its fastest form (delta=1), initialized by means of BIF. We use Multinomial Bhattacharyya distance as the feature selection criterion (it has been shown capable of overperforming traditional tools like Information Gain etc., cf. Novovicova et al., LNCS 4109, 2006). Randomly sampled 50% of data is used for multinomial model parameter estimation to be used in the actual feature selection process, another (disjunct) 40% of data is randomly sampled for testing. The selected subset is eventually used for validation; multinomial Naive Bayes classifier is trained on the training data on the selected subset and classification accuracy is finally estimated on the test data. 
*/ int main() { try{ typedef double RETURNTYPE; typedef double DATATYPE; typedef double REALTYPE; typedef unsigned int IDXTYPE; typedef unsigned int DIMTYPE; typedef short BINTYPE; typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET; typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER; typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER; typedef FST::Data_Splitter_RandomRandom<INTERVALLER,IDXTYPE,BINTYPE> SPLITTERRR; //typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format typedef FST::Criterion_Multinomial_Bhattacharyya<RETURNTYPE,DATATYPE,REALTYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR> BHATTMULTINOMIALDIST; typedef FST::Classifier_Multinomial_NaiveBayes<RETURNTYPE,DATATYPE,REALTYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR> CLASSIFIERMULTINOMIAL; typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,BHATTMULTINOMIALDIST> EVALUATOR; std::cout << "Starting Example 33: Oscillating Search in very high-dimensional feature selection..." 
<< std::endl; // randomly sample 50% of data for training and randomly sample (disjunct) 40% for independent testing of final classification performance PSPLITTER dsp_outer(new SPLITTERRR(1, 50, 40)); // (there will be one outer randomized split only) // do not scale data boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>()); // set-up data access boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); splitters->push_back(dsp_outer); boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/reuters_apte.arff",splitters,dsc)); da->initialize(); // initiate access to split data parts da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/40 random data split failed."); // initiate the storage for subset to-be-selected boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures())); // set-up multinomial Bhattacharyya distance criterion boost::shared_ptr<BHATTMULTINOMIALDIST> dmultinom(new BHATTMULTINOMIALDIST); dmultinom->initialize(da); // (initialization = multinomial model parameter estimation on training data) // set-up individual feature ranking to serve as OS initialization FST::Search_BIF<RETURNTYPE,DIMTYPE,SUBSET,BHATTMULTINOMIALDIST> srch_bif; // set-up the standard sequential search step object (option: hybrid, ensemble, etc.) 
boost::shared_ptr<EVALUATOR> eval(new EVALUATOR); // set-up the Oscillating Search procedure in its fastest setting FST::Search_OS<RETURNTYPE,DIMTYPE,SUBSET,BHATTMULTINOMIALDIST,EVALUATOR> srch(eval); srch.set_delta(1); // target subset size must be set because a) Bhattacharyya is monotonous with respect to subset size, // b) in very-high-dimensional problem d-optimizing search is not feasible due to search complexity DIMTYPE target_subsize=500; // run the search - first find the initial subset by means of BIF, then improve it by means of OS std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch_bif << std::endl << srch << std::endl << *dmultinom << std::endl << std::endl; RETURNTYPE critval_train, critval_test; if(!srch_bif.search(target_subsize,critval_train,sub,dmultinom,std::cout)) throw FST::fst_error("Search (BIF) not finished."); std::cout << std::endl << "Initialization result: " << std::endl << *sub << "Criterion value=" << critval_train << std::endl << std::endl; if(!srch.search(target_subsize,critval_train,sub,dmultinom,std::cout)) throw FST::fst_error("Search (OS) not finished."); std::cout << std::endl << "Search result: " << std::endl << *sub << "Criterion value=" << critval_train << std::endl; // (optionally) validate result by estimating Naive Multinomial Bayes classifier accuracy on selected feature sub-space on independent test data boost::shared_ptr<CLASSIFIERMULTINOMIAL> cmultinom(new CLASSIFIERMULTINOMIAL); cmultinom->initialize(da); cmultinom->train(da,sub); cmultinom->test(critval_test,da); std::cout << "Validated Multinomial NaiveBayes accuracy=" << critval_test << std::endl << std::endl; } catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;} catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;} return 0; }
/// Builds a DestroyableDescription carrying a copy of this feature's drop
/// rules. The feature passed for comparison is ignored.
DescriptionPtr Destroyable::toDescriptionStruct(ActorFeaturePtr)
{
  DestroyableDescriptionPtr description(new DestroyableDescription);

  description->dropRules = _dropRules;

  return description;
}
/// Walks every feature type strictly between ActorFeature::FT_NULL and
/// ActorFeature::FT_END, asks the actor parser for a description of that
/// feature type and, when one is returned, creates the feature from it and
/// inserts it into @p actor.
void MapParser::overWriteActorFeatures(Actor* actor)
{
  int typeIndex = ActorFeature::FT_NULL + 1;

  while ( typeIndex != ActorFeature::FT_END )
  {
    ActorFeature::Type featureType = static_cast<ActorFeature::Type>( typeIndex );

    // The parsed description is owned here and released at the end of the
    // iteration; ActorFeature::create only receives a raw, non-owning pointer.
    std::unique_ptr<Description> parsed( _actorParser->parseFeatureDsc(featureType) );

    if ( parsed )
    {
      actor->insertFeature( ActorFeature::create(featureType, parsed.get()) );
    }

    ++typeIndex;
  }
}
/// Builds a WearerDescription from this feature's state.
///
/// The description receives the key of every entry in _itemSlots (converted
/// to int) and the description struct of every actor in *_equippedItems.
/// The feature passed for comparison is ignored.
DescriptionPtr Wearer::toDescriptionStruct(ActorFeaturePtr)
{
  WearerDescriptionPtr dsc(new WearerDescription);

  for ( const auto& kv : _itemSlots )
  {
    dsc->itemSlots.push_back( static_cast<int>(kv.first) );
  }

  // Bind by const reference: taking ActorPtr by value would copy the
  // pointer object on every iteration for no benefit.
  for ( const ActorPtr& item : *_equippedItems )
  {
    dsc->eqItems.push_back( item->toDescriptionStruct() );
  }

  return dsc;
}
void TextRenderer::DoLayout() { const Point new_sz = CalcTextPlc().Size(); TextObj& t_obj = GetTextObj(); panLay->update_from_cairo_context(caiCont); panLay->set_text(t_obj.Text().c_str()); const Editor::TextStyle& ts = t_obj.Style(); Pango::AttrList attr_list; if( ts.isUnderlined ) { Pango::AttrInt attr = Pango::Attribute::create_attr_underline(Pango::UNDERLINE_SINGLE); attr.set_start_index(0); attr.set_end_index(G_MAXUINT); attr_list.insert(attr); } panLay->set_attributes(attr_list); Pango::FontDescription dsc(ts.fntDsc); Point abs_sz = t_obj.Placement().Size(); double t_wdh, t_hgt; if( abs_sz.x != 0 ) { // размер рассчитываем по ширине, так как она уязвимей к скалированию CalcTextSize(dsc, (double)new_sz.x/abs_sz.x, t_wdh, t_hgt); //CalcTextSize(dsc, (double)new_sz.x/t_wdh, t_wdh, t_hgt); // погрешность - на практике оказалось, что лучше вообще не использовать // скалирование по горизонтали перед отображением, потому что отрисовка // текста реально может отличаться (по размеру) от того, что говорит // Pango::Layout::get_size() (!) txtRat.first = (double)new_sz.x/t_wdh; } else { // пусто ASSERT( abs_sz.y != 0 ); // размер шрифта ненулевой CalcTextSize(dsc, (double)new_sz.y/abs_sz.y, t_wdh, t_hgt); txtRat.first = 1.0; //txtSht = 0; } txtRat.second = (double)new_sz.y/t_hgt; txtSht = int( (new_sz.x - t_wdh)/2 ); }
void Service::FormTrackName() { Rect v_rct(GetTrackLocation(false)); const std::string& name = CurrVideo->mdName; if( !name.empty() ) { RefPtr<Pango::Layout> lay = Pango::Layout::create(cont); Pango::FontDescription dsc("Sans Italic 12"); lay->set_font_description(dsc); lay->set_text(name); Rect txt_rct( CeilRect(RectASz(DPoint(3 - shift.x, 3), CalcTextSize(lay))) ); if( v_rct.Intersects(txt_rct) ) ProcessTrackName(lay, txt_rct); } }
/*! \section example51 Example 51: (DOS) Result regularization using secondary criterion. It is known that feature selection may over-fit. As in the case of over-trained classifiers, over-selected feature subsets may generalize poorly. This unwanted effect can lead to serious degradation of generalization ability, i.e., model or decision-rule behavior on previously unknown data. It has been suggested (Raudys: Feature Over-Selection, LNCS 4109, 2006, or Somol et al., ICPR 2010) that preferring a subset with slightly-worse-than-maximal criterion value can actually improve generalization. FST3 makes this possible through result tracking and subsequent selection of alternative solution by means of secondary criterion maximization. In this example we show a 3-Nearest Neighbor Wrapper based feature selection process, where the final result is eventually chosen among a group of solutions close enough to the achieved maximum, so as to optimize the secondary criterion. The group of solutions to select from is defined by means of a user-selected margin value (permitted primary criterion value difference from the known maximum). In this case we show that even the simplest secondary criterion (mere preference of smaller subsets) can improve classifcation accuracy on previously unknown data. 
*/ int main() { try{ typedef double RETURNTYPE; typedef double DATATYPE; typedef double REALTYPE; typedef unsigned int IDXTYPE; typedef unsigned int DIMTYPE; typedef short BINTYPE; typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET; typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER; typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER; typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV; typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050; typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format //typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format typedef FST::Distance_Euclid<DATATYPE,DIMTYPE,SUBSET> DISTANCE; typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN; typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN; typedef FST::Criterion_Subset_Size<RETURNTYPE,SUBSET> CRITSUBSIZE; typedef FST::Criterion_Negative<CRITSUBSIZE,RETURNTYPE,SUBSET> NEGATIVECRIT; typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> EVALUATOR; typedef FST::Result_Tracker_Regularizer<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET,NEGATIVECRIT> TRACKER; std::cout << "Starting Example 51: (DOS) Result regularization using secondary criterion..." 
<< std::endl; // keep second half of data for independent testing of final classification performance PSPLITTER dsp_outer(new SPLITTER5050()); // in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation PSPLITTER dsp_inner(new SPLITTERCV(3)); // do not scale data boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>()); // set-up data access boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); splitters->push_back(dsp_outer); splitters->push_back(dsp_inner); boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/waveform_40.trn",splitters,dsc)); da->initialize(); // initiate access to split data parts da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed."); da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure."); // initiate the storage for subset to-be-selected boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures())); sub->deselect_all(); // set-up 3-Nearest Neighbor classifier based on Euclidean distances boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(3); // wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.) 
boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN); wknn->initialize(cknn,da); // set-up the standard sequential search step object (option: hybrid, ensemble) boost::shared_ptr<EVALUATOR> eval(new EVALUATOR); // set-up Dynamic Oscillating Search procedure FST::Search_DOS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN,EVALUATOR> srch(eval); srch.set_delta(3); // set-up the regularizing result tracker boost::shared_ptr<TRACKER> tracker(new TRACKER); // register the result tracker with the used search step object eval->enable_result_tracking(tracker); // run the search std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << *tracker << std::endl << std::endl; RETURNTYPE critval_train, critval_test; srch.set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default) if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished."); // (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data da->setSplittingDepth(0); cknn->train(da,sub); cknn->test(critval_test,da); std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl; // set-up the secondary criterion (regularization criterion); in this case to minimize subset size boost::shared_ptr<CRITSUBSIZE> critsubsiz(new CRITSUBSIZE); //Criterion_Subset_Size does not need to be initialized boost::shared_ptr<NEGATIVECRIT> regulcrit(new NEGATIVECRIT(critsubsiz)); //Criterion_Negative does not need to be initialized // select final solution among those recorded by tracker (show more alternatives for various margins) tracker->set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default) for(unsigned int i=1; i<10; i++) { RETURNTYPE margin=(double)i*0.001; da->setSplittingDepth(1); // necessary with criteria than need access 
to training data if(!tracker->optimize_within_margin(margin,critval_train,critval_test,sub,regulcrit)) throw FST::fst_error("tracker->optimize_within_margin() failed."); std::cout << std::endl << "Regularized (margin="<<margin<<") result: " << std::endl << *sub << "Criterion value=" << critval_train << std::endl; // (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data da->setSplittingDepth(0); cknn->train(da,sub); cknn->test(critval_test,da); std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl; } } catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;} catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;} return 0; }
/*! \section example12t Example 12t: Threaded SVM-wrapper-based feature selection with Dynamic Oscillating Search. Dynamic Oscillating Search is a d-optimizing procedure that adjusts selected subset size in the course of search. It is a generalization of the Oscillating Search idea, which proved to be useful in various feature selection contexts. Here we demonstrate it in multi-threaded configuration (using \c Sequential_Step_Straight_Threaded instead of \c Sequential_Step_Straight evaluator object). In this example due to the use of very complex feature selection criterion (SVM Wrapper) the speed gain due to multithreading is substantial. In this example features are selected on 40-dimensional \e waveform data with 3-fold cross-validated SVM wrapper as criterion on the first 50% of data samples. The final classification performance on the selected subspace is eventually validated on the second 50% of data. \note The maximum permitted number of threads to run at once is to be user-specified with respect to hardware capabilities. 
*/ int main() { try{ const unsigned int max_threads=2; typedef double RETURNTYPE; typedef double DATATYPE; typedef double REALTYPE; typedef unsigned int IDXTYPE; typedef unsigned int DIMTYPE; typedef short BINTYPE; typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET; typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER; typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER; typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV; typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050; typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; typedef FST::Classifier_LIBSVM<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR> CLASSIFIERSVM; typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERSVM,DATAACCESSOR> WRAPPERSVM; typedef FST::Sequential_Step_Straight_Threaded<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERSVM,max_threads> EVALUATOR; std::cout << std::endl << "Starting Example 12t: Threaded SVM-wrapper-based feature selection with Dynamic Oscillating Search..." 
<< std::endl; // keep second half of data for independent testing of final classification performance PSPLITTER dsp_outer(new SPLITTER5050()); // in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation PSPLITTER dsp_inner(new SPLITTERCV(3)); // do not scale data boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>()); // set-up data access boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); splitters->push_back(dsp_outer); splitters->push_back(dsp_inner); boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/waveform_40.trn",splitters,dsc)); da->initialize(); // initiate access to split data parts da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed."); da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure."); // initiate the storage for subset to-be-selected boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures())); // set-up SVM (interface to external library LibSVM) boost::shared_ptr<CLASSIFIERSVM> csvm(new CLASSIFIERSVM); csvm->set_kernel_type(RBF); // (option: LINEAR, POLY, SIGMOID) csvm->initialize(da); // wrap the SVM classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.) boost::shared_ptr<WRAPPERSVM> wsvm(new WRAPPERSVM); wsvm->initialize(csvm,da); // set-up the threaded sequential search step object (option: hybrid, ensemble, etc.) 
boost::shared_ptr<EVALUATOR> eval(new EVALUATOR); // set-up Dynamic Oscillating Search procedure FST::Search_DOS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERSVM,EVALUATOR> srch(eval); srch.set_delta(3); // first optimize SVM parameters using 3-fold cross-validation on training data on the full set of features sub->select_all(); csvm->optimize_parameters(da,sub); // run the search std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wsvm << std::endl << std::endl; sub->deselect_all(); // let DOS start from an empty set (any starting subset is permissible) RETURNTYPE critval_train, critval_test; if(!srch.search(0,critval_train,sub,wsvm,std::cout)) throw FST::fst_error("Search not finished."); // (optionally) the following line is included here just for illustration because srch.search() reports results in itself std::cout << std::endl << "Search result: " << std::endl << *sub << std::endl << "Criterion value=" << critval_train << std::endl << std::endl; // (optionally) validate result by estimating SVM accuracy on selected feature sub-space on independent test data da->setSplittingDepth(0); csvm->train(da,sub); csvm->test(critval_test,da); std::cout << "Validated SVM accuracy=" << critval_test << std::endl << std::endl; } catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;} catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;} return 0; }
/*! \section example40 Example 40: Exhaustive (optimal) feature selection. Selects features exhaustively, i.e., evaluates all possible feature combinations. This approach is guaranteed to find optimum with respect to the chosen criterion, but its exponential time complexity renders it prohibitive for even moderately dimensioned tasks. Here it is demonstrated on 15-dimensional data with 3-NN (based on L1.5 distance) wrapper classification accuracy as FS criterion - note how time consuming the computation is even for relatively low-dimensional case. Classification accuracy (i.e, FS wrapper criterion value) is estimated on the first 50% of data samples by means of 3-fold cross-validation. The final classification performance on the selected subspace is eventually validated on the second 50% of data. Exhaustive search is called here in d-optimizing setting, invoked by parameter 0 in search(0,...), which is otherwise used to specify the required subset size. Optional result tracking is employed here to reveal duplicate solutions yielding the same maximum criterion value (see also \ref example60). 
*/ int main() { try{ typedef double RETURNTYPE; typedef double DATATYPE; typedef double REALTYPE; typedef unsigned int IDXTYPE; typedef unsigned int DIMTYPE; typedef short BINTYPE; typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET; typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER; typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER; typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV; typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050; typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format //typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format typedef FST::Distance_Lp<DATATYPE,REALTYPE,DIMTYPE,SUBSET,3,2> DISTANCE; typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN; typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN; typedef FST::Result_Tracker_Dupless<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET> TRACKER; std::cout << "Starting Example 40: Exhaustive (optimal) feature selection..." 
<< std::endl; // keep second half of data for independent testing of final classification performance PSPLITTER dsp_outer(new SPLITTER5050()); // in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation PSPLITTER dsp_inner(new SPLITTERCV(3)); // do not scale data boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>()); // set-up data access boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); splitters->push_back(dsp_outer); splitters->push_back(dsp_inner); boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/speech_15.trn",splitters,dsc)); da->initialize(); // initiate access to split data parts da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed."); da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure."); // initiate the storage for subset to-be-selected boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures())); sub->deselect_all(); // set-up 3-Nearest Neighbor classifier based on Euclidean distances boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(3); // wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.) 
boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN); wknn->initialize(cknn,da); // set-up Exhaustive Search procedure FST::Search_Exhaustive<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> srch; // set-up result tracker to enable logging of candidate solutions, ordered descending by value // (optionally limit the number of kept records to 50000 highest valued to prevent memory exhaustion due to possibly excessive number of candidates) boost::shared_ptr<TRACKER> tracker(new TRACKER(50000)); // let the tracker register only solution no worse than "the best known criterion value minus 0.05" tracker->set_inclusion_margin(0.05); // register the result tracker with the used search object srch.enable_result_tracking(tracker); // run the search std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << std::endl; RETURNTYPE critval_train, critval_test; srch.set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default) if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished."); // (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data da->setSplittingDepth(0); cknn->train(da,sub); cknn->test(critval_test,da); std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl; // report tracker contents std::cout << "Result tracker records " << tracker->size(0.0) << " solutions with criterion value equal to " << critval_train << "." << std::endl << std::endl; for(unsigned int i=1;i<10;i++) std::cout << "Result tracker records " << tracker->size((double)i*0.005) << " solutions with criterion value greater or equal to " << critval_train-(double)i*0.005 << "." 
<< std::endl << std::endl; TRACKER::PResultRec result; if(tracker->get_first(result) && tracker->size(0.0)>1) { RETURNTYPE firstvalue=result->value; std::cout << "All recorded feature subsets yielding the same best known criterion value " << firstvalue << ":" << std::endl; while(tracker->get_next(result) && result->value==firstvalue) std::cout << *(result->sub) << std::endl; } } catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;} catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;} return 0; }
/*
 * Program entry point: translates the command line into a CommandParameter
 * structure and passes it to dsc().
 *
 * - No arguments: prints the version and usage text, exits with status 0.
 * - "-v": prints the version and returns 0 as soon as it is encountered.
 * - "-f <file>": stores the following argument as the input path.
 * - Every other recognised switch raises one or more output flags.
 * - An argument that matches nothing (including a trailing "-f" with no
 *   value) is reported, usage is printed and the process exits with status 7.
 * - A command line without "-f <file>" prints usage and exits with status 7.
 *
 * Returns the negated result of dsc().
 */
int main( int argc , char *argv[] )
{
    struct CommandParameter cp ;
    int                     idx ;

    if( argc == 1 )
    {
        version();
        usage();
        exit(0);
    }

    memset( & cp , 0x00 , sizeof(struct CommandParameter) );

    for( idx = 1 ; idx < argc ; idx++ )
    {
        char *arg = argv[idx] ;

        if( ! strcmp( arg , "-v" ) )
        {
            version();
            return 0;
        }

        /* "-f" only matches when a value follows it */
        if( ! strcmp( arg , "-f" ) && idx + 1 < argc )
        {
            cp.pathfilename = argv[++idx] ;
            continue;
        }

        if( ! strcmp( arg , "-c" ) )
        {
            cp.output_c_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-order" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_order_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-compact" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_order_flag = 1 ;
            cp.output_c_compact_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-compress" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_order_flag = 1 ;
            cp.output_c_compress_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-xml" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_xml_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-xml-compact" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_xml_compact_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-json" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_json_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-json-compact" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_json_compact_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-LOG" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_LOG_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-sql-pgsql" ) )
        {
            cp.output_sql_pgsql_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-ec-pgsql" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_ec_pgsql_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-sql-oracle" ) )
        {
            cp.output_sql_oracle_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-ec-oracle" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_ec_oracle_flag = 1 ;
            continue;
        }

        if( ! strcmp( arg , "-c-ALL" ) )
        {
            cp.output_c_flag = 1 ;
            cp.output_c_order_flag = 1 ;
            cp.output_c_compact_flag = 1 ;
            cp.output_c_compress_flag = 1 ;
            cp.output_c_xml_flag = 1 ;
            cp.output_c_LOG_flag = 1 ;
            continue;
        }

        /* nothing matched */
        printf( "Invalid parameter[%s]\n" , arg );
        usage();
        exit(7);
    }

    /* an input file is mandatory */
    if( cp.pathfilename == NULL )
    {
        usage();
        exit(7);
    }

    return -dsc( & cp ) ;
}
/*! \section example55 Example 55: Evaluating Similarity of Two Feature Selection Processes. To study the difference in feature preferences among principally different feature selection methods or among differently parametrized instances of the same method FST3 provides measures capable of evaluating the level of similarity between two sets of trials (Somol Novovicova, IEEE, TPAMI, 2010). In analogy to stability evaluation (see \ref example54) for each of the two feature selection scenarios a series of trials is conducted on various samplings of the same data. In this example ten feature selection trials are performed per scenario, each on randomly sampled 95% of the data. In the first scenario in each trial the resulting subset is obtained using DOS procedure, optimizing the 3-Nearest Neighbour accuracy estimated by means of 3-fold cross-validation. In the second scenario in each trial the resulting subset is obtained using SFFS procedure, maximizing the Bhattacharyya distance based on normal model. A selection of standard stability measures is evaluated separately for each of the two scenarios. Eventually the similarity of the two scenarios is evaluated using analogously founded similarity measures. All measures yield values from [0,1], where values close to 0 denote low stability/similarity and values close to 1 denote high stability/similarity. Note that in this experiment the inter-measures (IATI, ICW, IANHI) yield markedly lower values than the corresponding stability measures (ATI, CW, ANHI). This illustrates well that considerably different results can be expected from differently founded feature selection methods. 
*/ int main() { try{ typedef double RETURNTYPE; typedef double DATATYPE; typedef double REALTYPE; typedef unsigned int IDXTYPE; typedef unsigned int DIMTYPE; typedef short BINTYPE; typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET; typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER; typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER; typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV; typedef FST::Data_Splitter_RandomRandom<INTERVALLER,IDXTYPE,BINTYPE> SPLITTERRANDRAND; typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format //typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format typedef FST::Distance_L1<DATATYPE,DIMTYPE,SUBSET> DISTANCEL1; typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCEL1> CLASSIFIERKNN; typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPER; typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,WRAPPER> EVALUATOR1; typedef FST::Criterion_Normal_Bhattacharyya<RETURNTYPE,DATATYPE,REALTYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR> BHATTCRIT; typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,BHATTCRIT> EVALUATOR2; typedef FST::Result_Tracker_Stability_Evaluator<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET> TRACKER; std::cout << "Starting Example 55: Evaluating Similarity of Two Feature Selection Processes..." 
<< std::endl; // set-up ten trials where in each 95% of data is randomly sampled PSPLITTER dsp_outer(new SPLITTERRANDRAND(10/*splits=trials*/,95,5)); // in the course of wrapper based feature subset search (in one trial) use 3-fold cross-validation PSPLITTER dsp_inner(new SPLITTERCV(3)); // do not scale data boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>()); // set-up data access boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); splitters->push_back(dsp_outer); splitters->push_back(dsp_inner); boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/speech_15.trn",splitters,dsc)); da->initialize(); // initiate access to split data parts da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("RandRand data split failed."); da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure."); // initiate the storage for subset to-be-selected boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures())); sub->deselect_all(); // set-up result trackers to collect results of each trial in both scenarios boost::shared_ptr<TRACKER> tracker1(new TRACKER); boost::shared_ptr<TRACKER> tracker2(new TRACKER); // FEATURE SELECTION SCENARIO A (wrapper) // set-up 3-Nearest Neighbor classifier based on L1 distances boost::shared_ptr<CLASSIFIERKNN> cknn1(new CLASSIFIERKNN); cknn1->set_k(3); // wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.) 
boost::shared_ptr<WRAPPER> wknn1(new WRAPPER); wknn1->initialize(cknn1,da); // set-up the standard sequential search step object (option: hybrid, ensemble, threaded) boost::shared_ptr<EVALUATOR1> eval1(new EVALUATOR1); // set-up Sequential Forward Floating Selection search procedure FST::Search_DOS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPER,EVALUATOR1> srch1(eval1); srch1.set_delta(10); sub->deselect_all(); // Technical remark: should threaded evaluator be used in this case, it would be necessary to move both the evaluator and search procedure set-up // inside the trial loop. The reason is technical: threaded evaluator caches criterion clones, including data accessor state. // Therefore no outside changes in splitting level nor current split change can be reflected in criterion evaluation. Renewed // evaluator set-up resets the cache and thus ensures correct threaded criterion evaluation behavior after split change. // run the trials std::cout << "Feature selection setup:" << std::endl << *da << std::endl << *wknn1 << std::endl << *tracker1 << std::endl << std::endl; RETURNTYPE critval_train; da->setSplittingDepth(0); unsigned int trial=0; bool run=da->getFirstSplit(); if(!run) throw FST::fst_error("RandRand data split failed."); while(run) { trial++; std::cout << std::endl<<"TRIAL A"<<trial<< " ---------------------------------------------------------------------"<<std::endl; da->setSplittingDepth(1); if(!srch1.search(0,critval_train,sub,wknn1,std::cout)) throw FST::fst_error("Search not finished."); tracker1->add(critval_train,sub); std::cout << std::endl << "(TRIAL A"<<trial<<") Search result: " << std::endl << *sub << "Criterion value=" << critval_train << std::endl; da->setSplittingDepth(0); run=da->getNextSplit(); } // FEATURE SELECTION SCENARIO B (filter) // set-up normal Bhattacharyya distance criterion boost::shared_ptr<BHATTCRIT> cb(new BHATTCRIT); // set-up the standard sequential search step object (option: hybrid, ensemble, threaded) 
boost::shared_ptr<EVALUATOR2> eval2(new EVALUATOR2); // set-up Sequential Forward Floating Selection search procedure FST::Search_SFFS<RETURNTYPE,DIMTYPE,SUBSET,BHATTCRIT,EVALUATOR2> srch2(eval2); srch2.set_search_direction(FST::FORWARD); // target subset size must be set because Bhattacharyya is monotonous with respect to subset size (i.e., evaluates full set as the best) const DIMTYPE target_size=7; // run the trials std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch2 << std::endl << *cb << std::endl << *tracker2 << std::endl << std::endl; trial=0; da->setSplittingDepth(0); run=da->getFirstSplit(); if(!run) throw FST::fst_error("RandRand data split failed."); while(run) { trial++; std::cout << std::endl<<"TRIAL B"<<trial<< " ---------------------------------------------------------------------"<<std::endl; cb->initialize(da); // (note that cb initialization = normal model parameter estimation on training data, therefore it must be repeated for each split) da->setSplittingDepth(1); if(!srch2.search(target_size,critval_train,sub,cb,std::cout)) throw FST::fst_error("Search not finished."); tracker2->add(critval_train,sub); std::cout << std::endl << "(TRIAL B"<<trial<<") Search result: " << std::endl << *sub << "Criterion value=" << critval_train << std::endl; da->setSplittingDepth(0); run=da->getNextSplit(); } // evaluate stability of each scenario and similarity of the two scenarios using results collected by trackers std::cout<<std::endl; std::cout << "---------------------------------------------------------------------" << std::endl; std::cout << "Scenario A resulting criterion values' mean: " << tracker1->value_mean() << ", std. dev.: " << tracker1->value_stddev() << std::endl; std::cout << "Scenario A subset sizes' mean: " << tracker1->size_mean() << ", std. 
dev.: " << tracker1->size_stddev() << std::endl; std::cout << std::endl; std::cout << "Scenario A stability_ATI()=" << tracker1->stability_ATI() << std::endl; std::cout << "Scenario A stability_CW()=" << tracker1->stability_CW() << std::endl; std::cout << "Scenario A stability_ANHI("<<da->getNoOfFeatures()<<")=" << tracker1->stability_ANHI(da->getNoOfFeatures()) << std::endl; std::cout<<std::endl; std::cout << "Scenario B resulting criterion values' mean: " << tracker2->value_mean() << ", std. dev.: " << tracker2->value_stddev() << std::endl; std::cout << "Scenario B subset sizes' mean: " << tracker2->size_mean() << ", std. dev.: " << tracker2->size_stddev() << std::endl; std::cout << std::endl; std::cout << "Scenario B stability_ATI()=" << tracker2->stability_ATI() << std::endl; std::cout << "Scenario B stability_CW()=" << tracker2->stability_CW() << std::endl; std::cout << "Scenario B stability_ANHI("<<da->getNoOfFeatures()<<")=" << tracker2->stability_ANHI(da->getNoOfFeatures()) << std::endl; std::cout<<std::endl; std::cout << "Evaluating similarity between scenario A and scenario B:"<< std::endl; std::cout << "similarity measure IATI()=" << tracker1->similarity_IATI(*tracker2) << std::endl; std::cout << "similarity measure ICW()=" << tracker1->similarity_ICW(*tracker2) << std::endl; std::cout << "similarity measure IANHI("<<da->getNoOfFeatures()<<")=" << tracker1->similarity_IANHI(da->getNoOfFeatures(), *tracker2) << std::endl; } catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;} catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;} return 0; }
/// Experimental text rendering: lays out a fixed three-line sample text, picks a
/// font size from the ratio of the requested width to the measured layout width,
/// draws a frame of wdh x hgt, renders the text stretched vertically to hgt and
/// marks the cursor positions reported for byte indices 1 and 11.
/// Intermediate measurements are written to io::cout.
///
/// @param cr  Cairo context to draw into
/// @param wdh target width
/// @param hgt target height
static void draw_text (Cairo::RefPtr<Cairo::Context> cr, int wdh, int hgt)
{
    RefPtr<Pango::Layout> layout = Pango::Layout::create(cr);
    //layout->set_single_paragraph_mode(true);
    layout->set_text("MTextTextM\nAbc\nff");

    Pango::FontDescription font(FONT);
    layout->set_font_description(font);

    // measure the layout with the initial font ...
    int lay_wdh, lay_hgt;
    layout->get_size(lay_wdh, lay_hgt);

    // ... and derive the font size from the size-per-width ratio
    const double size_per_width = static_cast<double>(font.get_size()) / lay_wdh;
    const double new_sz         = wdh * size_per_width;
    io::cout << "new_sz " << new_sz << io::endl;
    io::cout << "wdh " << wdh << io::endl;

    font.set_size( static_cast<int>(new_sz * PANGO_SCALE) );
    layout->set_font_description(font);

    // re-measure with the adjusted font
    layout->get_size(lay_wdh, lay_hgt);
    io::cout << "t_wdh " << lay_wdh / static_cast<double>(PANGO_SCALE) << io::endl;

    // frame around the target area, for illustration
    cr->set_line_width(1.0);
    cr->rectangle(0, 0, wdh, hgt);
    cr->stroke();

    cr->save();
    cr->move_to(0, 0);
    // only the vertical axis is scaled
    cr->scale( 1.0, hgt / (static_cast<double>(lay_hgt) / PANGO_SCALE) );
    //cr->scale( wdh / ((double)t_wdh/PANGO_SCALE), hgt / ((double)t_hgt/PANGO_SCALE) );

    layout->update_from_cairo_context(cr);
    pango_cairo_show_layout(cr->cobj(), layout->gobj());

    {
        // draw a vertical marker, shifted 5 units to the right, at each probed cursor position
        const int cursor_indices[] = { 1, 11 };
        Pango::Rectangle mark_rct, other_rct;

        for( unsigned int k = 0; k < sizeof(cursor_indices)/sizeof(cursor_indices[0]); ++k )
        {
            layout->get_cursor_pos(cursor_indices[k], mark_rct, other_rct);
            pango_extents_to_pixels(0, mark_rct.gobj());

            io::cout << "curs - x, y, hgt " << mark_rct.get_x() << " " << mark_rct.get_y()
                     << " " << mark_rct.get_height() << io::endl;

            cr->move_to(mark_rct.get_x()+5, mark_rct.get_y());
            cr->line_to(mark_rct.get_x()+5, mark_rct.get_y()+mark_rct.get_height());
            cr->stroke();
        }
    }

    cr->restore();
}
void TextObj::SetFontDesc(const std::string& fnt) { Pango::FontDescription dsc(fnt.c_str()); SetFontDesc(dsc); }
/*! \section example21 Example 21: Generalized sequential feature subset search. All sequential search algorithms (SFS, SFFS, OS, DOS, SFRS) can be extended to operate in "generalized" setting (term coined in Devijver, Kittler book). In each step of a generalized sequential search algorithm not only one best feature is added to current subset nor one worst feature is removed from current subset; instead, g-tuples of features are considered. Searching for such group of g features that improves the current subset the most when added (or such that degrades the current subset the least when removed) is more computationally complex but increases the chance of finding the optimum or a result closer to optimum (nevertheless, improvement is not guaranteed and in some cases the result can actually degrade). The value g is to be set by user; the higher the value g, the slower the search (time complexity increases exponentially with increasing g). Note that setting g equal to the number of all features would effectively emulate the operation of exhaustive search. In this example features are selected using the generalized (G)SFFS algorithm (G=2) and 3-NN wrapper classification accuracy as FS criterion. Classification accuracy (i.e, FS wrapper criterion value) is estimated on the first 50% of data samples by means of 3-fold cross-validation. The final classification performance on the selected subspace is eventually validated on the second 50% of data. (G)SFFS is called here in d-optimizing setting, invoked by parameter 0 in search(0,...), which is otherwise used to specify the required subset size. \note Note that in this context the term generalization does not! relate to classification performance on independent data. 
*/ int main() { try{ typedef double RETURNTYPE; typedef double DATATYPE; typedef double REALTYPE; typedef unsigned int IDXTYPE; typedef unsigned int DIMTYPE; typedef short BINTYPE; typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET; typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER; typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER; typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV; typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050; typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format //typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format typedef FST::Distance_Euclid<DATATYPE,DIMTYPE,SUBSET> DISTANCE; typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN; typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN; typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> EVALUATOR; std::cout << "Starting Example 21: Generalized sequential feature subset search..." 
<< std::endl; // keep second half of data for independent testing of final classification performance PSPLITTER dsp_outer(new SPLITTER5050()); // in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation PSPLITTER dsp_inner(new SPLITTERCV(3)); // do not scale data boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>()); // set-up data access boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); splitters->push_back(dsp_outer); splitters->push_back(dsp_inner); boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/speech_15.trn",splitters,dsc)); da->initialize(); // initiate access to split data parts da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed."); da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure."); // initiate the storage for subset to-be-selected boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures())); sub->deselect_all(); // set-up 3-Nearest Neighbor classifier based on Euclidean distances boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(5); // wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.) boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN); wknn->initialize(cknn,da); // set-up the standard sequential search step object (option: hybrid, ensemble, etc.) 
boost::shared_ptr<EVALUATOR> eval(new EVALUATOR); // set-up Sequential Forward Floating Selection search procedure FST::Search_SFFS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN,EVALUATOR> srch(eval); srch.set_search_direction(FST::FORWARD); // try FST::BACKWARD // set the size of feature groups to be evaluated for inclusion/removal in each sequential step (can be applied to SFS, SFFS, OS, DOS, SFRS) srch.set_generalization_level(2); // run the search std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << std::endl; RETURNTYPE critval_train, critval_test; srch.set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default) if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished."); // (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data da->setSplittingDepth(0); cknn->train(da,sub); cknn->test(critval_test,da); std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl; // (optionally) list the best known solutions for each cardinality as recorded throughout the course of search std::cout << "Best recorded solution for subset size:" << std::endl; for(DIMTYPE d=1;d<=sub->get_n();d++) if(srch.get_result(d,critval_train,sub)) std::cout << d << ": val="<< critval_train << ", "<<*sub << std::endl; } catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;} catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;} return 0; }
/*! \section example61 Example 61: Feature selection that respects pre-specified feature weights. In many applications it is desirable to optimize feature subsets not only with respect to the primary objective (e.g., decision rule accuracy), but also with respect to additional factors like known feature acquisition cost. In many cases there might be only negligible difference in discriminatory ability among several features, while the cost of measuring their value may differ a lot. In such a case it is certainly better to select the cheaper feature. In other scenarios it might be even advantageous to trade a minor degradation of classifcation accuracy for substantial saving in measurement acquisition cost. For such cases FST3 implements a mechanism that allows to control the feature accuracy vs. feature cost trade-off. It is made possible through result tracking and subsequent selection of alternative solution so as to minimize the sum of pre-specified feature weights. The lower-weight solution is selected from the pool of all known solutions that differ from the best one by less than a user-specifed margin (permitted primary criterion value difference from the known maximum value). In this example we illustrate how to add the respective mechanism to standard wrapper based feature selection. Here we select features so as to maximize 3-Nearest Neighbor accuracy; then several lower-weight solutions are identified and validated, for various margin values. 
*/ int main() { try{ typedef double RETURNTYPE; typedef double DATATYPE; typedef double REALTYPE; typedef unsigned int IDXTYPE; typedef unsigned int DIMTYPE; typedef short BINTYPE; typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET; typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER; typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER; typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV; typedef FST::Data_Splitter_5050<INTERVALLER,IDXTYPE> SPLITTER5050; typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format //typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format typedef FST::Distance_Euclid<DATATYPE,DIMTYPE,SUBSET> DISTANCE; typedef FST::Classifier_kNN<RETURNTYPE,DATATYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR,DISTANCE> CLASSIFIERKNN; typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERKNN,DATAACCESSOR> WRAPPERKNN; typedef FST::Criterion_Sum_Of_Weights<RETURNTYPE,DIMTYPE,SUBSET> WEIGHCRIT; typedef FST::Criterion_Negative<WEIGHCRIT,RETURNTYPE,SUBSET> NEGATIVECRIT; typedef FST::Sequential_Step_Straight<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN> EVALUATOR; typedef FST::Result_Tracker_Regularizer<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET,NEGATIVECRIT> TRACKER; std::cout << "Starting Example 61: Feature selection that respects pre-specified feature weights..." 
<< std::endl; // keep second half of data for independent testing of final classification performance PSPLITTER dsp_outer(new SPLITTER5050()); // in the course of search use the first half of data by 3-fold cross-validation in wrapper FS criterion evaluation PSPLITTER dsp_inner(new SPLITTERCV(3)); // do not scale data boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>()); // set-up data access boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>); splitters->push_back(dsp_outer); splitters->push_back(dsp_inner); boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/speech_15.trn",splitters,dsc)); da->initialize(); // initiate access to split data parts da->setSplittingDepth(0); if(!da->getFirstSplit()) throw FST::fst_error("50/50 data split failed."); da->setSplittingDepth(1); if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure."); // initiate the storage for subset to-be-selected boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures())); sub->deselect_all(); // set-up 3-Nearest Neighbor classifier based on Euclidean distances boost::shared_ptr<CLASSIFIERKNN> cknn(new CLASSIFIERKNN); cknn->set_k(3); // wrap the 3-NN classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.) boost::shared_ptr<WRAPPERKNN> wknn(new WRAPPERKNN); wknn->initialize(cknn,da); // set-up the standard sequential search step object (option: hybrid, ensemble, threaded, etc.) 
boost::shared_ptr<EVALUATOR> eval(new EVALUATOR); // set-up Sequential Forward Floating Selection search procedure FST::Search_SFFS<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERKNN,EVALUATOR> srch(eval); srch.set_search_direction(FST::FORWARD); // set-up tracker of intermediate results boost::shared_ptr<TRACKER> tracker(new TRACKER); // register the result tracker with the used search step object eval->enable_result_tracking(tracker); // run the search std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wknn << std::endl << *tracker << std::endl << std::endl; RETURNTYPE critval_train, critval_test; if(!srch.search(0,critval_train,sub,wknn,std::cout)) throw FST::fst_error("Search not finished."); // (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data da->setSplittingDepth(0); cknn->train(da,sub); cknn->test(critval_test,da); if(!wknn->evaluate(critval_train,sub)) throw FST::fst_error("crit call failure."); std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << ", crit value="<< critval_train << std::endl << std::endl; // set-up the secondary criterion to minimize the sum of feature weights // (note that the concrete weight values shown here are sample only) RETURNTYPE feature_cost[]={1, 1.2, 1, 1.3, 1.02, 2.4, 3.9, 1.2, 7.1, 22, 9.52, 1.08, 3.27, 1.44, 1.04}; assert(sizeof(feature_cost)/sizeof(RETURNTYPE)==da->getNoOfFeatures()); boost::shared_ptr<WEIGHCRIT> weightsumcrit(new WEIGHCRIT); weightsumcrit->initialize(da->getNoOfFeatures(),feature_cost); boost::shared_ptr<NEGATIVECRIT> critminw(new NEGATIVECRIT(weightsumcrit)); // select final solution among those recorded by tracker (show more alternatives for various margins) for(unsigned int i=0; i<10; i++) { const RETURNTYPE margin=(double)i*0.005; if(!tracker->optimize_within_margin(margin,critval_train,critval_test,sub,critminw)) throw FST::fst_error("tracker2->optimize_within_margin() failed."); 
std::cout << std::endl << "Weight-optimized result (primary criterion margin="<<margin<<"): " << std::endl << *sub << "Criterion value=" << critval_train << std::endl << "Sum of weights=" << -critval_test << std::endl; // (optionally) validate result by estimating kNN accuracy on selected feature sub-space on independent test data da->setSplittingDepth(0); cknn->train(da,sub); cknn->test(critval_test,da); std::cout << "Validated "<<cknn->get_k()<<"-NN accuracy=" << critval_test << std::endl << std::endl; } } catch(FST::fst_error &e) {std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;} catch(std::exception &e) {std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;} return 0; }
/*! \section example35t Example 35t: Dependency-Aware Feature Ranking (DAF1) to enable Wrapper based FS on very-high-dimensional data. Dependency-Aware Feature Ranking (DAF) is a novel approach to feature selection especially suitable for very-high-dimensional problems and over-fitting-prone feature selection scenarios. DAF evaluates a chosen criterion on a series of probe subsets to eventually rank features according to their estimated contextual quality. Note that this approach makes it possible to apply even the complex Wrapper feature selection criteria in problems of very-high-dimensionality. DAF has been shown capable of overperforming BIF quite significantly in many cases in terms of the quality of selected feature subsets, yet its stability and resistance against over-fitting remains on par with BIF. For details see UTIA Technical Report No. 2295 from February 2011. We demonstrate two slightly different forms of DAF (DAF0 and DAF1) on examples \ref example34 and \ref example35t. Example34 illustrates the approach with k-NN accuracy \e wrapper criterion. This example 35t illustrates DAF with SVM \e wrapper applied to very-high-dimensional (greater than 10000-dimensional) text categorization problem. \note DAF (as BIF) ranks features but does not determine final subset size. \note To achieve reasonable results in case of extreme dimensionality like here DAF requires at least hours of computation. (Standard wrapper based methods would need several orders more time in similar setting.) It is beneficial to allow for as many probes as possible. For instance, setting max_search_time to 20 hours instead of 200 minutes as seen below improves the final accuracy on independent test data roughly by 3%. \warning This example needs large RAM memory (4GB may not be enough). 
*/
// NOTE(review): the documentation block that closes just above appears to describe a
// different example (Oscillating Search) than the code below (DAF ranking) -- confirm.

/*! Demo entry point: Dependency-Aware Feature (DAF) ranking driven by a threaded
    Monte Carlo search that uses a LibSVM wrapper criterion.

    Steps performed:
      1. split the data 70/30 (outer level) and set up 3-fold cross-validation
         on the training part (inner level),
      2. optimize linear SVM parameters on the full feature set,
      3. run the randomized search while a tracker gathers per-feature statistics,
      4. add features one at a time in DAF order (up to d features) and, after
         every 50 additions, train/test the SVM on the outer split.

    \return 0 on success, 1 if an exception was caught (the error is printed to std::cerr)
*/
int main()
{
	try{
		const unsigned int max_threads=16;
		typedef double RETURNTYPE;
		typedef float DATATYPE;
		typedef double REALTYPE;
		typedef unsigned int IDXTYPE;
		typedef unsigned int DIMTYPE;
		typedef short BINTYPE;
		typedef FST::Subset<BINTYPE, DIMTYPE> SUBSET;
		typedef FST::Data_Intervaller<std::vector<FST::Data_Interval<IDXTYPE> >,IDXTYPE> INTERVALLER;
		typedef boost::shared_ptr<FST::Data_Splitter<INTERVALLER,IDXTYPE> > PSPLITTER;
		typedef FST::Data_Splitter_CV<INTERVALLER,IDXTYPE> SPLITTERCV;
		typedef FST::Data_Splitter_RandomRandom<INTERVALLER,IDXTYPE,BINTYPE> SPLITTERRANDRAND;
		//typedef FST::Data_Accessor_Splitting_MemTRN<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for TRN data format
		typedef FST::Data_Accessor_Splitting_MemARFF<DATATYPE,IDXTYPE,INTERVALLER> DATAACCESSOR; // uncomment for ARFF data format
		typedef FST::Classifier_LIBSVM<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET,DATAACCESSOR> CLASSIFIERSVM;
		typedef FST::Criterion_Wrapper<RETURNTYPE,SUBSET,CLASSIFIERSVM,DATAACCESSOR> WRAPPERSVM;
		typedef FST::Result_Tracker_Feature_Stats<RETURNTYPE,IDXTYPE,DIMTYPE,SUBSET> TRACKERSTATS;

		// NOTE(review): the banner text contains a typo ("selectio on"); it is left
		// untouched here because it is program output.
		std::cout << "Starting Example 35t: Dependency-Aware Feature Ranking (DAF1) enabling Wrapper based feature selectio on very-high-dimensional data..." << std::endl;

		// outer level: one random 70/30 split; the smaller part is kept for
		// independent testing of final classification performance
		PSPLITTER dsp_outer(new SPLITTERRANDRAND(1/*trials*/,70,30));
		// inner level: in the course of search the training part is used by
		// 3-fold cross-validation in wrapper FS criterion evaluation
		PSPLITTER dsp_inner(new SPLITTERCV(3));
		// do not scale data
		boost::shared_ptr<FST::Data_Scaler<DATATYPE> > dsc(new FST::Data_Scaler_void<DATATYPE>());

		// set-up data access (splitter order defines the splitting depth: 0=outer, 1=inner)
		boost::shared_ptr<std::vector<PSPLITTER> > splitters(new std::vector<PSPLITTER>);
		splitters->push_back(dsp_outer);
		splitters->push_back(dsp_inner);
		boost::shared_ptr<DATAACCESSOR> da(new DATAACCESSOR("data/reuters_apte.arff",splitters,dsc));
		da->initialize();

		// initiate access to split data parts
		da->setSplittingDepth(0);
		if(!da->getFirstSplit()) throw FST::fst_error("70/30 data split failed.");
		da->setSplittingDepth(1);
		if(!da->getFirstSplit()) throw FST::fst_error("3-fold cross-validation failure.");

		// initiate the storage for subset to-be-selected
		boost::shared_ptr<SUBSET> sub(new SUBSET(da->getNoOfFeatures()));
		sub->deselect_all();

		// set-up SVM (interface to external library LibSVM)
		boost::shared_ptr<CLASSIFIERSVM> csvm(new CLASSIFIERSVM);
		csvm->initialize(da);
		csvm->set_kernel_type(LINEAR);

		// first optimize SVM parameters using 3-fold cross-validation on training data on the full set of features
		sub->select_all();
		csvm->optimize_parameters(da,sub);

		// wrap the SVM classifier to enable its usage as FS criterion (criterion value will be estimated by 3-fold cross-val.)
		boost::shared_ptr<WRAPPERSVM> wsvm(new WRAPPERSVM);
		wsvm->initialize(csvm,da);

		// Dependency-Aware Feature ranking computation settings
		const unsigned long max_search_time=200*60; // in seconds (the more search time can be afforded the better)
		const DIMTYPE min_probe_cardinality=1; // lower limit on random probe subset cardinality (the default value of 1 is generally applicable)
		const DIMTYPE max_probe_cardinality=200; // upper limit on random probe subset cardinality (200 is used here; the original note names 100 as the generally applicable default)

		// set-up the (threaded) Monte Carlo search procedure that generates random probe subsets
		FST::Search_Monte_Carlo_Threaded<RETURNTYPE,DIMTYPE,SUBSET,WRAPPERSVM,max_threads> srch;
		srch.set_cardinality_randomization(min_probe_cardinality,max_probe_cardinality);
		srch.set_stopping_condition(0/*max trials*/,max_search_time/*seconds*/,1/*time check frequency*/); // one or both values must have positive value

		// set-up tracker to gather data for eventual DAF rank computation
		boost::shared_ptr<TRACKERSTATS> trackerstats(new TRACKERSTATS);
		srch.enable_result_tracking(trackerstats);

		// run the search
		std::cout << "Feature selection setup:" << std::endl << *da << std::endl << srch << std::endl << *wsvm << std::endl << std::endl;
		RETURNTYPE critval_train, critval_test;
		srch.set_output_detail(FST::NORMAL); // set FST::SILENT to disable all text output in the course of search (FST::NORMAL is default)
		if(!srch.search(0,critval_train,sub,wsvm,std::cout)) throw FST::fst_error("Search not finished.");

		// compute the statistics the DAF ranking is read from
		trackerstats->compute_stats();
		// (optionally) print DAF computation statistics
		trackerstats->print_stats(std::cout);

		// select user-specified number of features according to highest DAF feature rank values
		// + validate result by estimating classifier accuracy on selected feature sub-space on independent test data
		da->setSplittingDepth(0);
		const DIMTYPE d=1000; // maximum number of features to add
		unsigned int DAF=1; // DAF0 is the simplest and generally best performing option; DAF1 as a normalized version of DAF0 may occasionally yield better results
		RETURNTYPE critval=0;
		DIMTYPE feature=0;
		sub->deselect_all();
		bool found=trackerstats->getFirstDAF(critval,feature,DAF);
		// n_selected counts features added so far including the current one (1-based)
		for(DIMTYPE n_selected=1; n_selected<=d && found; ++n_selected)
		{
			sub->select(feature);
			std::cout << "Added feature "<<feature<<", DAF"<<DAF<<"=" << critval << std::endl;
			if(n_selected%50==0)
			{
				// (optionally) validate result by estimating classifier accuracy on selected feature sub-space on independent test data
				csvm->train(da,sub);
				csvm->test(critval_test,da);
				std::cout << *sub << std::endl << "Validated SVM accuracy=" << critval_test << std::endl << std::endl;
			}
			found=trackerstats->getNextDAF(critval,feature,DAF);
		}
	}
	catch(FST::fst_error &e)
	{
		std::cerr<<"FST ERROR: "<< e.what() << ", code=" << e.code() << std::endl;
		return 1; // report the failure to the caller instead of exiting with success
	}
	catch(const std::exception &e)
	{
		std::cerr<<"non-FST ERROR: "<< e.what() << std::endl;
		return 1; // report the failure to the caller instead of exiting with success
	}
	return 0;
}