void Evaluation::evalSplit (Trainer *trainer, ISamples &samples, ssi_real_t split) {

    if (split <= 0 || split >= 1) {
        ssi_err ("split must be a value between 0 and 1");
    }

    _trainer = trainer;
    destroy_conf_mat ();
    init_conf_mat (samples);

    ssi_size_t *indices = new ssi_size_t[samples.getSize ()];
    ssi_size_t *indices_count_lab = new ssi_size_t[samples.getClassSize ()];
    ssi_size_t indices_count_all;

    indices_count_all = 0;
    for (ssi_size_t j = 0; j < samples.getClassSize (); j++) {
        indices_count_lab[j] = 0;
    }

    // pick the first 'split' fraction of each class
    ssi_size_t label;
    ssi_size_t label_size;
    for (ssi_size_t j = 0; j < samples.getSize (); j++) {
        label = samples.get (j)->class_id;
        label_size = samples.getSize (label);
        if (++indices_count_lab[label] <= ssi_cast (ssi_size_t, label_size * split + 0.5f)) {
            indices[indices_count_all++] = j;
        }
    }

    SampleList strain;
    SampleList stest;

    // split off samples
    ModelTools::SelectSampleList (samples, strain, stest, indices_count_all, indices);

    _n_total = stest.getSize ();
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    // train with the split-off samples
    _trainer->release ();
    if (_preproc_mode) {
        _trainer->setPreprocMode (_preproc_mode, _n_streams_refs, _stream_refs);
    } else if (_fselmethod) {
        _trainer->setSelection (strain, _fselmethod, _pre_fselmethod, _n_pre_feature);
    }
    _trainer->train (strain);

    // test with the remaining samples
    eval_h (stest);

    delete[] indices;
    delete[] indices_count_lab;
}
void Evaluation::evalKFold (Trainer *trainer, ISamples &samples, ssi_size_t k) {

    // init confusion matrix
    _trainer = trainer;
    destroy_conf_mat ();
    init_conf_mat (samples);

    _n_total = samples.getSize ();
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    ssi_size_t *indices = new ssi_size_t[samples.getSize ()];
    ssi_size_t *indices_count_lab = new ssi_size_t[samples.getClassSize ()];
    ssi_size_t indices_count_all;

    for (ssi_size_t i = 0; i < k; ++i) {

        indices_count_all = 0;
        for (ssi_size_t j = 0; j < samples.getClassSize (); j++) {
            indices_count_lab[j] = 0;
        }

        // collect every k'th sample of each class for the i'th fold
        ssi_size_t label;
        for (ssi_size_t j = 0; j < samples.getSize (); j++) {
            label = samples.get (j)->class_id;
            if (++indices_count_lab[label] % k == i) {
                indices[indices_count_all++] = j;
            }
        }

        SampleList strain;
        SampleList stest;

        // split off i'th fold
        ModelTools::SelectSampleList (samples, stest, strain, indices_count_all, indices);

        // train with the remaining folds
        _trainer->release ();
        if (_fselmethod) {
            _trainer->setSelection (strain, _fselmethod, _pre_fselmethod, _n_pre_feature);
        }
        if (_preproc_mode) {
            _trainer->setPreprocMode (_preproc_mode, _n_streams_refs, _stream_refs);
        }
        _trainer->train (strain);

        // test with the i'th fold
        eval_h (stest);
    }

    delete[] indices;
    delete[] indices_count_lab;
}
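// A minimal usage sketch for the evaluation routines above (illustrative only;
// how the trainer and the sample list are set up is assumed and not part of
// this listing). Only methods that appear elsewhere in the listing are used:
// evalKFold, evalSplit and print.
void example_evaluation (Trainer &trainer, SampleList &samples) {

    Evaluation eval;

    // 5-fold cross-validation
    eval.evalKFold (&trainer, samples, 5);
    eval.print ();

    // train on the first 2/3 of each class, test on the remaining 1/3
    eval.evalSplit (&trainer, samples, 0.66f);
    eval.print ();
}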
bool MyFusion::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    ssi_size_t n_streams = samples.getStreamSize ();
    if (n_streams != n_models) {
        ssi_err ("#models (%u) differs from #streams (%u)", n_models, n_streams);
    }

    for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
        if (!models[n_model]->isTrained ()) {
            models[n_model]->train (samples, n_model);
        }
    }

    _is_trained = true;

    return true;
}
void AlgLibTools::Samples2MatrixWithClass (ISamples &samples, ssi_size_t stream_id, ae_matrix* m) {

    ae_int_t nfeatures = samples.get (0)->streams[stream_id]->dim;
    ae_int_t nsamples = samples.getSize ();
    ae_int_t i = 0;
    ae_int_t j = 0;
    ae_state state;

    ae_matrix_clear (m);
    ae_matrix_set_length (m, nsamples, nfeatures+1, &state);

    // copy each sample's feature values; the last column holds its class id
    ssi_sample_t *sample;
    samples.reset ();
    while (sample = samples.next ()) {
        ssi_real_t *ptr = ssi_pcast (ssi_real_t, sample->streams[stream_id]->ptr);
        for (j = 0; j <= nfeatures-1; j++) {
            m->ptr.pp_double[i][j] = ssi_cast (double, *ptr++);
        }
        m->ptr.pp_double[i][j] = ssi_cast (double, sample->class_id);
        i++;
    }

    //delete sample;
}
void AlgLibTools::Samples2matrix (ISamples &samples, ssi_size_t stream_id, ssi_size_t class_id, ae_matrix* m, ae_state *state) {

    ae_int_t nfeatures = samples.get (0)->streams[stream_id]->dim;
    ae_int_t nsamples = samples.getSize (class_id);
    ae_int_t i = 0;
    ae_int_t j = 0;

    ae_matrix_clear (m);
    ae_matrix_set_length (m, nsamples, nfeatures, state);

    ssi_sample_t *sample;
    ISSelectClass samples_s (&samples);
    samples_s.setSelection (class_id);
    samples_s.reset ();
    while (sample = samples_s.next ()) {
        ssi_real_t *ptr = ssi_pcast (ssi_real_t, sample->streams[stream_id]->ptr);
        for (j = 0; j <= nfeatures-1; j++) {
            m->ptr.pp_double[i][j] = ssi_cast (double, *ptr++);
        }
        i++;
    }
}
bool Fisher::build (ISamples &samples, ssi_size_t stream_index) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isBuild ()) {
        ssi_wrn ("already trained");
        return false;
    }

    ae_state state;
    ae_int_t info;
    ae_matrix data;
    ae_matrix_init (&data, 0, 0, DT_REAL, &state, ae_true);

    // convert the samples to a matrix where the last column holds the class number to which the sample belongs
    AlgLibTools::Samples2MatrixWithClass (samples, 0, &data);

    _basis = new ae_matrix;
    ae_matrix_init (_basis, 0, 0, DT_REAL, &state, ae_true);

    fisherldan (&data, data.rows, data.cols-1, samples.getClassSize (), &info, _basis, &state);

    ae_matrix_clear (&data);

    _is_build = true;

    return true;
}
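// A minimal sketch (not part of the original class) of how the learned basis
// could be applied to project a feature vector onto the first 'n_keep'
// discriminant directions. The column layout of '_basis' follows alglib's
// fisherldan output (an nvars x nvars matrix whose columns are the basis
// vectors, ordered by decreasing eigenvalue); the helper name is hypothetical.
void fisher_project (const ae_matrix *basis, const ssi_real_t *in,
                     ssi_size_t n_features, double *out, ssi_size_t n_keep) {
    for (ssi_size_t k = 0; k < n_keep; k++) {
        out[k] = 0.0;
        for (ssi_size_t j = 0; j < n_features; j++) {
            // dot product of the input vector with the k'th basis column
            out[k] += basis->ptr.pp_double[j][k] * ssi_cast (double, in[j]);
        }
    }
}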
void Evaluation::evalLOO (Trainer *trainer, ISamples &samples) {

    _trainer = trainer;
    destroy_conf_mat ();
    init_conf_mat (samples);

    ssi_size_t n_samples = samples.getSize ();
    _n_total = n_samples;
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    // first round: leave out sample 0, train on samples 1..n-1
    ssi_size_t itest = 0;
    ssi_size_t *itrain = new ssi_size_t[n_samples - 1];
    for (ssi_size_t nsample = 0; nsample < n_samples - 1; ++nsample) {
        itrain[nsample] = nsample+1;
    }

    ISSelectSample strain (&samples);
    ISSelectSample stest (&samples);
    strain.setSelection (n_samples-1, itrain);
    stest.setSelection (1, &itest);

    _trainer->release ();
    if (_fselmethod) {
        _trainer->setSelection (strain, _fselmethod, _pre_fselmethod, _n_pre_feature);
    }
    if (_preproc_mode) {
        _trainer->setPreprocMode (_preproc_mode, _n_streams_refs, _stream_refs);
    }
    _trainer->train (strain);
    eval_h (stest);

    // remaining rounds: leave out sample 'nsample', train on all others
    for (ssi_size_t nsample = 1; nsample < n_samples; ++nsample) {

        itrain[nsample-1] = nsample-1;
        itest = nsample;

        strain.setSelection (n_samples-1, itrain);
        stest.setSelection (1, &itest);

        _trainer->release ();
        if (_fselmethod) {
            _trainer->setSelection (strain, _fselmethod, _pre_fselmethod, _n_pre_feature);
        }
        if (_preproc_mode) {
            _trainer->setPreprocMode (_preproc_mode, _n_streams_refs, _stream_refs);
        }
        _trainer->train (strain);
        eval_h (stest);
    }

    delete [] itrain;
}
bool MyModel::train (ISamples &samples, ssi_size_t stream_index) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_classes = samples.getClassSize ();
    _n_features = samples.getStream (stream_index).dim;

    // allocate one center per class and initialize it with zeros
    _centers = new ssi_real_t *[_n_classes];
    for (ssi_size_t i = 0; i < _n_classes; i++) {
        _centers[i] = new ssi_real_t[_n_features];
        for (ssi_size_t j = 0; j < _n_features; j++) {
            _centers[i][j] = 0;
        }
    }

    // sum up the feature values per class ...
    ssi_sample_t *sample;
    samples.reset ();
    ssi_real_t *ptr = 0;
    while (sample = samples.next ()) {
        ssi_size_t id = sample->class_id;
        ptr = ssi_pcast (ssi_real_t, sample->streams[stream_index]->ptr);
        for (ssi_size_t j = 0; j < _n_features; j++) {
            _centers[id][j] += ptr[j];
        }
    }

    // ... and divide by the number of samples per class to get the class means
    for (ssi_size_t i = 0; i < _n_classes; i++) {
        ssi_size_t num = samples.getSize (i);
        for (ssi_size_t j = 0; j < _n_features; j++) {
            _centers[i][j] /= num;
        }
    }

    return true;
}
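// A minimal sketch of how the class centers computed above could be used to
// classify a new feature vector (nearest centroid by squared Euclidean
// distance). The helper name is hypothetical; the model's actual forward /
// classification code is not part of this listing.
ssi_size_t classify_nearest_center (ssi_real_t **centers, ssi_size_t n_classes,
                                    ssi_size_t n_features, const ssi_real_t *x) {
    ssi_size_t best = 0;
    ssi_real_t best_dist = 0;
    for (ssi_size_t i = 0; i < n_classes; i++) {
        // squared distance to the i'th class center
        ssi_real_t dist = 0;
        for (ssi_size_t j = 0; j < n_features; j++) {
            ssi_real_t d = x[j] - centers[i][j];
            dist += d * d;
        }
        if (i == 0 || dist < best_dist) {
            best_dist = dist;
            best = i;
        }
    }
    return best;
}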
bool SimpleKNN::train (ISamples &samples, ssi_size_t stream_index) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (samples.getSize () < _options.k) {
        ssi_wrn ("sample list has less than '%u' entries", _options.k);
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_classes = samples.getClassSize ();
    _n_samples = samples.getSize ();
    _n_features = samples.getStream (stream_index).dim;

    // store a copy of all feature vectors and their class ids
    _data = new ssi_real_t[_n_features*_n_samples];
    _classes = new ssi_size_t[_n_samples];

    ssi_sample_t *sample;
    samples.reset ();
    ssi_real_t *data_ptr = _data;
    ssi_size_t *class_ptr = _classes;
    ssi_size_t bytes_to_copy = _n_features * sizeof (ssi_real_t);
    while (sample = samples.next ()) {
        memcpy (data_ptr, sample->streams[stream_index]->ptr, bytes_to_copy);
        *class_ptr++ = sample->class_id;
        data_ptr += _n_features;
    }

    return true;
}
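// A minimal sketch (helper name and use of the STL are assumptions, not the
// original forward code) of how the stored samples could be used to classify
// a query vector: find the k nearest neighbours by squared Euclidean distance
// and take a majority vote over their class ids.
#include <vector>
#include <algorithm>
#include <utility>

ssi_size_t knn_vote (const ssi_real_t *data, const ssi_size_t *classes,
                     ssi_size_t n_samples, ssi_size_t n_features,
                     ssi_size_t n_classes, ssi_size_t k, const ssi_real_t *x) {

    // squared distances to all stored samples, paired with their class ids
    std::vector< std::pair<ssi_real_t, ssi_size_t> > dist (n_samples);
    for (ssi_size_t i = 0; i < n_samples; i++) {
        const ssi_real_t *p = data + i * n_features;
        ssi_real_t d = 0;
        for (ssi_size_t j = 0; j < n_features; j++) {
            ssi_real_t diff = x[j] - p[j];
            d += diff * diff;
        }
        dist[i] = std::make_pair (d, classes[i]);
    }

    // move the k closest samples to the front and count their votes
    std::partial_sort (dist.begin (), dist.begin () + k, dist.end ());
    std::vector<ssi_size_t> votes (n_classes, 0);
    for (ssi_size_t i = 0; i < k; i++) {
        votes[dist[i].second]++;
    }

    return ssi_cast (ssi_size_t, std::max_element (votes.begin (), votes.end ()) - votes.begin ());
}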
void Evaluation::eval (Trainer *trainer, ISamples &samples) {

    // init confusion matrix
    _trainer = trainer;
    destroy_conf_mat ();
    init_conf_mat (samples);

    _n_total = samples.getSize ();
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    // call helper function
    eval_h (samples);
}
void Evaluation::eval (IFusion &fusion, ssi_size_t n_models, IModel **models, ISamples &samples) {

    // init confusion matrix
    _trainer = 0;
    destroy_conf_mat ();
    init_conf_mat (samples);

    ssi_size_t n_classes = samples.getClassSize ();
    ssi_real_t *probs = new ssi_real_t[n_classes];

    _n_total = samples.getSize ();
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    samples.reset ();
    const ssi_sample_t *sample = 0;
    while (sample = samples.next ()) {

        ssi_size_t real_index = sample->class_id;
        *_result_vec_ptr++ = real_index;

        if (fusion.forward (n_models, models, sample->num, sample->streams, n_classes, probs)) {

            // pick the class with the highest probability
            ssi_size_t max_ind = 0;
            ssi_real_t max_val = probs[0];
            for (ssi_size_t i = 1; i < n_classes; i++) {
                if (probs[i] > max_val) {
                    max_val = probs[i];
                    max_ind = i;
                }
            }

            *_result_vec_ptr++ = max_ind;
            _conf_mat_ptr[real_index][max_ind]++;
            _n_classified++;

        } else if (!_allow_unclassified) {

            // fall back to the default class
            ssi_size_t max_ind = _default_class_id;
            *_result_vec_ptr++ = max_ind;
            _conf_mat_ptr[real_index][max_ind]++;
            _n_classified++;

        } else {

            // count as unclassified
            *_result_vec_ptr++ = SSI_ISAMPLES_GARBAGE_CLASS_ID;
            _n_unclassified++;
        }
    }

    delete[] probs;
}
bool MajorityVoting::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (samples.getStreamSize () != n_models) {
        ssi_wrn ("#models (%u) differs from #streams (%u)", n_models, samples.getStreamSize ());
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_streams = samples.getStreamSize ();
    _n_classes = samples.getClassSize ();
    _n_models = n_models;

    if (samples.hasMissingData ()) {
        ISMissingData samples_h (&samples);
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                samples_h.setStream (n_model);
                models[n_model]->train (samples_h, n_model);
            }
        }
    } else {
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                models[n_model]->train (samples, n_model);
            }
        }
    }

    return true;
}
bool SimpleFusion::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    ssi_size_t n_streams = samples.getStreamSize ();
    if (n_streams != 1 && n_streams != n_models) {
        ssi_err ("#models (%u) differs from #streams (%u)", n_models, n_streams);
    }

    if (samples.hasMissingData ()) {
        ISMissingData samples_h (&samples);
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                samples_h.setStream (n_streams == 1 ? 0 : n_model);
                models[n_model]->train (samples_h, n_model);
            }
        }
    } else {
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                models[n_model]->train (samples, n_streams == 1 ? 0 : n_model);
            }
        }
    }

    _is_trained = true;

    return true;
}
bool FeatureFusion::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_streams = samples.getStreamSize ();
    _n_classes = samples.getClassSize ();
    _n_models = n_models;

    //initialize weights
    ssi_real_t **weights = new ssi_real_t*[n_models];
    for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
        weights[n_model] = new ssi_real_t[_n_classes+1];
    }

    if (samples.hasMissingData ()) {

        _handle_md = true;

        ISMissingData samples_h (&samples);
        Evaluation eval;

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            ssi_print ("\nMissing data detected.\n");
        }

        //models[0] is featfuse_model, followed by singlechannel_models
        ISMergeDim ffusionSamples (&samples);
        ISMissingData ffusionSamples_h (&ffusionSamples);
        ffusionSamples_h.setStream (0);
        if (!models[0]->isTrained ()) {
            models[0]->train (ffusionSamples_h, 0);
        }

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            eval.eval (*models[0], ffusionSamples_h, 0);
            eval.print ();
        }

        //dummy weights for fused model
        for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
            weights[0][n_class] = 0.0f;
        }
        weights[0][_n_classes] = 0.0f;

        for (ssi_size_t n_model = 1; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                samples_h.setStream (n_model - 1);
                models[n_model]->train (samples_h, n_model - 1);
            }
            eval.eval (*models[n_model], samples_h, n_model - 1);
            if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
                eval.print ();
            }
            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                weights[n_model][n_class] = eval.get_class_prob (n_class);
            }
            weights[n_model][_n_classes] = eval.get_classwise_prob ();
        }

        //calculate fillers
        _filler = new ssi_size_t[_n_streams];
        for (ssi_size_t n_fill = 0; n_fill < _n_streams; n_fill++) {
            _filler[n_fill] = 1;
            ssi_real_t filler_weight = weights[1][_n_classes];
            for (ssi_size_t n_model = 2; n_model < n_models; n_model++) {
                if (filler_weight < weights[n_model][_n_classes]) {
                    _filler[n_fill] = n_model;
                    filler_weight = weights[n_model][_n_classes];
                }
            }
            weights[_filler[n_fill]][_n_classes] = 0.0f;
        }

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            ssi_print ("\nfiller:\n");
            for (ssi_size_t n_model = 0; n_model < _n_streams; n_model++) {
                ssi_print ("%d ", _filler[n_model]);
            }
            ssi_print ("\n");
        }

    } else {

        _handle_md = false;

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            ssi_print ("\nNo missing data detected.\n");
        }

        ISMergeDim ffusionSamples (&samples);
        if (!models[0]->isTrained ()) {
            models[0]->train (ffusionSamples, 0);
        }

        //dummy
        _filler = new ssi_size_t[_n_streams];
        for (ssi_size_t n_fill = 0; n_fill < _n_streams; n_fill++) {
            _filler[n_fill] = 0;
        }
    }

    if (weights) {
        for (ssi_size_t n_model = 0; n_model < _n_models; n_model++) {
            delete[] weights[n_model];
        }
        delete[] weights;
        weights = 0;
    }

    return true;
}
bool WeightedMajorityVoting::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (samples.getStreamSize () != n_models) {
        ssi_wrn ("#models (%u) differs from #streams (%u)", n_models, samples.getStreamSize ());
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_streams = samples.getStreamSize ();
    _n_classes = samples.getClassSize ();
    _n_models = n_models;

    // per model: one weight per class plus the overall class-wise probability
    _weights = new ssi_real_t*[n_models];
    for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
        _weights[n_model] = new ssi_real_t[_n_classes+1];
    }

    if (samples.hasMissingData ()) {
        ISMissingData samples_h (&samples);
        Evaluation eval;
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                samples_h.setStream (n_model);
                models[n_model]->train (samples_h, n_model);
            }
            eval.eval (*models[n_model], samples_h, n_model);
            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                _weights[n_model][n_class] = eval.get_class_prob (n_class);
            }
            _weights[n_model][_n_classes] = eval.get_classwise_prob ();
        }
    } else {
        Evaluation eval;
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                models[n_model]->train (samples, n_model);
            }
            eval.eval (*models[n_model], samples, n_model);
            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                _weights[n_model][n_class] = eval.get_class_prob (n_class);
            }
            _weights[n_model][_n_classes] = eval.get_classwise_prob ();
        }
    }

    if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
        ssi_print ("\nClassifier Weights: \n");
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                ssi_print ("%f ", _weights[n_model][n_class]);
            }
            ssi_print ("%f\n", _weights[n_model][_n_classes]);
        }
    }

    return true;
}
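// A minimal sketch of how the weights computed above could be combined at
// decision time: each model votes for its predicted class and the vote is
// weighted by that model's per-class probability. The helper name and the
// 'predictions' array (one predicted class id per model) are assumptions;
// the fusion's actual forward code is not part of this listing.
ssi_size_t weighted_vote (ssi_real_t **weights, const ssi_size_t *predictions,
                          ssi_size_t n_models, ssi_size_t n_classes) {

    ssi_real_t *score = new ssi_real_t[n_classes];
    for (ssi_size_t c = 0; c < n_classes; c++) {
        score[c] = 0;
    }

    // accumulate each model's weight for the class it voted for
    for (ssi_size_t m = 0; m < n_models; m++) {
        ssi_size_t c = predictions[m];
        score[c] += weights[m][c];
    }

    // pick the class with the highest accumulated score
    ssi_size_t best = 0;
    for (ssi_size_t c = 1; c < n_classes; c++) {
        if (score[c] > score[best]) {
            best = c;
        }
    }

    delete[] score;
    return best;
}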
bool SVM::train (ISamples &samples, ssi_size_t stream_index) {

    if (_options.seed > 0) {
        srand (_options.seed);
    } else {
        srand (ssi_time_ms ());
    }

    // optionally balance the training set by random over- or under-sampling
    ISamples *s_balance = 0;
    switch (_options.balance) {
    case BALANCE::OFF: {
        s_balance = &samples;
        break;
    }
    case BALANCE::OVER: {
        s_balance = new ISOverSample (&samples);
        ssi_pcast (ISOverSample, s_balance)->setOver (ISOverSample::RANDOM);
        ssi_msg (SSI_LOG_LEVEL_BASIC, "balance training set '%u' -> '%u'", samples.getSize (), s_balance->getSize ());
        break;
    }
    case BALANCE::UNDER: {
        s_balance = new ISUnderSample (&samples);
        ssi_pcast (ISUnderSample, s_balance)->setUnder (ISUnderSample::RANDOM);
        ssi_msg (SSI_LOG_LEVEL_BASIC, "balance training set '%u' -> '%u'", samples.getSize (), s_balance->getSize ());
        break;
    }
    }

    _n_samples = s_balance->getSize ();

    if (_n_samples == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_classes = s_balance->getClassSize ();
    _n_features = s_balance->getStream (stream_index).dim;
    ssi_size_t elements = _n_samples * (_n_features + 1);

    init_class_names (*s_balance);

    // build the libsvm problem: one svm_node array per sample, terminated by index -1
    _problem = new svm_problem;
    _problem->l = ssi_cast (int, _n_samples);
    _problem->y = new double[_problem->l];
    _problem->x = new svm_node *[_problem->l];

    s_balance->reset ();
    ssi_sample_t *sample;
    int n_sample = 0;
    float *ptr = 0;
    svm_node *node = 0;
    while (sample = s_balance->next ()) {
        ptr = ssi_pcast (float, sample->streams[stream_index]->ptr);
        _problem->x[n_sample] = new svm_node[_n_features + 1];
        _problem->y[n_sample] = ssi_cast (float, sample->class_id);
        node = _problem->x[n_sample];
        for (ssi_size_t nfeat = 0; nfeat < _n_features; nfeat++) {
            node->index = nfeat+1;
            node->value = *ptr;
            ptr++;
            ++node;
        }
        node->index = -1;
        ++n_sample;
    }

    // default gamma to 1/#features if not set
    if (_options.params.gamma == 0 && _n_features > 0) {
        _options.params.gamma = 1.0 / _n_features;
    }

    if (_options.params.kernel_type == PRECOMPUTED) {
        int max_index = ssi_cast (int, _n_features);
        for (int i = 0; i < _problem->l; i++) {
            if (_problem->x[i][0].index != 0) {
                ssi_err ("wrong input format: first column must be 0:sample_serial_number");
            }
            if ((int)_problem->x[i][0].value <= 0 || (int)_problem->x[i][0].value > max_index) {
                ssi_err ("wrong input format: sample_serial_number out of range");
            }
        }
    }
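    // The listing ends here, before the actual libsvm training call. A typical
    // completion (a sketch under the assumption that '_model' is an svm_model*
    // member and '_options.params' is the svm_parameter struct used above, not
    // the original code) would validate the parameters and train on the
    // assembled problem:
    //
    //     const char *error = svm_check_parameter (_problem, &_options.params);
    //     if (error) {
    //         ssi_wrn ("%s", error);
    //         return false;
    //     }
    //     _model = svm_train (_problem, &_options.params);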