void Evaluation::evalSplit (Trainer *trainer, ISamples &samples, ssi_real_t split) {

    if (split <= 0 || split >= 1) {
        ssi_err ("split must be a value between 0 and 1");
    }

    _trainer = trainer;
    destroy_conf_mat ();
    init_conf_mat (samples);

    ssi_size_t *indices = new ssi_size_t[samples.getSize ()];
    ssi_size_t *indices_count_lab = new ssi_size_t[samples.getClassSize ()];
    ssi_size_t indices_count_all = 0;
    for (ssi_size_t j = 0; j < samples.getClassSize (); j++) {
        indices_count_lab[j] = 0;
    }

    // per class, select the first 'split' fraction of samples for training
    ssi_size_t label;
    ssi_size_t label_size;
    for (ssi_size_t j = 0; j < samples.getSize (); j++) {
        label = samples.get (j)->class_id;
        label_size = samples.getSize (label);
        if (++indices_count_lab[label] <= ssi_cast (ssi_size_t, label_size * split + 0.5f)) {
            indices[indices_count_all++] = j;
        }
    }

    SampleList strain;
    SampleList stest;

    // split off the selected samples, the rest is kept for testing
    ModelTools::SelectSampleList (samples, strain, stest, indices_count_all, indices);
    _n_total = stest.getSize ();
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    // train with the selected samples
    _trainer->release ();
    if (_preproc_mode) {
        _trainer->setPreprocMode (_preproc_mode, _n_streams_refs, _stream_refs);
    } else if (_fselmethod) {
        _trainer->setSelection (strain, _fselmethod, _pre_fselmethod, _n_pre_feature);
    }
    _trainer->train (strain);

    // test with the remaining samples
    eval_h (stest);

    delete[] indices;
    delete[] indices_count_lab;
}
void Evaluation::evalKFold (Trainer *trainer, ISamples &samples, ssi_size_t k) {

    // init confusion matrix
    _trainer = trainer;
    destroy_conf_mat ();
    init_conf_mat (samples);

    _n_total = samples.getSize ();
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    ssi_size_t *indices = new ssi_size_t[samples.getSize ()];
    ssi_size_t *indices_count_lab = new ssi_size_t[samples.getClassSize ()];
    ssi_size_t indices_count_all;

    for (ssi_size_t i = 0; i < k; ++i) {

        indices_count_all = 0;
        for (ssi_size_t j = 0; j < samples.getClassSize (); j++) {
            indices_count_lab[j] = 0;
        }

        // collect the indices of the i'th fold (class-wise, every k'th sample)
        ssi_size_t label;
        for (ssi_size_t j = 0; j < samples.getSize (); j++) {
            label = samples.get (j)->class_id;
            if (++indices_count_lab[label] % k == i) {
                indices[indices_count_all++] = j;
            }
        }

        SampleList strain;
        SampleList stest;

        // split off the i'th fold for testing, the remaining folds are used for training
        ModelTools::SelectSampleList (samples, stest, strain, indices_count_all, indices);

        // train with the remaining folds
        _trainer->release ();
        if (_fselmethod) {
            _trainer->setSelection (strain, _fselmethod, _pre_fselmethod, _n_pre_feature);
        }
        if (_preproc_mode) {
            _trainer->setPreprocMode (_preproc_mode, _n_streams_refs, _stream_refs);
        }
        _trainer->train (strain);

        // test with the i'th fold
        eval_h (stest);
    }

    delete[] indices;
    delete[] indices_count_lab;
}
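// A minimal usage sketch for the two evaluation routines above (not part of the
// framework sources). It assumes an already filled sample list and uses the
// MyModel class defined further below in this section; the Trainer constructor
// taking a model pointer is an assumption based on how the trainer is used here.
void evaluate_my_model (ISamples &samples) {

    MyModel model;
    Trainer trainer (&model);
    Evaluation eval;

    // train on the first 50% of each class, test on the remaining 50%
    eval.evalSplit (&trainer, samples, 0.5f);
    eval.print ();

    // 5-fold cross-validation: each fold is tested once, trained on the remaining folds
    eval.evalKFold (&trainer, samples, 5);
    eval.print ();
}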
bool Fisher::build (ISamples &samples, ssi_size_t stream_index) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isBuild ()) {
        ssi_wrn ("already trained");
        return false;
    }

    ae_state state;
    ae_int_t info;

    ae_matrix data;
    ae_matrix_init (&data, 0, 0, DT_REAL, &state, ae_true);

    // convert the samples of the selected stream to a matrix where the last column
    // holds the class id to which the sample belongs
    AlgLibTools::Samples2MatrixWithClass (samples, stream_index, &data);

    _basis = new ae_matrix;
    ae_matrix_init (_basis, 0, 0, DT_REAL, &state, ae_true);

    fisherldan (&data, data.rows, data.cols - 1, samples.getClassSize (), &info, _basis, &state);

    ae_matrix_clear (&data);

    _is_build = true;

    return true;
}
void Evaluation::init_conf_mat (ISamples &samples) {

    _n_classes = samples.getClassSize ();

    // store class names
    _class_names = new ssi_char_t *[_n_classes];
    for (ssi_size_t i = 0; i < _n_classes; i++) {
        _class_names[i] = ssi_strcpy (samples.getClassName (i));
    }

    // allocate confusion matrix
    _conf_mat_ptr = new ssi_size_t *[_n_classes];
    _conf_mat_data = new ssi_size_t[_n_classes * _n_classes];
    for (ssi_size_t i = 0; i < _n_classes; ++i) {
        _conf_mat_ptr[i] = _conf_mat_data + i*_n_classes;
    }

    // set all elements in the confusion matrix to zero
    for (ssi_size_t i = 0; i < _n_classes; ++i) {
        for (ssi_size_t j = 0; j < _n_classes; ++j) {
            _conf_mat_ptr[i][j] = 0;
        }
    }

    _n_unclassified = 0;
    _n_classified = 0;
}
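// A self-contained sketch (not part of the framework) of how per-class and
// class-wise probabilities can be derived from a confusion matrix like the one
// allocated above: the per-class probability is the fraction of samples of a
// class that were recognized correctly, the class-wise probability is the mean
// over all classes. Whether get_class_prob()/get_classwise_prob(), used later in
// this section, are implemented exactly like this is an assumption.
#include <cstddef>

// recall of class 'index': diagonal element divided by the row sum
static double class_prob (const unsigned *conf_mat, std::size_t n_classes, std::size_t index) {
    unsigned row_sum = 0;
    for (std::size_t j = 0; j < n_classes; j++) {
        row_sum += conf_mat[index * n_classes + j];
    }
    return row_sum == 0 ? 0.0 : static_cast<double> (conf_mat[index * n_classes + index]) / row_sum;
}

// unweighted mean of the per-class recalls
static double classwise_prob (const unsigned *conf_mat, std::size_t n_classes) {
    double sum = 0.0;
    for (std::size_t i = 0; i < n_classes; i++) {
        sum += class_prob (conf_mat, n_classes, i);
    }
    return n_classes == 0 ? 0.0 : sum / n_classes;
}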
void Evaluation::eval (IFusion &fusion, ssi_size_t n_models, IModel **models, ISamples &samples) {

    // init confusion matrix
    _trainer = 0;
    destroy_conf_mat ();
    init_conf_mat (samples);

    ssi_size_t n_classes = samples.getClassSize ();
    ssi_real_t *probs = new ssi_real_t[n_classes];

    _n_total = samples.getSize ();
    _result_vec = new ssi_size_t[2*_n_total];
    _result_vec_ptr = _result_vec;

    samples.reset ();
    const ssi_sample_t *sample = 0;
    while (sample = samples.next ()) {

        ssi_size_t real_index = sample->class_id;
        *_result_vec_ptr++ = real_index;

        if (fusion.forward (n_models, models, sample->num, sample->streams, n_classes, probs)) {

            // pick the class with the highest probability
            ssi_size_t max_ind = 0;
            ssi_real_t max_val = probs[0];
            for (ssi_size_t i = 1; i < n_classes; i++) {
                if (probs[i] > max_val) {
                    max_val = probs[i];
                    max_ind = i;
                }
            }

            *_result_vec_ptr++ = max_ind;
            _conf_mat_ptr[real_index][max_ind]++;
            _n_classified++;

        } else if (!_allow_unclassified) {

            // assign the default class if unclassified samples are not allowed
            ssi_size_t max_ind = _default_class_id;
            *_result_vec_ptr++ = max_ind;
            _conf_mat_ptr[real_index][max_ind]++;
            _n_classified++;

        } else {

            *_result_vec_ptr++ = SSI_ISAMPLES_GARBAGE_CLASS_ID;
            _n_unclassified++;
        }
    }

    delete[] probs;
}
bool MyModel::train (ISamples &samples, ssi_size_t stream_index) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_classes = samples.getClassSize ();
    _n_features = samples.getStream (stream_index).dim;

    // allocate one center per class and initialize it with zeros
    _centers = new ssi_real_t *[_n_classes];
    for (ssi_size_t i = 0; i < _n_classes; i++) {
        _centers[i] = new ssi_real_t[_n_features];
        for (ssi_size_t j = 0; j < _n_features; j++) {
            _centers[i][j] = 0;
        }
    }

    // sum up the feature values of all samples belonging to a class
    ssi_sample_t *sample;
    samples.reset ();
    ssi_real_t *ptr = 0;
    while (sample = samples.next ()) {
        ssi_size_t id = sample->class_id;
        ptr = ssi_pcast (ssi_real_t, sample->streams[stream_index]->ptr);
        for (ssi_size_t j = 0; j < _n_features; j++) {
            _centers[id][j] += ptr[j];
        }
    }

    // divide by the number of samples per class to obtain the class centers
    for (ssi_size_t i = 0; i < _n_classes; i++) {
        ssi_size_t num = samples.getSize (i);
        for (ssi_size_t j = 0; j < _n_features; j++) {
            _centers[i][j] /= num;
        }
    }

    return true;
}
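// A self-contained sketch (not part of MyModel) of the classification step that
// matches the training code above: the class whose center has the smallest
// Euclidean distance to the feature vector wins. MyModel::forward() may differ
// in detail; this only illustrates the nearest-center rule.
#include <cstddef>

static std::size_t nearest_center (const float *const *centers, std::size_t n_classes,
    std::size_t n_features, const float *sample) {

    std::size_t best_class = 0;
    double best_dist = -1.0;

    for (std::size_t i = 0; i < n_classes; i++) {
        double dist = 0.0;
        for (std::size_t j = 0; j < n_features; j++) {
            double d = sample[j] - centers[i][j];
            dist += d * d; // squared Euclidean distance is sufficient for comparison
        }
        if (best_dist < 0.0 || dist < best_dist) {
            best_dist = dist;
            best_class = i;
        }
    }

    return best_class;
}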
bool MajorityVoting::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (samples.getStreamSize () != n_models) {
        ssi_wrn ("#models (%u) differs from #streams (%u)", n_models, samples.getStreamSize ());
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_streams = samples.getStreamSize ();
    _n_classes = samples.getClassSize ();
    _n_models = n_models;

    if (samples.hasMissingData ()) {

        // train each model only on those samples that are complete for its stream
        ISMissingData samples_h (&samples);
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                samples_h.setStream (n_model);
                models[n_model]->train (samples_h, n_model);
            }
        }

    } else {

        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                models[n_model]->train (samples, n_model);
            }
        }
    }

    return true;
}
bool SimpleKNN::train (ISamples &samples, ssi_size_t stream_index) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (samples.getSize () < _options.k) {
        ssi_wrn ("sample list has less than '%u' entries", _options.k);
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_classes = samples.getClassSize ();
    _n_samples = samples.getSize ();
    _n_features = samples.getStream (stream_index).dim;

    // k-NN is a lazy learner: simply store a copy of all feature vectors and class ids
    _data = new ssi_real_t[_n_features*_n_samples];
    _classes = new ssi_size_t[_n_samples];

    ssi_sample_t *sample;
    samples.reset ();
    ssi_real_t *data_ptr = _data;
    ssi_size_t *class_ptr = _classes;
    ssi_size_t bytes_to_copy = _n_features * sizeof (ssi_real_t);
    while (sample = samples.next ()) {
        memcpy (data_ptr, sample->streams[stream_index]->ptr, bytes_to_copy);
        *class_ptr++ = sample->class_id;
        data_ptr += _n_features;
    }

    return true;
}
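// A self-contained sketch (not part of SimpleKNN) of how the stored samples can
// be used for classification: find the k nearest neighbours by Euclidean
// distance and return the most frequent class among them. The actual
// SimpleKNN::forward() may differ; this only illustrates the k-NN decision rule.
#include <cstddef>
#include <utility>
#include <vector>
#include <algorithm>

static std::size_t knn_classify (const float *data, const std::size_t *classes,
    std::size_t n_samples, std::size_t n_features, std::size_t n_classes,
    const float *sample, std::size_t k) {

    // squared distance to every stored sample
    std::vector<std::pair<double, std::size_t> > dist (n_samples);
    for (std::size_t i = 0; i < n_samples; i++) {
        double d = 0.0;
        for (std::size_t j = 0; j < n_features; j++) {
            double diff = sample[j] - data[i * n_features + j];
            d += diff * diff;
        }
        dist[i] = std::make_pair (d, classes[i]);
    }

    // move the k closest samples to the front (assumes k <= n_samples, as checked in train)
    std::partial_sort (dist.begin (), dist.begin () + k, dist.end ());

    // majority vote among the k nearest neighbours
    std::vector<std::size_t> votes (n_classes, 0);
    for (std::size_t i = 0; i < k; i++) {
        votes[dist[i].second]++;
    }
    return std::distance (votes.begin (), std::max_element (votes.begin (), votes.end ()));
}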
bool FileSamplesOut::open (ISamples &data, const ssi_char_t *path, File::TYPE type, File::VERSION version) {

    ssi_msg (SSI_LOG_LEVEL_DETAIL, "open files '%s'", path);

    _version = version;

    if (_version < File::V2) {
        ssi_wrn ("version < V2 not supported");
        return false;
    }

    if (_file_info || _file_data) {
        ssi_wrn ("samples already open");
        return false;
    }

    // store user names
    _n_users = data.getUserSize ();
    _users = new ssi_char_t *[_n_users];
    _n_per_user = new ssi_size_t[_n_users];
    for (ssi_size_t i = 0; i < _n_users; i++) {
        _users[i] = ssi_strcpy (data.getUserName (i));
        _n_per_user[i] = 0;
    }

    // store class names
    _n_classes = data.getClassSize ();
    _classes = new ssi_char_t *[_n_classes];
    _n_per_class = new ssi_size_t[_n_classes];
    for (ssi_size_t i = 0; i < _n_classes; i++) {
        _classes[i] = ssi_strcpy (data.getClassName (i));
        _n_per_class[i] = 0;
    }

    // store stream layout
    _n_streams = data.getStreamSize ();
    _streams = new ssi_stream_t[_n_streams];
    for (ssi_size_t i = 0; i < _n_streams; i++) {
        ssi_stream_t s = data.getStream (i);
        ssi_stream_init (_streams[i], 0, s.dim, s.byte, s.type, s.sr, 0);
    }

    _has_missing_data = false;

    if (path == 0 || path[0] == '\0') {
        _console = true;
    }

    if (_console) {

        _file_data = File::CreateAndOpen (type, File::WRITE, "");
        if (!_file_data) {
            ssi_wrn ("could not open console");
            return false;
        }

    } else {

        FilePath fp (path);
        ssi_char_t *path_info = 0;
        if (strcmp (fp.getExtension (), SSI_FILE_TYPE_SAMPLES) != 0) {
            path_info = ssi_strcat (path, SSI_FILE_TYPE_SAMPLES);
        } else {
            path_info = ssi_strcpy (path);
        }
        _path = ssi_strcpy (path_info);

        _file_info = File::CreateAndOpen (File::ASCII, File::WRITE, path_info);
        if (!_file_info) {
            ssi_wrn ("could not open info file '%s'", path_info);
            return false;
        }

        ssi_sprint (_string, "<?xml version=\"1.0\" ?>\n<samples ssi-v=\"%d\">", version);
        _file_info->writeLine (_string);

        ssi_char_t *path_data = ssi_strcat (path_info, "~");
        _file_data = File::CreateAndOpen (type, File::WRITE, path_data);
        if (!_file_data) {
            ssi_wrn ("could not open data file '%s'", path_data);
            return false;
        }

        if (_version == File::V3) {
            _file_streams = new FileStreamOut[_n_streams];
            ssi_char_t string[SSI_MAX_CHAR];
            for (ssi_size_t i = 0; i < _n_streams; i++) {
                ssi_sprint (string, "%s.#%u", path_info, i);
                _file_streams[i].open (_streams[i], string, type);
            }
        }

        delete[] path_info;
        delete[] path_data;
    }

    return true;
}
bool FeatureFusion::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_streams = samples.getStreamSize ();
    _n_classes = samples.getClassSize ();
    _n_models = n_models;

    // initialize weights
    ssi_real_t **weights = new ssi_real_t*[n_models];
    for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
        weights[n_model] = new ssi_real_t[_n_classes+1];
    }

    if (samples.hasMissingData ()) {

        _handle_md = true;
        ISMissingData samples_h (&samples);
        Evaluation eval;

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            ssi_print ("\nMissing data detected.\n");
        }

        // models[0] is the feature fusion model, followed by the single channel models
        ISMergeDim ffusionSamples (&samples);
        ISMissingData ffusionSamples_h (&ffusionSamples);
        ffusionSamples_h.setStream (0);
        if (!models[0]->isTrained ()) {
            models[0]->train (ffusionSamples_h, 0);
        }

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            eval.eval (*models[0], ffusionSamples_h, 0);
            eval.print ();
        }

        // dummy weights for the fused model
        for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
            weights[0][n_class] = 0.0f;
        }
        weights[0][_n_classes] = 0.0f;

        // train the single channel models and store their recognition rates as weights
        for (ssi_size_t n_model = 1; n_model < n_models; n_model++) {

            if (!models[n_model]->isTrained ()) {
                samples_h.setStream (n_model - 1);
                models[n_model]->train (samples_h, n_model - 1);
            }

            eval.eval (*models[n_model], samples_h, n_model - 1);
            if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
                eval.print ();
            }

            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                weights[n_model][n_class] = eval.get_class_prob (n_class);
            }
            weights[n_model][_n_classes] = eval.get_classwise_prob ();
        }

        // calculate fillers: for each stream pick the remaining model with the highest class-wise probability
        _filler = new ssi_size_t[_n_streams];
        for (ssi_size_t n_fill = 0; n_fill < _n_streams; n_fill++) {
            _filler[n_fill] = 1;
            ssi_real_t filler_weight = weights[1][_n_classes];
            for (ssi_size_t n_model = 2; n_model < n_models; n_model++) {
                if (filler_weight < weights[n_model][_n_classes]) {
                    _filler[n_fill] = n_model;
                    filler_weight = weights[n_model][_n_classes];
                }
            }
            weights[_filler[n_fill]][_n_classes] = 0.0f;
        }

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            ssi_print ("\nfiller:\n");
            for (ssi_size_t n_model = 0; n_model < _n_streams; n_model++) {
                ssi_print ("%d ", _filler[n_model]);
            }
            ssi_print ("\n");
        }

    } else {

        _handle_md = false;

        if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
            ssi_print ("\nNo missing data detected.\n");
        }

        ISMergeDim ffusionSamples (&samples);
        if (!models[0]->isTrained ()) {
            models[0]->train (ffusionSamples, 0);
        }

        // dummy filler
        _filler = new ssi_size_t[_n_streams];
        for (ssi_size_t n_fill = 0; n_fill < _n_streams; n_fill++) {
            _filler[n_fill] = 0;
        }
    }

    if (weights) {
        for (ssi_size_t n_model = 0; n_model < _n_models; n_model++) {
            delete[] weights[n_model];
        }
        delete[] weights;
        weights = 0;
    }

    return true;
}
bool WeightedMajorityVoting::train (ssi_size_t n_models, IModel **models, ISamples &samples) {

    if (samples.getSize () == 0) {
        ssi_wrn ("empty sample list");
        return false;
    }

    if (samples.getStreamSize () != n_models) {
        ssi_wrn ("#models (%u) differs from #streams (%u)", n_models, samples.getStreamSize ());
        return false;
    }

    if (isTrained ()) {
        ssi_wrn ("already trained");
        return false;
    }

    _n_streams = samples.getStreamSize ();
    _n_classes = samples.getClassSize ();
    _n_models = n_models;

    // per model, one weight per class plus one weight for the class-wise recognition rate
    _weights = new ssi_real_t*[n_models];
    for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
        _weights[n_model] = new ssi_real_t[_n_classes+1];
    }

    if (samples.hasMissingData ()) {

        ISMissingData samples_h (&samples);
        Evaluation eval;
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                samples_h.setStream (n_model);
                models[n_model]->train (samples_h, n_model);
            }
            eval.eval (*models[n_model], samples_h, n_model);
            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                _weights[n_model][n_class] = eval.get_class_prob (n_class);
            }
            _weights[n_model][_n_classes] = eval.get_classwise_prob ();
        }

    } else {

        Evaluation eval;
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            if (!models[n_model]->isTrained ()) {
                models[n_model]->train (samples, n_model);
            }
            eval.eval (*models[n_model], samples, n_model);
            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                _weights[n_model][n_class] = eval.get_class_prob (n_class);
            }
            _weights[n_model][_n_classes] = eval.get_classwise_prob ();
        }
    }

    if (ssi_log_level >= SSI_LOG_LEVEL_DEBUG) {
        ssi_print ("\nClassifier Weights: \n");
        for (ssi_size_t n_model = 0; n_model < n_models; n_model++) {
            for (ssi_size_t n_class = 0; n_class < _n_classes; n_class++) {
                ssi_print ("%f ", _weights[n_model][n_class]);
            }
            ssi_print ("%f\n", _weights[n_model][_n_classes]);
        }
    }

    return true;
}
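// A self-contained sketch (not part of WeightedMajorityVoting) of how the
// learned weights can be combined with the individual model decisions: each
// model votes for its predicted class and the vote is weighted with the model's
// recognition rate for that class. The actual forward() implementation may
// differ; this only illustrates the weighted voting rule.
#include <cstddef>
#include <vector>
#include <algorithm>

static std::size_t weighted_vote (const std::size_t *decisions,   // predicted class per model
    const float *const *weights,                                   // per model: n_classes (+1) weights
    std::size_t n_models, std::size_t n_classes) {

    std::vector<double> votes (n_classes, 0.0);
    for (std::size_t m = 0; m < n_models; m++) {
        // weight the vote with the model's recognition rate for the predicted class
        votes[decisions[m]] += weights[m][decisions[m]];
    }
    return std::distance (votes.begin (), std::max_element (votes.begin (), votes.end ()));
}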