/**
 * Classify every row of a sparse matrix with a model restored from a string.
 *
 * The global model is reset via new_model() and then populated from
 * model_data. Row r of the input owns the entries of ja/ra at positions
 * ia[r]-1 (inclusive) up to ia[r+1]-1 (exclusive), so ia is read as a list
 * of 1-based row offsets and must provide at least nrows+1 values. No bounds
 * checking is performed on ia, ja or ra.
 *
 * @param nrows       number of rows (documents) to classify
 * @param ncols       not used by this function
 * @param ia          row offsets into ja/ra (1-based, see above)
 * @param ja          feature name of each stored entry
 * @param ra          feature value of each stored entry
 * @param model_data  serialized model handed to model.load_from_string()
 * @return an unnamed list holding, in order: the label assigned to each row,
 *         an nrows x num_classes matrix of class probabilities, and the class
 *         labels in the same order as the matrix columns
 */
RcppExport SEXP classify_samples(int nrows, int ncols, vector<double> ia, vector<string> ja, vector<double> ra, string model_data)
{
    new_model();
    model.load_from_string(model_data);

    const int class_count = model.num_classes();

    vector<string> results;
    vector<string> probability_names;
    NumericMatrix probability_matrix(nrows, class_count);

    for (int row = 0; row < nrows; ++row) {
        // Assemble the sample for this row from its slice of ja/ra.
        ME_Sample sample;
        const double row_end = ia[row + 1] - 1;
        for (int pos = static_cast<int>(ia[row] - 1); pos < row_end; ++pos) {
            sample.add_feature(ja[pos], ra[pos]);
        }

        // classify() yields the per-class probabilities; the predicted label
        // is read back from the sample afterwards.
        const vector<double> prob = model.classify(sample);
        for (int cls = 0; cls < class_count; ++cls) {
            probability_matrix(row, cls) = prob[cls];
        }
        results.push_back(sample.label);
    }

    // Column names for the probability matrix, in class-index order.
    for (int cls = 0; cls < class_count; ++cls) {
        probability_names.push_back(model.get_class_label(cls));
    }

    List output = List::create(results, probability_matrix, probability_names);
    return output;
}
void viterbi(vector<Token> & vt, const ME_Model & me) { if (vt.size() == 0) return; vector< vector<double> > mat; vector< vector<int> > bpm; vector<double> vd(me.num_classes()); for (size_t j = 0; j < vd.size(); j++) vd[j] = 0; mat.push_back(vd); for (size_t i = 0; i < vt.size(); i++) { vector<double> vd(me.num_classes()); for (size_t j = 0; j < vd.size(); j++) vd[j] = -999999; vector<int> bp(me.num_classes()); double maxl = -999999; for (size_t j = 0; j < vd.size(); j++) { if (mat[i][j] > maxl) maxl = mat[i][j]; } for (size_t j = 0; j < vd.size(); j++) { if (mat[i][j] < maxl - BEAM_WIDTH) continue; // beam thresholding string prepos = me.get_class_label(j); if (i == 0) { if (j > 0) continue; prepos = "BOS"; } // prepos = me.get_class_name(j); // if (i == 0 && prepos != "BOS") continue; ME_Sample mes = mesample(vt, i, prepos); vector<double> membp = me.classify(mes); for (size_t k = 0; k < vd.size(); k++) { double l = mat[i][j] + log(membp[k]); if (l > vd[k]) { bp[k] = j; vd[k] = l; } } } mat.push_back(vd); // for (int k = 0; k < vd.size(); k++) cout << bp[k] << " "; // cout << endl; bpm.push_back(bp); } /* for (int i = 0; i < vt.size(); i++) { int max_prd = 0; for (int j = 0; j < vd.size(); j++) { double l = mat[i+1][j]; if (l > mat[i+1][max_prd]) { max_prd = j; } } vt[i].prd = me.get_class_name(max_prd); } */ // cout << "viterbi "; int max_prd = 0; int n = vt.size(); for (size_t j = 0; j < vd.size(); j++) { double l = mat[n][j]; if (l > mat[n][max_prd]) { max_prd = j; } } vt[n-1].prd = me.get_class_label(max_prd); for (int i = vt.size() - 2; i >= 0; i--) { // cout << max_prd << " "; // cerr << max_prd << " "; if (max_prd < 0 || max_prd >= me.num_classes()) exit(0); max_prd = bpm[i+1][max_prd]; vt[i].prd = me.get_class_label(max_prd); } // cout << endl; }