/**
 * Classify every row of a sparse matrix with a model restored from text.
 *
 * @param nrows       number of rows (documents) to classify
 * @param ncols       not used by this function
 * @param ia          row offsets; ia[r] - 1 is the first entry of row r and
 *                    ia[r + 1] - 1 is one past its last entry
 * @param ja          feature name of each stored entry
 * @param ra          feature value of each stored entry
 * @param model_data  serialized model handed to model.load_from_string()
 * @return a list holding, in this order: the label assigned to each row,
 *         the nrows x num_classes probability matrix, and the class label
 *         belonging to each matrix column
 */
RcppExport SEXP classify_samples(int nrows, int ncols, vector<double> ia, vector<string> ja, vector<double> ra, string model_data) {
    // new_model() and model are defined elsewhere in this file; the model is
    // (re)initialised and then restored from the serialized text.
    new_model();
    model.load_from_string(model_data);

    NumericMatrix probability_matrix(nrows, model.num_classes());
    vector<string> results;

    for (int row = 0; row < nrows; ++row) {
        ME_Sample document;

        // Feed every stored entry of this row to the sample. The upper bound
        // is deliberately compared as a double, exactly as stored in ia.
        int entry = ia[row] - 1;
        while (entry < ia[row + 1] - 1) {
            document.add_feature(ja[entry], ra[entry]);
            ++entry;
        }

        // classify() yields one probability per class; the label is read
        // back from the sample afterwards.
        vector<double> class_probs = model.classify(document);
        for (int cls = 0; cls < model.num_classes(); ++cls) {
            probability_matrix(row, cls) = class_probs[cls];
        }
        results.push_back(document.label);
    }

    // Column names for the probability matrix, in class-index order.
    vector<string> probability_names;
    for (int cls = 0; cls < model.num_classes(); ++cls) {
        probability_names.push_back(model.get_class_label(cls));
    }

    List output = List::create(results, probability_matrix, probability_names);
    return output;
}
/**
 * Build an ME_Sample from a flat token list.
 *
 * The first token becomes the sample's label; every following token is added
 * as a feature.
 *
 * @param vt  tokens in the order label, feature, feature, ...
 * @return the populated sample. For an empty token list a default-constructed
 *         sample is returned (no label assigned, no features) instead of
 *         reading vt[0] out of range.
 */
ME_Sample sample(const vector<string> & vt)
{
    ME_Sample result;

    // Nothing to read: indexing vt[0] on an empty vector is undefined.
    if (vt.empty()) {
        return result;
    }

    result.label = vt[0];
    for (size_t j = 1; j < vt.size(); ++j) {
        result.add_feature(vt[j]);
    }
    return result;
}
/**
 * Build the sample for token i of a token sequence.
 *
 * The label is the token's pos field. Features are the token itself, its two
 * neighbours on either side ("BOS"/"EOS" where the sequence ends), three
 * neighbour combinations, and every suffix and prefix of the token up to
 * ten characters long.
 *
 * @param vt  the token sequence
 * @param i   index of the token to describe; not range-checked here
 * @return the sample carrying the label and all features
 */
ME_Sample sample(const vector<Token> & vt, int i)
{
    const int count = (int)vt.size();

    ME_Sample result;
    result.label = vt[i].pos;

    const string & w0 = vt[i].str;

    // Neighbouring words, padded with sentinels at the sequence boundaries.
    const string prev1 = i > 0         ? vt[i - 1].str : "BOS";
    const string prev2 = i > 1         ? vt[i - 2].str : "BOS";
    const string next1 = i < count - 1 ? vt[i + 1].str : "EOS";
    const string next2 = i < count - 2 ? vt[i + 2].str : "EOS";

    // NOTE(review): the sign in each feature prefix is the opposite of the
    // positional offset ("W-1_" carries the following word, "W+1_" the
    // preceding one). The strings are kept exactly as they are; presumably
    // existing models depend on them - confirm before changing.
    result.add_feature("W0_" + w0);
    result.add_feature("W-1_" + next1);
    result.add_feature("W+1_" + prev1);
    result.add_feature("W-2_" + next2);
    result.add_feature("W+2_" + prev2);
    result.add_feature("W-10_" + next1 + "_" + w0);
    result.add_feature("W0+1_" + w0 + "_" + prev1);
    result.add_feature("W-1+1_" + next1 + "_" + prev1);

    // Affixes of length 1..10, never longer than the word itself. For each
    // length the suffix feature is emitted before the prefix feature.
    const size_t longest = w0.size() < 10 ? w0.size() : 10;
    for (size_t len = 1; len <= longest; ++len) {
        // The affix is appended as a C string, so the feature text ends at
        // the first NUL character should the word contain one.
        result.add_feature(string("SUF_") + w0.substr(w0.size() - len).c_str());
        result.add_feature(string("PRE_") + w0.substr(0, len).c_str());
    }

    return result;
}
int main(int argc, char* argv[]) { if (argc < 3 || argc > 4) { cerr << "Usage: " << argv[0] << "input output [path-to-ruby]" << endl; exit(1); } ME_Model model; string inFile = argv[1]; string outFile = argv[2]; //string modelFile = argv[3]; string modelFile = "model1-1.0"; string rubyCommand = (argc == 4) ? argv[3] : "ruby"; string eventFile = inFile + ".event"; string resultFile = inFile + ".result"; cerr << "Extracting events."; string extractionCommand = rubyCommand + " EventExtracter.rb " + inFile + " " + eventFile; system(extractionCommand.c_str()); cerr << "roading model file." << endl; model.load_from_file(modelFile.c_str()); //model.load_from_file("model" + setID + "-" + ineq); //ifstream fileIn(string("/home/users/y-matsu/private/workspace/eclipse-workspace/GENIASS/" + setID + "/test.txt").c_str()); //ofstream fileOut(string("/home/users/y-matsu/private/workspace/eclipse-workspace/GENIASS/" + setID + "/test-" + ineq + ".prob").c_str()); ifstream fileIn(eventFile.c_str()); ofstream fileOut(resultFile.c_str()); string line, markedTxt; getline(fileIn, markedTxt); cerr << "start classification." << endl; while (getline(fileIn, line)){ vector<string> tokens; split(line, tokens); ME_Sample s; for(vector<string>::const_iterator token = tokens.begin() + 1; token != tokens.end(); ++token){ s.add_feature(*token); } (void) model.classify(s); fileOut << s.label << endl; } fileOut.close(); fileIn.close(); remove(eventFile.c_str()); string splitCommand = rubyCommand + " Classifying2Splitting.rb " + resultFile + " " + markedTxt + " " + outFile; system(splitCommand.c_str()); return 0; }
//--------------------------------------------------------- bool CPresence_Prediction::On_Execute(void) { //----------------------------------------------------- EventSet DL_Events ; m_DL_Events = &DL_Events ; GISTrainer DL_Trainer; m_DL_Trainer = &DL_Trainer; MaxEntModel DL_Model ; m_DL_Model = &DL_Model ; m_YT_Model.clear(); //----------------------------------------------------- CSG_Grid *pPrediction = Parameters("PREDICTION" )->asGrid(); CSG_Grid *pProbability = Parameters("PROBABILITY")->asGrid(); if( !pPrediction ->Get_Range() ) DataObject_Set_Colors(pPrediction , 11, SG_COLORS_YELLOW_GREEN); if( !pProbability->Get_Range() ) DataObject_Set_Colors(pProbability, 11, SG_COLORS_YELLOW_GREEN); m_Method = Parameters("METHOD" )->asInt (); m_nNumClasses = Parameters("NUM_CLASSES" )->asInt (); m_bYT_Weights = Parameters("YT_NUMASREAL")->asBool(); //----------------------------------------------------- CSG_Array Features; if( !Get_Features(Features) ) { Error_Set(_TL("invalid features")); return( false ); } //----------------------------------------------------- if( m_Method == 0 && SG_File_Exists(Parameters("YT_FILE_LOAD")->asString()) ) { if( !Get_File(Parameters("YT_FILE_LOAD")->asString()) ) { return( false ); } } else if( !Get_Training() ) { return( false ); } //----------------------------------------------------- Process_Set_Text(_TL("prediction")); for(int y=0; y<Get_NY() && Set_Progress(y); y++) { #pragma omp parallel for for(int x=0; x<Get_NX(); x++) { int i; CSG_Strings Values; for(i=0; i<m_nFeatures; i++) { if( !m_Features[i].pGrid->is_NoData(x, y) ) { Values.Add(Get_Feature(x, y, i)); } else { break; } } if( Values.Get_Count() != m_nFeatures ) { pPrediction ->Set_NoData(x, y); pProbability->Set_NoData(x, y); } else switch( m_Method ) { //--------------------------------------------- default: // Kyoshida { ME_Sample Sample; for(i=0; i<m_nFeatures; i++) { if( m_bYT_Weights && m_Features[i].bNumeric ) { Sample.add_feature(m_Features[i].Name, 
m_Features[i].pGrid->asDouble(x, y)); } else { Sample.add_feature(Values[i].b_str()); } } vector<double> Probs = m_YT_Model.classify(Sample); pPrediction ->Set_Value(x, y, m_YT_Model.get_class_id(Sample.label) == 0 ? 1 : 0); pProbability->Set_Value(x, y, Probs[0]); } break; //--------------------------------------------- case 1: // Dekang Lin { MaxEntEvent Event; Event.count(1); for(i=0; i<m_nFeatures; i++) { Event.push_back(m_DL_Trainer->getId(Values[i].b_str())); } vector<double> Probs; pPrediction ->Set_Value(x, y, m_DL_Model->getProbs(Event, Probs) == 0 ? 1 : 0); pProbability->Set_Value(x, y, Probs[0]); } break; } } } return( true ); }