int main(int argc, char* argv[]) { if(argc == 1) { cout << "You must pass some parameters. Run \"./KNN help\" to take some tips." << endl; } if(argc == 2 && strcmp(argv[1],"help") == 0) { cout << "This is a implemantation of KNN algorithm. Case of handwritten numbers." << endl << endl; cout << "[USAGE]: ./KNN [K] [dataTraineFile] [dataTestFile]" << endl; } if(argc == 4) { int k = atoi(argv[1]); string dataTraineFile = argv[2]; string dataTestFile = argv[3]; KNN knn = KNN(k); double precision = knn.analysisData(dataTraineFile, dataTestFile); cout << "Precision of this test data: " << precision << "%" << endl << endl; cout << "You can view details of this execution in result.txt" << endl; } return 0; }
int main() { cout << "!!!DataTeam project start!!!" << endl; // int totalTrainRecords = 878049; int totalTrainRecords = 100000; int totalTestRecords = 884261; string predictFieldName = "Category"; string trainFileName = string("../DataTeam/files/train.csv"); string testFileName = string("../DataTeam/files/test.csv"); string trainNewFileName = string("../DataTeam/files/trainNew.csv"); string submissionFileName = string("../DataTeam/files/submission.csv"); // Preparar el archivo para que funcione al pasarlo al NaiveBayes FileManager fileManager = FileManager(totalTrainRecords, trainFileName, trainNewFileName); fileManager.process(); // Pasarle el archivo con los datos de los registros como numericos // NaiveBayes naiveBayes = NaiveBayes(predictFieldName, fileManager.getSetterData()); // naiveBayes.train(totalTrainRecords, trainNewFileName); // naiveBayes.test(totalTestRecords, testFileName, submissionFileName); KNN knn = KNN(50, fileManager.getSetterData()); knn.aplicarKNN(trainNewFileName,testFileName,submissionFileName); cout << "Finish run app" << endl; return 0; }
// Tests the default constructor TEST(KNN, Constructor) { KNN knn; //Check the type matches EXPECT_TRUE( knn.getClassifierType() == KNN::getId() ); //Check the module is not trained EXPECT_TRUE( !knn.getTrained() ); }
/**
 * Predicts a value for one road/item by blending two sub-predictors chosen
 * by the road's jam count:
 *   jc <  A  -> SmallAverage only
 *   jc >  B  -> KNN ("midAvg") only
 *   A..B     -> linear interpolation between the two
 *
 * @param db         data source queried for jam counts and sub-predictor state
 * @param roadIndex  index of the road being predicted
 * @param item       the record to predict for
 * @return blended prediction value
 */
double predict(Database* db, int roadIndex, const Item& item) {
    // Blend window. Earlier tuning used 480/520; current values widen and
    // shift the crossover to much higher jam counts.
    //double A = 480;
    //double B = 520;
    const double A = 780;
    const double B = 820;

    double jc = (double)db->getJamCount(roadIndex);
    // var currently feeds only the disabled branch below; the call is kept
    // in case getVar has side effects — TODO confirm and drop if pure.
    double var = cluster->getVar(db, roadIndex);

    // Lazily build the sub-predictors on first use (nullptr instead of 0 for
    // pointer checks). NOTE(review): raw `new` with no visible delete —
    // presumably these pointers live for the program's lifetime; confirm owner.
    if ( midAvg == nullptr ) {
        midAvg = new KNN(db, LOWER_LIMIT, UPPER_LIMIT, cluster);
        midAvg->initialize(db);
    }
    if ( smallAvg == nullptr ) {
        smallAvg = new SmallAverage(db, cluster, LOWER_LIMIT);
        smallAvg->initialize(db);
    }

    // Disabled variance-based shortcut, kept for reference:
    /* if ( jc >= 30 && var >= 25.0 ) { return midAvg->predict(db, roadIndex, item); }*/

    if ( jc > B ) {
        return midAvg->predict(db, roadIndex, item);
    }
    // return 50.0;
    if ( jc < A ) {
        return smallAvg->predict(db, roadIndex, item);
    }

    // Inside the [A, B] window: weight KNN by how far jc is into the window.
    double mid = midAvg->predict(db, roadIndex, item);
    double small = smallAvg->predict(db, roadIndex, item);
    double K = (jc - A)/(B - A);
    double res = small*(1.0 - K) + mid*K;
    return res;
}
void iris() { vector<vector<double>> examples; vector<string> target; string data_file_name = "iris.data"; ifstream input(data_file_name.c_str()); string line, tar; double a, b, c ,d; while(getline(input, line)) { istringstream iss(line); vector<double> v; iss>>a>>b>>c>>d>>tar; examples.push_back({a, b, c, d}); target.push_back(tar); } input.close(); vector<vector<double>> test, train; vector<string> tar_test, tar_train; for(int i = 0; i < (int) examples.size(); ++i) { if(i%50 >= 40) test.push_back(examples[i]), tar_test.push_back(target[i]); else train.push_back(examples[i]), tar_train.push_back(target[i]); } KNN knn; knn.set_data(train, tar_train, 3); for(int i = 0; i < (int)test.size(); ++i) { auto ans = knn.classify(test[i]); cout<<'<'; for(auto& j:test[i]) cout<<j<<", "; cout<<tar_test[i]<<">\t the answer is "<<ans<<endl; } }
// Tests the learning algorithm on a basic dataset TEST(KNN, TrainBasicDataset) { KNN knn; //Check the module is not trained EXPECT_TRUE( !knn.getTrained() ); //Generate a basic dataset const UINT numSamples = 1000; const UINT numClasses = 10; const UINT numDimensions = 10; ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 ); ClassificationData trainingData; EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) ); ClassificationData testData = trainingData.split( 50 ); //Train the classifier EXPECT_TRUE( knn.train( trainingData ) ); EXPECT_TRUE( knn.getTrained() ); EXPECT_TRUE( knn.print() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } EXPECT_TRUE( knn.save( "knn_model.grt" ) ); knn.clear(); EXPECT_TRUE( !knn.getTrained() ); EXPECT_TRUE( knn.load( "knn_model.grt" ) ); EXPECT_TRUE( knn.getTrained() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } }
// Incrementally inserts n vertices into a Delaunay triangulation in parallel
// batches. Each round takes a batch of "trial" vertices from the top of v,
// locates each one with a k-nearest-neighbor structure, carves out its
// cavity, and commits the insertions whose cavity reservations succeeded;
// vertices that lost a reservation conflict are packed back to be retried.
//
// Parameters:
//   v     - array of n vertex pointers to insert (consumed from the top down)
//   n     - number of vertices
//   start - a seed vertex used to build the initial point-location structure
void incrementallyAddPoints(vertex** v, int n, vertex* start) {
  // Batch-size divisor: defaults to 100 rounds when unset/invalid.
  int numRounds = Exp::getNumRounds();
  numRounds = numRounds <= 0 ? 100 : numRounds;

  // various structures needed for each parallel insertion
  //int maxR = (int) (n/100) + 1; // maximum number to try in parallel
  int maxR = (int) (n/numRounds) + 1; // maximum number to try in parallel
  Qs *qqs = newA(Qs,maxR);
  Qs **qs = newA(Qs*,maxR);
  // Placement-new each Qs into the preallocated raw buffer.
  for (int i=0; i < maxR; i++) {
    qs[i] = new (&qqs[i]) Qs;
  }
  simplex *t = newA(simplex,maxR);      // containing simplex per trial vertex
  bool *flags = newA(bool,maxR);        // per-vertex success/failure flags
  vertex** h = newA(vertex*,maxR);      // scratch space for packing

  // create a point location structure
  typedef kNearestNeighbor<vertex,1> KNN;
  KNN knn = KNN(&start, 1);
  int multiplier = 8; // when to regenerate
  int nextNN = multiplier;

  int top=n;       // vertices v[top..n-1] have been inserted already
  int rounds = 0;
  int failed = 0;

  // process all vertices starting just below top
  while(top > 0) {
    // every once in a while create a new point location
    // structure using all points inserted so far
    if ((n-top)>=nextNN && (n-top) < n/multiplier) {
      knn.del();
      knn = KNN(v+top, n-top);
      nextNN = nextNN*multiplier;
    }

    // determine how many vertices to try in parallel
    //int cnt = 1 + (n-top)/100; // 100 is pulled out of a hat
    int cnt = 1 + (n-top)/numRounds;
    cnt = (cnt > maxR) ? maxR : cnt;  // don't exceed scratch capacity
    cnt = (cnt > top) ? top : cnt;    // don't exceed remaining vertices
    vertex **vv = v+top-cnt;          // this round's trial vertices

    // for trial vertices find containing triangle, determine cavity
    // and reserve vertices on boundary of cavity
    // parallel_for (int j = 0; j < cnt; j++) {
    parallel_doall(int, j, 0, cnt) {
      vertex *u = knn.nearest(vv[j]);
      t[j] = find(vv[j],simplex(u->t,0));
      reserveForInsert(vv[j],t[j],qs[j]);
    } parallel_doall_end

    // For trial vertices check if they own their boundary and
    // update mesh if so.  flags[i] is 1 if failed (need to retry)
    // parallel_for (int j = 0; j < cnt; j++) {
    parallel_doall(int, j, 0, cnt) {
      flags[j] = insert(vv[j],t[j],qs[j]);
    } parallel_doall_end

    // Pack failed vertices back onto Q and successful
    // ones up above (needed for point location structure)
    int k = sequence::pack(vv,h,flags,cnt);
    // parallel_for (int j = 0; j < cnt; j++) flags[j] = !flags[j];
    parallel_doall(int, j, 0, cnt) {
      flags[j] = !flags[j];
    } parallel_doall_end
    sequence::pack(vv,h+k,flags,cnt);
    // parallel_for (int j = 0; j < cnt; j++) vv[j] = h[j];
    parallel_doall(int, j, 0, cnt) {
      vv[j] = h[j];
    } parallel_doall_end
    failed += k;
    top = top-cnt+k; // adjust top, accounting for failed vertices
    rounds++;
  }

  knn.del();
  free(qqs);
  free(qs);
  free(t);
  free(flags);
  free(h);
  cout << "n=" << n << " Total retries=" << failed
       << " Total rounds=" << rounds << endl;
}