void ActGraph::trainModel(const std::string featureFile) {
    // features
    cv::Mat stat;    // mean_x, mean_y, var_x, var_y
    cv::Mat shapeXY; // track shapes
    cv::Mat mbhX;    // MBHx
    cv::Mat mbhY;    // MBHy

    // import features from file
    readFeatures(featureFile, stat, shapeXY, mbhX, mbhY);

    // GMM-clustering
    trainModel(stat, shapeXY, mbhX, mbhY);
}
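// The trainModel(stat, shapeXY, mbhX, mbhY) overload is not shown here; per the
// "GMM-clustering" comment it presumably fits a Gaussian mixture to each descriptor
// matrix. Below is a minimal sketch of fitting a GMM to one cv::Mat of row-wise
// samples with OpenCV's cv::ml::EM. The helper name fitGmm, the cluster count of 64,
// and the diagonal covariance type are illustrative assumptions, not the project's
// actual implementation.
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>

static cv::Ptr<cv::ml::EM> fitGmm(const cv::Mat &samples, int nClusters = 64) {
    // EM expects floating-point samples, one observation per row
    cv::Mat samples64;
    samples.convertTo(samples64, CV_64F);

    cv::Ptr<cv::ml::EM> gmm = cv::ml::EM::create();
    gmm->setClustersNumber(nClusters);
    gmm->setCovarianceMatrixType(cv::ml::EM::COV_MAT_DIAGONAL);
    gmm->trainEM(samples64); // per-sample log-likelihoods/labels not needed here
    return gmm;
}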
int main(int argc, char** argv) {
    if (argc < 3) {
        fprintf(stderr, "Usage %s input output\n", argv[0]);
        exit(-1);
    }

    featureValues* head = retriveFeatures(argv[1]);
    dumpFeatureInfo(head);

    aggInstFeature af[INST_TYPE_NUM];
    readFeatures(af, argv[1]);

    int i;
    for (i = 0; i < INST_TYPE_NUM; i++) {
        printf("%s:%u\n", InstTypeName[i], af[i].count);
    }

    return 0;
}
int main(int argc, char** argv) {
    // const char trace = 0;
    // const unsigned debug = 0;
    const char *me = "main";

    // parse and validate the command line options
    struct paramsStruct params;
    parseCommandLine(argc, argv, &params);

    // create results directory in ANALYSIS directory
    // permissions are read, write for the owner
    char resultsDirectoryName[256] = "";
    makeResultsDirectory(resultsDirectoryName, sizeof(resultsDirectoryName), &params);

    // start logging
    char logFilePath[256] = "";
    {
        const int outputLength = snprintf(logFilePath,
                                          sizeof(logFilePath),
                                          "%s/run.log",
                                          resultsDirectoryName);
        if (outputLength >= (int) sizeof(logFilePath)) { // snprintf truncated the path
            fprintf(stderr, "%s: logFilePath too small", me);
            exit(1);
        }
    }
    Log_T log = Log_new(logFilePath, stderr);

    // log the command line parameters
    LOG(log, "started log file %s\n", logFilePath);
    LOG(log, "params: algo=%s\n", params.algo);
    LOG(log, "      : obs=%s\n", params.obs);
    LOG(log, "      : radius=%d\n", params.radius);
    LOG(log, "      : which=%s\n", params.which);

    // check the command line parameters
    assert(strcmp(params.algo, "knn") == 0);
    assert(strcmp(params.obs, "1A") == 0);

    // read the input files
    const unsigned nObservations = 217376; // adjust if OBS != 1A
    const unsigned nFeatures = 55;
    double *apns = readCsvNoHeader(nObservations, "aps.csv");
    double *dates = readCsvNoHeader(nObservations, "date.csv");
    char *featuresHeaderP;
    double *features = readFeatures(nObservations, nFeatures, &featuresHeaderP);
    double *prices = readCsvNoHeader(nObservations, "SALE-AMOUNT-log.csv");

    // convert dates to days past the epoch
    unsigned dayStdColumn = 5; // the 6th column contains the standardized day value
    assert(columnHeaderEqual(featuresHeaderP, dayStdColumn, "day-std"));
    double *days = convertDatesToDays(nObservations, dates);
    double mean;
    double stdv;
    determineMeanStdv(nObservations, days, &mean, &stdv);
    double *daysStd = standardize(nObservations, days, mean, stdv);
    replaceDay(nObservations, nFeatures, features, daysStd, dayStdColumn);
    free(days);
    free(daysStd);

    // generate one set of estimates
    FILE *resultFile;
    {
        char resultFilePath[256];
        const int outputLength = snprintf(resultFilePath,
                                          sizeof(resultFilePath),
                                          "%s/estimates-laufer.csv",
                                          resultsDirectoryName);
        if (outputLength >= (int) sizeof(resultFilePath)) {
            fprintf(stderr, "%s: resultFilePath too small", me);
            exit(1);
        }
        LOG(log, " result file path: %s\n", resultFilePath);
        resultFile = fopen(resultFilePath, "w");
    }
    assert(resultFile);

    if (strcmp(params.which, "laufer") == 0)
        createLaufer(nObservations, nFeatures, apns, dates, features, prices, log, resultFile);
    else
        assert(0 && "logic error"); // only "laufer" is supported
    free(dates); // dates is still passed to createLaufer above, so free it only now

    // OLD CODE BELOW THIS LINE
#if 0
    double **pricesHatP = NULL;
    if (params.useCache)
        pricesHatP = readCache(nObservations, params.obs, log, kMax);

    // determine estimated prices for any missing entries in the cache
    // this operation could be fast or very slow
    // MAYBE: write out cache periodically
    const unsigned cacheMutated =
        completeCache(nObservations, pricesHatP, params.obs, log, kMax, pricesP, debug);
    if (params.useCache && cacheMutated)
        writeCache(nObservations, pricesHatP, params.obs, log, kMax);

    // select which set of estimates to create
    if (paramsP->whichIsLaufer)
        createEstimatesLaufer(nObservations, nFeatures, features, dates, prices);
    else
        assert(false); // should never get here

    // pricesHatP[i][k] is
    // the estimated price of transaction indexed i for k nearest neighbors

    // for each value of k, determine RMSE over all the test transactions
    // determine kArgMin, the k providing the lowest RMSE
    // write CSV containing <k, rmse> values
    char resultFilePath[256];
    {
        const int outputLength = snprintf(resultFilePath,
                                          sizeof(resultFilePath),
                                          "%s/k-rmse.csv",
                                          directoryName);
        if (outputLength >= (int) sizeof(resultFilePath)) {
            fprintf(stderr, "%s: resultFilePath too small", me);
            exit(1);
        }
        LOG(log, " result file path: %s\n", resultFilePath);
    }
    FILE *resultFile = fopen(resultFilePath, "w");
    assert(resultFile);

    // log best k for random sample of test observations
    bestK(0.01, nObservations, pricesHatP, pricesP, log, kMax);

    // write CSV header
    fprintf(resultFile, "k,rmse\n");
    unsigned kArgMin = 0;
    double lowestRMSE = DBL_MAX;
    for (unsigned hpK = 0; hpK < kMax; hpK++) {
        // determine rmse for this k
        const double rmse = determineRmse(nObservations, pricesHatP, pricesP, hpK);
        // check if we have a new best k
        LOG(log, "hpK %u rmse %f\n", hpK + 1, rmse);
        fprintf(resultFile, "%u,%f\n", hpK + 1, rmse);
        if (rmse < lowestRMSE) {
            lowestRMSE = rmse;
            kArgMin = hpK;
        }
    }
#endif

    // LOG(log, "%s\n", "finished");
    exit(0);
}
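// determineMeanStdv and standardize are called above but defined elsewhere in the
// project. The sketch below is a minimal stand-in under the assumption that they
// compute the sample mean and (population) standard deviation and the usual
// (x - mean) / stdv transform, returning a heap-allocated array the caller must
// free, matching the free(daysStd) call in main(). It is illustrative only.
#include <cstdlib>
#include <cmath>

static void determineMeanStdv(unsigned n, const double *x, double *mean, double *stdv) {
    // mean of the n values
    double sum = 0.0;
    for (unsigned i = 0; i < n; i++)
        sum += x[i];
    *mean = sum / n;

    // population standard deviation about that mean
    double sumSq = 0.0;
    for (unsigned i = 0; i < n; i++) {
        const double d = x[i] - *mean;
        sumSq += d * d;
    }
    *stdv = std::sqrt(sumSq / n);
}

static double *standardize(unsigned n, const double *x, double mean, double stdv) {
    // caller owns the returned buffer and must free() it
    double *result = (double *) malloc(n * sizeof(double));
    for (unsigned i = 0; i < n; i++)
        result[i] = (x[i] - mean) / stdv;
    return result;
}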