示例#1
0
void ActGraph::trainModel(const std::string featureFile)
{
    // features
    cv::Mat stat;        // mean_x, mean_y, var_x, var_y
    cv::Mat shapeXY;     // track shapes
    cv::Mat mbhX;        // MBHx
    cv::Mat mbhY;        // MBHy

    // import features from file
    readFeatures(featureFile, stat, shapeXY, mbhX, mbhY);

    // GMM-clustering
    trainModel(stat, shapeXY, mbhX, mbhY);
}
示例#2
0
int main(int argc, char** argv)
{

	if (argc < 3)
	{
		fprintf(stderr, "Usage %s input output\n", argv[0]);
		exit (-1);
	}

	featureValues* head = retriveFeatures(argv[1]);
	dumpFeatureInfo(head);

	aggInstFeature af[INST_TYPE_NUM];
	readFeatures(af, argv[1]);
	
	int i;
	for (i=0; i<INST_TYPE_NUM; i++)
	{
		printf("%s:%u\n", InstTypeName[i], af[i].count);
	}

	return 0;
}
示例#3
0
int
main (int argc, char** argv)
{
  // const char trace = 0;
  // const unsigned debug = 0;
  const char *me = "main";

  // parse and validate the command line options
  struct paramsStruct params;
  parseCommandLine(argc, argv, &params);

  // create results directory in ANALYSIS directory
  // permissions are read, write for the owner
  char resultsDirectoryName[256] = "";
  makeResultsDirectory(resultsDirectoryName, 
                       sizeof(resultsDirectoryName), 
                       &params);

  // start logging
  char logFilePath[256] = "";
  {
    const int outputLength =  snprintf(logFilePath, 
                                       sizeof(logFilePath), 
                                       "%s/run.log", 
                                       resultsDirectoryName);
    if (outputLength > (int) sizeof(logFilePath)) {
      fprintf(stderr, "%s: logFilePath too small", me);
      exit(1);
    }
  }
  Log_T log = Log_new(logFilePath, stderr);

  // log the command line parameters
  LOG(log,"started log file %s\n", logFilePath);

  LOG(log,"params: algo=%s\n",          params.algo);
  LOG(log,"      : obs=%s\n",           params.obs); 
  LOG(log,"      : radius=%d\n",        params.radius);
  LOG(log,"      : which=%s\n",         params.which);
  
  // check the command line parameters
  assert(strcmp(params.algo, "knn") == 0); 
  assert(strcmp(params.obs, "1A") == 0);
 
  // read the input files
  const unsigned nObservations = 217376;  // adjust of OBS != 1A
  const unsigned nFeatures = 55;

  double *apns = readCsvNoHeader(nObservations, "aps.csv");
  double *dates = readCsvNoHeader(nObservations, "date.csv");
  char   *featuresHeaderP;
  double *features = readFeatures(nObservations, 
				  nFeatures,
				  &featuresHeaderP);
  double *prices = readCsvNoHeader(nObservations, "SALE-AMOUNT-log.csv");

  // convert dates to days past the epoch
  unsigned dayStdColumn = 5; // the 6th column contains the standardized day value
  assert(columnHeaderEqual(featuresHeaderP, dayStdColumn, "day-std"));  
  double *days = convertDatesToDays(nObservations, dates);
  free(dates);
  double mean;
  double stdv;
  determineMeanStdv(nObservations, days, &mean, &stdv);
  double *daysStd = standardize(nObservations, days, mean, stdv);
  replaceDay(nObservations, nFeatures, features, daysStd, dayStdColumn);
  free(days);
  free(daysStd);


  // generate one set of estimates
  FILE *resultFile;
  {
    char resultFilePath[256];
    const int outputLength = 
      snprintf(resultFilePath,
               sizeof(resultFilePath),
               "%s/estimates-laufer.csv",
               resultsDirectoryName);
    if (outputLength > (int) sizeof(resultFilePath)) {
      fprintf(stderr, "%s: resultFilePath too small", me);
      exit(1);
    }
    LOG(log, " result file path: %s\n", resultFilePath);
    resultFile = fopen(resultFilePath, "w");
  }
  assert(resultFile);

  if (strcmp(params.which, "laufer"))
    createLaufer(nObservations, nFeatures, 
		 apns, dates, features, prices,
		 log,
		 resultFile);
  else
    assert(NULL != "logic error");

  // OLD CODE BELOW THIS LINE
#if 0
  double **pricesHatP = NULL;
  if (params.useCache)
    pricesHatP = readCache(nObservations, params.obs, log, kMax);

  // determine estimated prices for any missing entries in the cache
  // this operation could be fast or very slow
  // MAYBE: write out cache periodically 
  const unsigned cacheMutated = completeCache(nObservations, 
                                              pricesHatP, 
                                              params.obs, 
                                              log, 
                                              kMax, 
                                              pricesP, 
                                              debug);

  if (params.useCache && cacheMutated)
    writeCache(nObservations, pricesHatP, params.obs, log, kMax);


  // select which set of estimates to create
  if (paramsP->whichIsLaufer)
    createEstimatesLaufer(nObservations, nFeatures,
			  features, dates, prices);
  else
    assert(false); // should never get here

  // pricesHatP[i][k] is
  // the estimate priced of transaction indexed i for k nearest neighbors

  // for each value of k, determine RMSE overall all the test transactions
  // determine kArgMin, the k providing the lowest RMSE
  // write CSV containing <k, rmse> values
  char resultFilePath[256];
  {
    const int outputLength = 
      snprintf(resultFilePath,
               sizeof(resultFilePath),
               "%s/k-rmse.csv",
               directoryName);
    if (outputLength > (int) sizeof(resultFilePath)) {
      fprintf(stderr, "%s: resultFilePath too small", me);
      exit(1);
    }
    LOG(log, " result file path: %s\n", resultFilePath);
  }
  FILE *resultFile = fopen(resultFilePath, "w");
  assert(resultFile);

  // log best k for random sample of test observations
  bestK(0.01, nObservations, pricesHatP, pricesP, log, kMax);

  // write CSV header 
  fprintf(resultFile, "k,rmse\n");
  unsigned kArgMin = 0;
  double lowestRMSE = DBL_MAX;
  for (unsigned hpK = 0; hpK < kMax; hpK++) {

    // determine rmse for this k
    const double rmse = determineRmse(nObservations, pricesHatP, pricesP, hpK);

    // check if we have a new best k
    LOG(log, "hpK %u rmse %f\n", hpK + 1, rmse);
    fprintf(resultFile, "%u,%f\n", hpK + 1, rmse);
    if (rmse < lowestRMSE) {
      lowestRMSE = rmse;
      kArgMin = hpK;
    }
  }

#endif
  // 
  LOG(log, "%s\n", "finished");

  exit(0);
}