コード例 #1
0
ファイル: Annotations.cpp プロジェクト: chagge/MosseFilters
// Builds a uniformly sampled subset of the frame annotations in `unif`.
//
// Annotations are grouped into bins of width Globals::binWidth along the x
// coordinate of the location of interest (LOI) selected by `tag`: the face,
// left iris, right iris or nose location, or, for any other tag, the midpoint
// between the two irises. At most `sampleSize` annotations are then taken
// from every bin, where `sampleSize` is the size of the smallest non-empty
// bin.
//
// Side effects: sets `useBins`, appends to `unif`, and uses the member `bins`
// as scratch storage (every bin created here is destroyed and removed again
// before returning). With no frame annotations only `useBins` is set.
//
// Assumes Globals::binWidth is positive.
void Annotations::createBins(Annotations::Tag tag) {
  // first set the useBins flag so that we return the binned annotations on
  // subsequent calls for annotations
  useBins = true;

  const size_t nAnnotations = frameAnnotations.size();
  if (nAnnotations == 0)
    return; // nothing to bin; also keeps the min/max arithmetic below in range

  // Compute the LOI of every annotation exactly once and remember it, so the
  // range computation and the bin placement below are guaranteed to use the
  // same coordinate.
  vector<int> lois(nAnnotations);
  int min = INT_MAX;
  int max = INT_MIN;
  for (size_t i = 0; i < nAnnotations; i++) {
    FrameAnnotation* fa = frameAnnotations[i];
    int loi = 0;
    switch (tag) {
    case Face:
      loi = fa->getFace().x;
      break;
    case LeftEye:
      loi = fa->getLeftIris().x;
      break;
    case RightEye:
      loi = fa->getRightIris().x;
      break;
    case Nose:
      loi = fa->getNose().x;
      break;
    default:
      loi = (fa->getLeftIris().x + fa->getRightIris().x) / 2;
      break;
    }
    lois[i] = loi;
    if (min > loi)
      min = loi;
    if (max < loi)
      max = loi;
  }

  // min is taken to be half a bin width to the left of leftmost
  // loi in the annotation data. Similarly for max
  min -= Globals::binWidth / 2;
  max += Globals::binWidth / 2;

  const int nBins = (max - min) / Globals::binWidth + 1;

  // The bins created by this call live at bins[firstBin .. firstBin + nBins).
  // Indexing relative to firstBin keeps us clear of anything already stored
  // in the member vector.
  const size_t firstBin = bins.size();
  vector<int> binSizes(nBins, 0);
  for (int i = 0; i < nBins; i++)
    bins.push_back(new vector<FrameAnnotation*>());

  // place every annotation in the bin given by its distance from min and
  // simultaneously compute the bin sizes
  for (size_t i = 0; i < nAnnotations; i++) {
    const int index = (lois[i] - min) / Globals::binWidth;
    bins[firstBin + index]->push_back(frameAnnotations[i]);
    binSizes[index]++;
  }

  // Get the smallest non-empty bin size
  int sampleSize = INT_MAX;
  for (int i = 0; i < nBins; i++)
    if (binSizes[i] && sampleSize > binSizes[i])
      sampleSize = binSizes[i];

  // We now move up to sampleSize annotations from each bin into the unif
  // vector. For now the last elements of each bin are picked; a uniformly
  // distributed pick will happen later if needed.
  for (int i = 0; i < nBins; i++) {
    vector<FrameAnnotation*>* bin = bins[firstBin + i];
    for (int j = 0; j < sampleSize && !bin->empty(); j++) {
      unif.push_back(bin->back());
      bin->pop_back();
    }
    // Now that we are done with bin i, destroy it
    delete bin;
  }

  // drop the now-dangling bin pointers so nobody can touch freed memory
  bins.erase(bins.begin() + firstBin, bins.end());
}
コード例 #2
0
ファイル: Classifier.cpp プロジェクト: hemprasad/MosseFilters
// Collapses a raw zone id into one of three coarse classes: ids below 3 map
// to 1, id 3 maps to 2 and ids above 3 map to 3.
static int coarseZone(int zone) {
    if (zone < 3)
        return 1;
    if (zone > 3)
        return 3;
    return 2;
}

// Runs the classifier over every annotated frame found under
// `trainingDirectory` and reports how often the predicted coarse zone differs
// from the annotated one.
//
// Returns a pair of
//   - the percentage of miss-classified frames (0 when there are no
//     annotations), and
//   - a human readable summary with the per-class frame counts and misses.
//
// Throws a string if a frame image cannot be loaded; anything thrown by
// getZone() is propagated after the current image has been released.
pair<double,string> Classifier::getError(string trainingDirectory) {
    Annotations annotations;

    // read annotations
    string locationsFileName = trainingDirectory + "/" + Globals::annotationsFileName;
    annotations.readAnnotations(locationsFileName);

    // get the frames directory
    string framesDirectory = annotations.getFramesDirectory();

    double missclassified = 0;

    // Zones are collapsed to exactly three coarse classes (see coarseZone),
    // so the tallies are sized for those classes rather than for
    // Globals::numZones.
    const int nCoarseZones = 3;
    int counts[nCoarseZones] = {0, 0, 0};
    int missCounts[nCoarseZones] = {0, 0, 0};

    // iterate over the set of all annotations
    vector<FrameAnnotation*>& frameAnnotations = annotations.getFrameAnnotations();
    for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
        FrameAnnotation* fa = frameAnnotations[i];

        const int actualZone = coarseZone(fa->getZone());
        counts[actualZone - 1]++;

        // compose filename
        char frameName[256];
        snprintf(frameName, sizeof(frameName), "frame_%d.png", fa->getFrameNumber());
        string fileName = framesDirectory + "/" + frameName;

        // load image; fail loudly instead of handing a null image to getZone
        IplImage* inputImg = cvLoadImage(fileName.c_str());
        if (!inputImg) {
            string err = "Classifier::getError. Cannot load file " + fileName + ".";
            throw (err);
        }

        double confidence;
        FrameAnnotation tf;
        int zone;
        try {
            zone = coarseZone(getZone(inputImg, confidence, tf));
        } catch (...) {
            // do not leak the frame when classification fails
            cvReleaseImage(&inputImg);
            throw;
        }

        if (zone != actualZone) {
            cout << "Classifier::getError. Expecting zone " << actualZone <<
                 " got zone " << zone << endl;
            missclassified++;
            missCounts[actualZone - 1]++;
        }

        cvReleaseImage(&inputImg);
    }

    int nAnnotations = frameAnnotations.size();
    char buffer[Globals::largeBufferSize];
    snprintf(buffer, sizeof(buffer), "%d out of %d were miss-classified.",
             (int)missclassified, nAnnotations);
    string msg = buffer;
    snprintf(buffer, sizeof(buffer), " Zones [%d, %d, %d].", counts[0], counts[1], counts[2]);
    msg += buffer;
    snprintf(buffer, sizeof(buffer), " Missed [%d, %d, %d].",
             missCounts[0], missCounts[1], missCounts[2]);
    msg += buffer;

    // avoid 0 / 0 (NaN) when the annotation set is empty
    double errorPercent = 0;
    if (!frameAnnotations.empty())
        errorPercent = (missclassified / frameAnnotations.size()) * 100;

    return make_pair(errorPercent, msg);
}
コード例 #3
0
ファイル: Classifier.cpp プロジェクト: hemprasad/MosseFilters
// Locates the eyes and the nose in `frame`, records them in `fa`, and scores
// the resulting feature vector against every zone model.
//
// frame      - image to analyse; handed to roiFunction (when set) and to the
//              location extractors.
// confidence - out: the largest score returned by any zone model (starts at
//              -FLT_MAX).
// fa         - in/out: its Face LOI seeds the face ROI (replaced by the image
//              centre when its x or y is 0). On return the left iris, right
//              iris and nose are set to the detected locations.
//
// Returns the 1-based index of the model with the highest score, or 0 when no
// score exceeded -FLT_MAX.
// Throws a string if leftEye, rightEye or nose is not set.
int Classifier::getZone(IplImage* frame, double& confidence, FrameAnnotation& fa) {
    if (!leftEye || !rightEye || !nose) {
        string err = "Classifier::getZone. Location extractors malformed.";
        throw (err);
    }

    // the roi offset
    CvPoint offset;

    // LOIs
    CvPoint leftEyeLocation;
    CvPoint rightEyeLocation;
    CvPoint noseLocation;

    // computing the confidence of the location identification
    double leftPSR;
    double rightPSR;
    double nosePSR;

    // fall back to the image centre when the annotation carries no usable
    // face location (x or y equal to 0)
    CvPoint center = fa.getLOI(Annotations::Face);
    if (!center.x || !center.y) {
        center.x = Globals::imgWidth / 2;
        center.y = Globals::imgHeight / 2;
        fa.setFace(center);
    }

    // extract the face ROI when a ROI function is configured; roiFunction
    // receives `offset` and the results below are shifted back by it
    offset.x = offset.y = 0;
    IplImage* roi = (roiFunction)? roiFunction(frame, fa, offset, Annotations::Face) : 0;

    // all location extractors do identical preprocessing. Therefore, preprocess
    // once using say the left eye extractor and re-use it for all three extractors
    fftw_complex* preprocessedImage = leftEye->getPreprocessedImage((roi)? roi : frame);

    // run the two eye extractors as two OpenMP sections; both are given the
    // same preprocessedImage
    #pragma omp parallel sections num_threads(2)
    {
        #pragma omp section
        {
            // get the location of the left eye
            leftEye->setImage(preprocessedImage);
            leftEye->apply();
            leftEye->getMaxLocation(leftEyeLocation, leftPSR);
            leftEyeLocation.x += offset.x;
            leftEyeLocation.y += offset.y;
        }

        #pragma omp section
        {
            // get the location of the right eye
            rightEye->setImage(preprocessedImage);
            rightEye->apply();
            rightEye->getMaxLocation(rightEyeLocation, rightPSR);
            rightEyeLocation.x += offset.x;
            rightEyeLocation.y += offset.y;
        }
    }

    if (roi)
        cvReleaseImage(&roi);

    // initial nose estimate: horizontally midway between the eyes and
    // Globals::noseDrop below the left eye; it seeds the nose ROI
    center.x = (leftEyeLocation.x + rightEyeLocation.x) / 2;
    center.y = leftEyeLocation.y + Globals::noseDrop;

    fa.setNose(center);

    offset.x = offset.y = 0;
    roi = (roiFunction)? roiFunction(frame, fa, offset, Annotations::Nose) : 0;

    // free the preprocessed image
    fftw_free(preprocessedImage);

    // preprocess again for the nose, this time from the nose ROI (or from the
    // whole frame when no ROI was produced), using the nose extractor
    preprocessedImage = nose->getPreprocessedImage((roi)? roi : frame);

    // get the location of the nose
    nose->setImage(preprocessedImage);
    nose->apply();
    nose->getMaxLocation(noseLocation, nosePSR);
    noseLocation.x += offset.x;
    noseLocation.y += offset.y;

    // free the preprocessed image
    fftw_free(preprocessedImage);

    // publish the detected locations; the nose estimate set above is replaced
    // by the detected nose location
    fa.setLeftIris(leftEyeLocation);
    fa.setRightIris(rightEyeLocation);
    fa.setNose(noseLocation);

    // we are done with the images at this point. Free roi if not zero
    if (roi)
        cvReleaseImage(&roi);

    //  cout << "Confidence (L, R, N) = (" << leftPSR << ", " <<
    //    rightPSR << ")" << endl;

    // extract features vector
    vector<double> data;
    for (int i = 0; i < nFeatures; i++) {
        double value = featureExtractors[i]->extract(&fa);
        data.push_back(value);
    }

    // normalize
    normalize(data);

    // create SVM Light objects to classify
    // NOTE(review): the malloc result is not checked before it is written to
    DOC* doc;
    WORD* words = (WORD*)malloc(sizeof(WORD) * (nFeatures + 1));

    for (int i = 0; i < nFeatures; i++) {
        words[i].wnum = featureExtractors[i]->getId();
        words[i].weight = data[i];
    }

    // SVM Light expects that the features vector has a zero element
    // to indicate termination and hence
    words[nFeatures].wnum = 0;
    words[nFeatures].weight = 0.0;

    // create doc
    string comment = "Gaze SVM";
    doc = create_example(-1, 0, 0, 0.0, create_svector(words, (char*)comment.c_str(), 1.0));

    int maxIndex = 0;
    confidence = -FLT_MAX;

    double dists[Globals::numZones];

    // classify using each zone model; every iteration writes only its own
    // dists[i] slot
    #pragma omp parallel for num_threads(Globals::numZones)
    for (unsigned int i = 0; i < Globals::numZones; i++) {
        if (kernelType == Trainer::Linear)
            dists[i] = classify_example_linear(models[i], doc);
        else
            dists[i] = classify_example(models[i], doc);
    }

    // pick the model with the largest score; zones are reported 1-based
    for (unsigned int i = 0; i < Globals::numZones; i++) {
        if (confidence < dists[i]) {
            confidence = dists[i];
            maxIndex = i + 1;
        }
    }

    free_example(doc, 1);
    free(words);

    return maxIndex;
}
コード例 #4
0
ファイル: Classifier.cpp プロジェクト: hemprasad/MosseFilters
// Measures how far the filter for `tag` lands from the annotated location on
// the frames found under `trainingDirectory`.
//
// For every annotation that carries a location for `tag` (annotations whose
// location is (0, 0) are skipped) the frame is loaded, converted to
// grayscale, optionally cropped by roiFunction, and run through the filter.
// The position of the filter response maximum is compared with the annotated
// location and the per-frame error is accumulated according to `errorType`:
//   OneNorm - |dx| + |dy|
//   TwoNorm - sqrt(dx^2 + dy^2)
//   MSE     - dx^2 + dy^2
//   other   - max(|dx|, |dy|)
//
// Returns the accumulated error divided by the number of frames actually
// evaluated, or 0 when no frame was evaluated.
// Throws a string if a frame image cannot be loaded.
double Classifier::getFilterError(string trainingDirectory, Annotations::Tag tag,
                                  ErrorType errorType) {
    Annotations annotations;
    Filter* filter = getFilter(tag);

    // read annotations
    string locationsFileName = trainingDirectory + "/" + Globals::annotationsFileName;
    annotations.readAnnotations(locationsFileName);

    // get the frames directory
    string framesDirectory = annotations.getFramesDirectory();

    // reset totals
    double totalError = 0;
    unsigned int nEvaluated = 0;

    // iterate over the set of all annotations
    vector<FrameAnnotation*>& frameAnnotations = annotations.getFrameAnnotations();
    for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
        FrameAnnotation* fa = frameAnnotations[i];

        // get LOI; work on a copy so that shifting it into ROI coordinates
        // below does not modify the annotation itself
        CvPoint location = fa->getLOI(tag);
        if (!location.x && !location.y)
            continue;

        // compose filename
        char frameName[256];
        snprintf(frameName, sizeof(frameName), "frame_%d.png", fa->getFrameNumber());
        string fileName = framesDirectory + "/" + frameName;

        // load image
        IplImage* inputImg = cvLoadImage(fileName.c_str());
        if (!inputImg) {
            string err = "Classifier::getFilterError. Cannot load file " + fileName + ".";
            throw (err);
        }
        IplImage* image = cvCreateImage(cvGetSize(inputImg), IPL_DEPTH_8U, 1);
        cvCvtColor(inputImg, image, CV_BGR2GRAY);

        // extract the face ROI when a ROI function is configured and express
        // the annotated location relative to it
        CvPoint offset;
        offset.x = offset.y = 0;
        IplImage* roi = (roiFunction)? roiFunction(image, *fa, offset, Annotations::Face) : 0;

        location.x -= offset.x;
        location.y -= offset.y;

        // apply filter
        // NOTE(review): neither imageFFT nor postFilterImg is released here;
        // confirm that Filter retains ownership of both
        fftw_complex* imageFFT = filter->preprocessImage((roi)? roi : image);
        IplImage* postFilterImg = filter->apply(imageFFT);

        // the filter's answer is the position of the response maximum
        double min;
        double max;
        CvPoint minLoc;
        CvPoint maxLoc;
        cvMinMaxLoc(postFilterImg, &min, &max, &minLoc, &maxLoc);

        // absolute per-axis distance between the location found and the
        // location as annotated
        double xdiff = abs(maxLoc.x - location.x);
        double ydiff = abs(maxLoc.y - location.y);

        switch (errorType) {
        case OneNorm:
            totalError += (xdiff + ydiff);
            break;
        case TwoNorm:
            totalError += sqrt(xdiff * xdiff + ydiff * ydiff);
            break;
        case MSE:
            totalError += (xdiff * xdiff + ydiff * ydiff);
            break;
        default:
            totalError += ((xdiff > ydiff)? xdiff : ydiff);
            break;
        }
        nEvaluated++;

        if (roi)
            cvReleaseImage(&roi);
        cvReleaseImage(&image);
        cvReleaseImage(&inputImg);
    }

    // average over the frames that contributed an error; skipped annotations
    // must not dilute the mean, and an empty set must not yield 0 / 0
    if (!nEvaluated)
        return 0;
    return totalError / nEvaluated;
}
コード例 #5
0
ファイル: classify.cpp プロジェクト: chagge/DeepLearning
int main(int argc, char** argv) {
  string modelsFileName = "";
  string imageFileName = "";
  string imageDirectory = "";
  int x = Globals::imgWidth / 2;
  int y = Globals::imgHeight / 2;

  for (int i = 1; i < argc; i++) {
    if (!strcmp(argv[i], "-i"))
      imageFileName = argv[i + 1];
    else if (!strcmp(argv[i], "-d"))
      imageDirectory = argv[i + 1];
    else if (!strcmp(argv[i], "-m"))
      modelsFileName = argv[i + 1];
    else if (!strcmp(argv[i], "-c")) {
      char* str = argv[i + 1];
      char* token = strtok(str, "(),");
      if (token)
	x = atoi(token);
      token = strtok(NULL, "(),");
      if (token)
	y = atoi(token);
    }
    else if (!strcmp(argv[i], "-h")) {
      cout << 
	"Usage: classify -i <imageFileName> -m <modelsFileName>" << endl;
      return 0;
    }
  }

  if (modelsFileName == "") {
    cout << 
      "Usage: classify -i <imageFileName> -m <modelsFileName>" << endl;
    return -1;
  }

  CvPoint center;
  center.x = x;
  center.y = y;

  CvSize size;
  size.width = Globals::roiWidth;
  size.height = Globals::roiHeight;

  double scale = 0.3;

  try {
    loadModel(modelsFileName);

    if (imageFileName != "") {
      Preprocess preprocess(size, scale, center, roiFunction);
      IplImage* image = cvLoadImage(imageFileName.c_str());
      IplImage* imageVector = preprocess.generateImageVector(image);

      cout << "Sector " << classify(imageVector) << endl;
      cvReleaseImage(&image);
      cvReleaseImage(&imageVector);
    } else if (imageDirectory != "") {
      int counts[5][6];
      for (int i = 0; i < 5; i++)
	for (int j = 0; j < 6; j++)
	  counts[i][j] = 0;

      string annotationsFileName = imageDirectory + "/annotations.xml";
      Annotations annotations;
      annotations.readAnnotations(annotationsFileName);
      CvPoint& center = annotations.getCenter();

      Preprocess preprocess(size, scale, center, roiFunction);
      vector<FrameAnnotation*>& frameAnnotations = annotations.getFrameAnnotations();
      for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
	FrameAnnotation* fa = frameAnnotations[i];
	fa->setFace(center);

	int expectedZone = fa->getSector();
	counts[expectedZone - 1][5]++;

	// compose filename and update map
	char buffer[256];
	sprintf(buffer, "frame_%d.png", fa->getFrameNumber());
	string simpleName = buffer;
	string fileName = imageDirectory + "/" + simpleName;
	IplImage* image = cvLoadImage(fileName.c_str());
	IplImage* imageVector = preprocess.generateImageVector(image);

	int zone = classify(imageVector);
	if (expectedZone == zone)
	  counts[zone - 1][zone - 1]++;
	else
	  counts[expectedZone - 1][zone - 1]++;

	cvReleaseImage(&image);
	cvReleaseImage(&imageVector);
      }
      cout << "Errors by class" << endl;
      for (int i = 0; i < 5; i++) {
	for (int j = 0; j < 6; j++)
	  cout << counts[i][j] << "\t";
	cout << endl;
      }
    }
  } catch (string err) {
    cout << err << endl;
  }

  return 0;
}