void Annotations::createBins(Annotations::Tag tag) {
  // first set the useBins flag so that we return the binned annotations on
  // subsequent calls for annotations
  useBins = true;

  FrameAnnotation* fa;

  // compute the tag-specific location of interest (LOI) for each annotation
  // once, and keep it so that the binning pass below uses the same values
  // that determined min and max
  vector<int> lois;
  int min = INT_MAX;
  int max = INT_MIN;
  for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
    fa = frameAnnotations[i];

    int loi = 0;
    switch (tag) {
    case Face:
      loi = fa->getFace().x;
      break;
    case LeftEye:
      loi = fa->getLeftIris().x;
      break;
    case RightEye:
      loi = fa->getRightIris().x;
      break;
    case Nose:
      loi = fa->getNose().x;
      break;
    default:
      loi = (fa->getLeftIris().x + fa->getRightIris().x) / 2;
      break;
    }
    lois.push_back(loi);
    if (min > loi) min = loi;
    if (max < loi) max = loi;
  }

  // min is taken to be half a bin width to the left of the leftmost loi in
  // the annotation data; similarly max is half a bin width to the right
  min -= Globals::binWidth / 2;
  max += Globals::binWidth / 2;
  int width = max - min;
  int nBins = width / Globals::binWidth + 1;

  // create a counter for each bin and reset it
  vector<int> count(nBins, 0);
  for (int i = 0; i < nBins; i++) {
    vector<FrameAnnotation*>* bin = new vector<FrameAnnotation*>();
    bins.push_back(bin);
  }

  // now iterate over all annotations and place each in the bin given by its
  // distance from min and the bin width, updating the bin sizes as we go
  for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
    fa = frameAnnotations[i];

    int index = (lois[i] - min) / Globals::binWidth;
    bins[index]->push_back(fa);
    count[index]++;
  }

  // get the smallest non-empty bin size
  int sampleSize = INT_MAX;
  for (int i = 0; i < nBins; i++)
    if (sampleSize > count[i] && count[i])
      sampleSize = count[i];

  // we now collect sampleSize annotations per bin into the unif vector
  for (int i = 0; i < nBins; i++) {
    // for now pick the last sampleSize elements from each bin;
    // a uniformly distributed pick will happen later if needed
    for (int j = 0; j < sampleSize; j++) {
      if (bins[i]->size()) {
        unif.push_back(bins[i]->back());
        bins[i]->pop_back();
      }
    }
    // now that we are done with bin i, destroy it
    delete bins[i];
  }
  // drop the now-dangling bin pointers
  bins.clear();
}
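// A minimal usage sketch (hedged: the annotations path is illustrative, and it
// assumes getFrameAnnotations honors the useBins flag set by createBins, as the
// comment at the top of createBins indicates):
//
//   Annotations annotations;
//   annotations.readAnnotations("/path/to/data/annotations.xml");
//
//   // bin by the face x-coordinate and rebalance so that every bin contributes
//   // at most the size of the smallest non-empty bin
//   annotations.createBins(Annotations::Face);
//
//   // with useBins set, subsequent calls return the balanced set
//   vector<FrameAnnotation*>& balanced = annotations.getFrameAnnotations();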
pair<double, string> Classifier::getError(string trainingDirectory) {
  Annotations annotations;

  // read annotations
  string locationsFileName = trainingDirectory + "/" + Globals::annotationsFileName;
  annotations.readAnnotations(locationsFileName);

  // get the frames directory
  string framesDirectory = annotations.getFramesDirectory();

  double misclassified = 0;
  int counts[Globals::numZones];
  int missCounts[Globals::numZones];
  for (unsigned int i = 0; i < Globals::numZones; i++) {
    counts[i] = 0;
    missCounts[i] = 0;
  }

  // iterate over the set of all annotations
  vector<FrameAnnotation*>& frameAnnotations = annotations.getFrameAnnotations();
  for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
    FrameAnnotation* fa = frameAnnotations[i];

    // collapse the five annotated zones into three groups: {1, 2}, {3}, {4, 5}
    int actualZone = fa->getZone();
    if (actualZone < 3)
      actualZone = 1;
    else if (actualZone > 3)
      actualZone = 3;
    else
      actualZone = 2;

    counts[actualZone - 1]++;

    // compose filename
    char buffer[256];
    sprintf(buffer, "frame_%d.png", fa->getFrameNumber());
    string fileName = framesDirectory + "/" + buffer;

    // load image
    IplImage* inputImg = cvLoadImage((const char*)fileName.c_str());
    if (!inputImg) {
      string err = "Classifier::getError. Cannot load file " + fileName + ".";
      throw (err);
    }

    double confidence;
    FrameAnnotation tf;
    int zone = getZone(inputImg, confidence, tf);
    if (zone < 3)
      zone = 1;
    else if (zone > 3)
      zone = 3;
    else
      zone = 2;

    if (zone != actualZone) {
      cout << "Classifier::getError. Expecting zone " << actualZone <<
        " got zone " << zone << endl;
      misclassified++;
      missCounts[actualZone - 1]++;
    }

    cvReleaseImage(&inputImg);
  }

  int nAnnotations = frameAnnotations.size();

  char buffer[Globals::largeBufferSize];
  sprintf(buffer, "%d out of %d were misclassified.", (int)misclassified, nAnnotations);
  string msg = buffer;
  sprintf(buffer, " Zones [%d, %d, %d].", counts[0], counts[1], counts[2]);
  msg += buffer;
  sprintf(buffer, " Missed [%d, %d, %d].", missCounts[0], missCounts[1], missCounts[2]);
  msg += buffer;

  return make_pair((misclassified / nAnnotations) * 100, msg);
}
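// A minimal usage sketch (hedged: how the Classifier is constructed and its
// models are loaded is outside this listing, so "classifier" stands for an
// already-initialized instance and the directory path is illustrative):
//
//   pair<double, string> error = classifier.getError("/path/to/training/dir");
//   cout << "Error rate: " << error.first << "%" << endl;
//   cout << error.second << endl;   // per-zone counts and miss counts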
int Classifier::getZone(IplImage* frame, double& confidence, FrameAnnotation& fa) {
  if (!leftEye || !rightEye || !nose) {
    string err = "Classifier::getZone. Location extractors malformed.";
    throw (err);
  }

  // the roi offset
  CvPoint offset;

  // LOIs
  CvPoint leftEyeLocation;
  CvPoint rightEyeLocation;
  CvPoint noseLocation;

  // PSRs used as the confidence of each location identification
  double leftPSR;
  double rightPSR;
  double nosePSR;

  CvPoint center = fa.getLOI(Annotations::Face);
  if (!center.x || !center.y) {
    center.x = Globals::imgWidth / 2;
    center.y = Globals::imgHeight / 2;
    fa.setFace(center);
  }

  offset.x = offset.y = 0;
  IplImage* roi = (roiFunction)? roiFunction(frame, fa, offset, Annotations::Face) : 0;

  // all location extractors do identical preprocessing. Therefore, preprocess
  // once using say the left eye extractor and re-use it for all three extractors
  fftw_complex* preprocessedImage = leftEye->getPreprocessedImage((roi)? roi : frame);

  #pragma omp parallel sections num_threads(2)
  {
    #pragma omp section
    {
      // get the location of the left eye
      leftEye->setImage(preprocessedImage);
      leftEye->apply();
      leftEye->getMaxLocation(leftEyeLocation, leftPSR);
      leftEyeLocation.x += offset.x;
      leftEyeLocation.y += offset.y;
    }

    #pragma omp section
    {
      // get the location of the right eye
      rightEye->setImage(preprocessedImage);
      rightEye->apply();
      rightEye->getMaxLocation(rightEyeLocation, rightPSR);
      rightEyeLocation.x += offset.x;
      rightEyeLocation.y += offset.y;
    }
  }

  if (roi)
    cvReleaseImage(&roi);

  // seed the nose location from the eye locations before computing its ROI
  center.x = (leftEyeLocation.x + rightEyeLocation.x) / 2;
  center.y = leftEyeLocation.y + Globals::noseDrop;
  fa.setNose(center);

  offset.x = offset.y = 0;
  roi = (roiFunction)? roiFunction(frame, fa, offset, Annotations::Nose) : 0;

  // free the preprocessed image
  fftw_free(preprocessedImage);

  // preprocess again, now over the nose ROI
  preprocessedImage = nose->getPreprocessedImage((roi)? roi : frame);

  // get the location of the nose
  nose->setImage(preprocessedImage);
  nose->apply();
  nose->getMaxLocation(noseLocation, nosePSR);
  noseLocation.x += offset.x;
  noseLocation.y += offset.y;

  // free the preprocessed image
  fftw_free(preprocessedImage);

  fa.setLeftIris(leftEyeLocation);
  fa.setRightIris(rightEyeLocation);
  fa.setNose(noseLocation);

  // we are done with the images at this point. Free roi if not zero
  if (roi)
    cvReleaseImage(&roi);

  //  cout << "Confidence (L, R, N) = (" << leftPSR << ", " <<
  //    rightPSR << ", " << nosePSR << ")" << endl;

  // extract features vector
  vector<double> data;
  for (int i = 0; i < nFeatures; i++) {
    double value = featureExtractors[i]->extract(&fa);
    data.push_back(value);
  }

  // normalize
  normalize(data);

  // create SVM Light objects to classify
  DOC* doc;
  WORD* words = (WORD*)malloc(sizeof(WORD) * (nFeatures + 1));
  for (int i = 0; i < nFeatures; i++) {
    words[i].wnum = featureExtractors[i]->getId();
    words[i].weight = data[i];
  }

  // SVM Light expects the features vector to be terminated with a zero
  // element, hence
  words[nFeatures].wnum = 0;
  words[nFeatures].weight = 0.0;

  // create doc
  string comment = "Gaze SVM";
  doc = create_example(-1, 0, 0, 0.0,
                       create_svector(words, (char*)comment.c_str(), 1.0));

  int maxIndex = 0;
  confidence = -FLT_MAX;
  double dists[Globals::numZones];

  // classify using each zone model
  #pragma omp parallel for num_threads(Globals::numZones)
  for (int i = 0; i < Globals::numZones; i++) {
    if (kernelType == Trainer::Linear)
      dists[i] = classify_example_linear(models[i], doc);
    else
      dists[i] = classify_example(models[i], doc);
  }

  // pick the zone whose model produced the largest decision value
  for (int i = 0; i < Globals::numZones; i++) {
    if (confidence < dists[i]) {
      confidence = dists[i];
      maxIndex = i + 1;
    }
  }

  free_example(doc, 1);
  free(words);

  return maxIndex;
}
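// A minimal usage sketch (hedged: construction of the Classifier and the capture
// loop are outside this listing; cvQueryFrame on a CvCapture* is illustrative,
// any IplImage* frame works):
//
//   FrameAnnotation fa;
//   double confidence = 0;
//   IplImage* frame = cvQueryFrame(capture);   // or cvLoadImage(...)
//   int zone = classifier.getZone(frame, confidence, fa);
//   // fa now carries the detected left iris, right iris and nose locations;
//   // zone is the 1-based index of the highest-scoring SVM zone model and
//   // confidence is that model's decision value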
double Classifier::getFilterError(string trainingDirectory, Annotations::Tag tag,
                                  ErrorType errorType) {
  Annotations annotations;
  Filter* filter = getFilter(tag);

  // read annotations
  string locationsFileName = trainingDirectory + "/" + Globals::annotationsFileName;
  annotations.readAnnotations(locationsFileName);

  // get the frames directory
  string framesDirectory = annotations.getFramesDirectory();

  // reset total
  double totalError = 0;

  // iterate over the set of all annotations
  vector<FrameAnnotation*>& frameAnnotations = annotations.getFrameAnnotations();
  for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
    FrameAnnotation* fa = frameAnnotations[i];

    // get the annotated LOI; copy it so that the offset adjustment below does
    // not modify the annotation itself
    CvPoint location = fa->getLOI(tag);
    if (!location.x && !location.y)
      continue;

    // compose filename
    char buffer[256];
    sprintf(buffer, "frame_%d.png", fa->getFrameNumber());
    string fileName = framesDirectory + "/" + buffer;

    // load image
    IplImage* inputImg = cvLoadImage((const char*)fileName.c_str());
    if (!inputImg) {
      string err = "Classifier::getFilterError. Cannot load file " + fileName + ".";
      throw (err);
    }
    IplImage* image = cvCreateImage(cvGetSize(inputImg), IPL_DEPTH_8U, 1);
    cvCvtColor(inputImg, image, CV_BGR2GRAY);

    // crop to the face ROI and express the annotated location in ROI coordinates
    CvPoint offset;
    offset.x = offset.y = 0;
    IplImage* roi = (roiFunction)? roiFunction(image, *fa, offset, Annotations::Face) : 0;
    location.x -= offset.x;
    location.y -= offset.y;

    // apply filter
    fftw_complex* imageFFT = filter->preprocessImage((roi)? roi : image);
    IplImage* postFilterImg = filter->apply(imageFFT);

    // the filter response peak is the detected location
    double min;
    double max;
    CvPoint minLoc;
    CvPoint maxLoc;
    cvMinMaxLoc(postFilterImg, &min, &max, &minLoc, &maxLoc);

    // accumulate the error between the detected location and the annotated
    // location using the requested norm
    double xdiff = abs(maxLoc.x - location.x);
    double ydiff = abs(maxLoc.y - location.y);
    switch (errorType) {
    case OneNorm:
      totalError += (xdiff + ydiff);
      break;
    case TwoNorm:
      totalError += sqrt(xdiff * xdiff + ydiff * ydiff);
      break;
    case MSE:
      totalError += (xdiff * xdiff + ydiff * ydiff);
      break;
    default:
      totalError += ((xdiff > ydiff)? xdiff : ydiff);
      break;
    }

    if (roi)
      cvReleaseImage(&roi);
    cvReleaseImage(&image);
    cvReleaseImage(&inputImg);
  }

  return totalError / frameAnnotations.size();
}
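// A minimal usage sketch (hedged: assumes the classifier's nose filter has been
// trained and that ErrorType is scoped to Classifier as the signature above
// suggests; the directory path is illustrative):
//
//   double mse = classifier.getFilterError("/path/to/training/dir",
//                                          Annotations::Nose, Classifier::MSE);
//   double l2  = classifier.getFilterError("/path/to/training/dir",
//                                          Annotations::Nose, Classifier::TwoNorm);
//   cout << "Nose filter MSE = " << mse << ", mean L2 error = " << l2 << endl;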
int main(int argc, char** argv) {
  string modelsFileName = "";
  string imageFileName = "";
  string imageDirectory = "";

  int x = Globals::imgWidth / 2;
  int y = Globals::imgHeight / 2;

  for (int i = 1; i < argc; i++) {
    if (!strcmp(argv[i], "-i") && i + 1 < argc)
      imageFileName = argv[i + 1];
    else if (!strcmp(argv[i], "-d") && i + 1 < argc)
      imageDirectory = argv[i + 1];
    else if (!strcmp(argv[i], "-m") && i + 1 < argc)
      modelsFileName = argv[i + 1];
    else if (!strcmp(argv[i], "-c") && i + 1 < argc) {
      char* str = argv[i + 1];
      char* token = strtok(str, "(),");
      if (token)
        x = atoi(token);
      token = strtok(NULL, "(),");
      if (token)
        y = atoi(token);
    } else if (!strcmp(argv[i], "-h")) {
      cout << "Usage: classify -m <modelsFileName> [-i <imageFileName> | -d <imageDirectory>]"
           << " [-c (x,y)]" << endl;
      return 0;
    }
  }

  if (modelsFileName == "") {
    cout << "Usage: classify -m <modelsFileName> [-i <imageFileName> | -d <imageDirectory>]"
         << " [-c (x,y)]" << endl;
    return -1;
  }

  CvPoint center;
  center.x = x;
  center.y = y;

  CvSize size;
  size.width = Globals::roiWidth;
  size.height = Globals::roiHeight;

  double scale = 0.3;

  try {
    loadModel(modelsFileName);

    if (imageFileName != "") {
      // classify a single image
      Preprocess preprocess(size, scale, center, roiFunction);

      IplImage* image = cvLoadImage(imageFileName.c_str());
      IplImage* imageVector = preprocess.generateImageVector(image);

      cout << "Sector " << classify(imageVector) << endl;

      cvReleaseImage(&image);
      cvReleaseImage(&imageVector);
    } else if (imageDirectory != "") {
      // classify every annotated frame in a directory and accumulate a
      // confusion matrix; rows are expected sectors, the first five columns
      // are predicted sectors and the last column is the per-row total
      int counts[5][6];
      for (int i = 0; i < 5; i++)
        for (int j = 0; j < 6; j++)
          counts[i][j] = 0;

      string annotationsFileName = imageDirectory + "/annotations.xml";
      Annotations annotations;
      annotations.readAnnotations(annotationsFileName);

      CvPoint& center = annotations.getCenter();
      Preprocess preprocess(size, scale, center, roiFunction);

      vector<FrameAnnotation*>& frameAnnotations = annotations.getFrameAnnotations();
      for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
        FrameAnnotation* fa = frameAnnotations[i];
        fa->setFace(center);

        int expectedZone = fa->getSector();
        counts[expectedZone - 1][5]++;

        // compose filename
        char buffer[256];
        sprintf(buffer, "frame_%d.png", fa->getFrameNumber());
        string simpleName = buffer;
        string fileName = imageDirectory + "/" + simpleName;

        IplImage* image = cvLoadImage(fileName.c_str());
        IplImage* imageVector = preprocess.generateImageVector(image);

        // record the prediction against the expected sector
        int zone = classify(imageVector);
        counts[expectedZone - 1][zone - 1]++;

        cvReleaseImage(&image);
        cvReleaseImage(&imageVector);
      }

      cout << "Errors by class" << endl;
      for (int i = 0; i < 5; i++) {
        for (int j = 0; j < 6; j++)
          cout << counts[i][j] << "\t";
        cout << endl;
      }
    }
  } catch (string err) {
    cout << err << endl;
  }

  return 0;
}
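// Example invocations (hedged: the binary name "classify" follows the usage
// string above; file and directory paths are illustrative):
//
//   classify -m models.svm -i frame_42.png -c "(320,240)"
//   classify -m models.svm -d /path/to/annotated/frames
//
// The first form prints the predicted sector for one image; the second reads
// <directory>/annotations.xml and prints the 5x6 confusion matrix described above.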