예제 #1
0
// Computes the average localization error of the filter registered for `tag`
// over the annotated frames found in a training directory.
//
// The annotations are read from
// `trainingDirectory + "/" + Globals::annotationsFileName`. For every frame
// annotation whose location of interest (LOI) for `tag` is not (0, 0), the
// frame image `frame_<number>.png` is loaded from the annotations' frames
// directory, converted to grayscale, optionally cropped through `roiFunction`,
// and run through the filter. The position of the maximum filter response is
// then compared against the annotated position.
//
// Parameters:
//   trainingDirectory - directory that contains the annotations file
//   tag               - which location of interest / filter to evaluate
//   errorType         - OneNorm: |dx| + |dy|; TwoNorm: sqrt(dx^2 + dy^2);
//                       MSE: dx^2 + dy^2; any other value: max(|dx|, |dy|)
//
// Returns the accumulated error divided by the TOTAL number of frame
// annotations; frames skipped for having no LOI still count in the
// denominator.
// NOTE(review): with an empty annotation set this evaluates 0.0 / 0 —
// confirm callers never pass such a directory.
//
// Throws a string describing the failure when a frame image cannot be loaded.
double Classifier::getFilterError(string trainingDirectory, Annotations::Tag tag,
                                  ErrorType errorType) {
    Annotations annotations;
    Filter* filter = getFilter(tag);

    // read annotations
    string locationsFileName = trainingDirectory + "/" + Globals::annotationsFileName;
    annotations.readAnnotations(locationsFileName);

    // get the frames directory
    string framesDirectory = annotations.getFramesDirectory();

    // running sum of the per-frame errors
    double totalError = 0;

    // iterate over the set of all annotations
    vector<FrameAnnotation*>& frameAnnotations = annotations.getFrameAnnotations();
    for (unsigned int i = 0; i < frameAnnotations.size(); i++) {
        FrameAnnotation* fa = frameAnnotations[i];

        // Take a copy of the LOI: it is shifted into ROI coordinates below and
        // that adjustment must not be written back into the annotation.
        CvPoint location = fa->getLOI(tag);

        // a (0, 0) location marks a frame without an annotation for this tag
        if (!location.x && !location.y)
            continue;

        // compose filename (bounded write into the fixed-size buffer)
        char buffer[256];
        snprintf(buffer, sizeof(buffer), "frame_%d.png", fa->getFrameNumber());
        string fileName = framesDirectory + "/" + buffer;

        // load image
        IplImage* inputImg = cvLoadImage(fileName.c_str());
        if (!inputImg) {
            string err = "Classifier::getFilterError. Cannot load file " + fileName + ".";
            throw (err);
        }

        // the filter works on a single-channel 8-bit image
        IplImage* image = cvCreateImage(cvGetSize(inputImg), IPL_DEPTH_8U, 1);
        cvCvtColor(inputImg, image, CV_BGR2GRAY);

        // extract the face region of interest, if a ROI function is installed;
        // offset receives the ROI's displacement and stays (0, 0) otherwise
        CvPoint offset;
        offset.x = offset.y = 0;
        IplImage* roi = (roiFunction)? roiFunction(image, *fa, offset, Annotations::Face) : 0;

        // express the annotated location in the coordinates of the filtered image
        location.x -= offset.x;
        location.y -= offset.y;

        // apply filter
        // NOTE(review): imageFFT and postFilterImg are never released here;
        // presumably the filter owns both buffers — confirm, otherwise they
        // leak once per frame.
        fftw_complex* imageFFT = filter->preprocessImage((roi)? roi : image);
        IplImage* postFilterImg = filter->apply(imageFFT);

        // the predicted location is the position of the maximum response
        double min;
        double max;
        CvPoint minLoc;
        CvPoint maxLoc;
        cvMinMaxLoc(postFilterImg, &min, &max, &minLoc, &maxLoc);

        // per-axis absolute distance between the location found and the
        // location as annotated
        double xdiff = abs(maxLoc.x - location.x);
        double ydiff = abs(maxLoc.y - location.y);

        switch (errorType) {
        case OneNorm:
            totalError += (xdiff + ydiff);
            break;
        case TwoNorm:
            totalError += sqrt(xdiff * xdiff + ydiff * ydiff);
            break;
        case MSE:
            totalError += (xdiff * xdiff + ydiff * ydiff);
            break;
        default:
            // any other error type: largest per-axis distance
            totalError += ((xdiff > ydiff)? xdiff : ydiff);
            break;
        }

        if (roi)
            cvReleaseImage(&roi);
        cvReleaseImage(&image);
        cvReleaseImage(&inputImg);
    }

    return totalError / frameAnnotations.size();
}
예제 #2
0
// Locates the irises and the nose in `frame`, stores them in `fa`, builds a
// feature vector from the annotation and classifies it against every zone
// model.
//
// Parameters:
//   frame      - image to analyse
//   confidence - out: the largest classifier output among all zone models
//   fa         - in/out: face location is read (and defaulted to the image
//                centre when either coordinate is zero); left iris, right
//                iris and nose locations are written
//
// Returns the 1-based index of the zone model with the largest output, or 0
// when no output exceeded the initial confidence of -FLT_MAX.
//
// Throws a string when any of the three location extractors is missing.
int Classifier::getZone(IplImage* frame, double& confidence, FrameAnnotation& fa) {
    if (!(leftEye && rightEye && nose)) {
        string message = "Classifier::getZone. Location extractors malformed.";
        throw (message);
    }

    // make sure the annotation carries a usable face location
    CvPoint faceCenter = fa.getLOI(Annotations::Face);
    if (faceCenter.x == 0 || faceCenter.y == 0) {
        faceCenter.x = Globals::imgWidth / 2;
        faceCenter.y = Globals::imgHeight / 2;
        fa.setFace(faceCenter);
    }

    // crop to the face region when a ROI function is installed
    CvPoint roiOffset;
    roiOffset.x = 0;
    roiOffset.y = 0;
    IplImage* region = 0;
    if (roiFunction)
        region = roiFunction(frame, fa, roiOffset, Annotations::Face);

    // both eye extractors consume the same preprocessed image, so it is
    // computed once through the left eye extractor and shared
    IplImage* eyeInput = frame;
    if (region)
        eyeInput = region;
    fftw_complex* spectrum = leftEye->getPreprocessedImage(eyeInput);

    // detected locations and their peak-to-sidelobe confidences
    CvPoint leftIris;
    CvPoint rightIris;
    double leftPSR;
    double rightPSR;

    #pragma omp parallel sections num_threads(2)
    {
        #pragma omp section
        {
            // left eye, translated back to frame coordinates
            leftEye->setImage(spectrum);
            leftEye->apply();
            leftEye->getMaxLocation(leftIris, leftPSR);
            leftIris.x += roiOffset.x;
            leftIris.y += roiOffset.y;
        }

        #pragma omp section
        {
            // right eye, translated back to frame coordinates
            rightEye->setImage(spectrum);
            rightEye->apply();
            rightEye->getMaxLocation(rightIris, rightPSR);
            rightIris.x += roiOffset.x;
            rightIris.y += roiOffset.y;
        }
    }

    if (region)
        cvReleaseImage(&region);

    // seed the nose position from the eyes so that the nose ROI can be cut out
    CvPoint noseGuess;
    noseGuess.x = (leftIris.x + rightIris.x) / 2;
    noseGuess.y = leftIris.y + Globals::noseDrop;
    fa.setNose(noseGuess);

    roiOffset.x = 0;
    roiOffset.y = 0;
    if (roiFunction)
        region = roiFunction(frame, fa, roiOffset, Annotations::Nose);
    else
        region = 0;

    // the eye spectrum is no longer needed
    fftw_free(spectrum);

    // preprocess the nose region (or the whole frame) for the nose extractor
    IplImage* noseInput = frame;
    if (region)
        noseInput = region;
    spectrum = nose->getPreprocessedImage(noseInput);

    // nose, translated back to frame coordinates
    CvPoint nosePosition;
    double nosePSR;
    nose->setImage(spectrum);
    nose->apply();
    nose->getMaxLocation(nosePosition, nosePSR);
    nosePosition.x += roiOffset.x;
    nosePosition.y += roiOffset.y;

    fftw_free(spectrum);

    // publish the detected locations on the annotation
    fa.setLeftIris(leftIris);
    fa.setRightIris(rightIris);
    fa.setNose(nosePosition);

    // no image is needed past this point
    if (region)
        cvReleaseImage(&region);

    // one value per feature extractor, then normalized
    vector<double> features;
    for (int f = 0; f < nFeatures; f++)
        features.push_back(featureExtractors[f]->extract(&fa));

    normalize(features);

    // SVM Light input: nFeatures entries followed by a terminating entry
    // whose feature number is zero
    WORD* svmWords = static_cast<WORD*>(malloc(sizeof(WORD) * (nFeatures + 1)));

    for (int f = 0; f < nFeatures; f++) {
        svmWords[f].wnum = featureExtractors[f]->getId();
        svmWords[f].weight = features[f];
    }
    svmWords[nFeatures].wnum = 0;
    svmWords[nFeatures].weight = 0.0;

    string svmComment = "Gaze SVM";
    DOC* example = create_example(-1, 0, 0, 0.0,
                                  create_svector(svmWords,
                                                 const_cast<char*>(svmComment.c_str()),
                                                 1.0));

    double scores[Globals::numZones];

    // evaluate every zone model on the example
    #pragma omp parallel for num_threads(Globals::numZones)
    for (unsigned int z = 0; z < Globals::numZones; z++) {
        scores[z] = (kernelType == Trainer::Linear)
                        ? classify_example_linear(models[z], example)
                        : classify_example(models[z], example);
    }

    // pick the best scoring zone; zones are reported 1-based
    int bestZone = 0;
    confidence = -FLT_MAX;
    for (unsigned int z = 0; z < Globals::numZones; z++) {
        if (scores[z] > confidence) {
            confidence = scores[z];
            bestZone = z + 1;
        }
    }

    free_example(example, 1);
    free(svmWords);

    return bestZone;
}