/**
     * Runs one forward pass over a set of images and appends the raw network
     * output of the requested blob to `predictions`.
     *
     * Every image is resized to `imageSize` when its dimensions differ, converted
     * to a Datum tagged with the matching entry of `labels`, and pushed into the
     * memory data layer before the forward pass.
     *
     * @param originImages images to classify
     * @param labels       one label per image (at least originImages.size() entries)
     * @param resultLayer  name of the blob whose values are read back
     * @param dataLayer    name of the memory data layer that receives the images
     * @param predictions  output; the blob values of every sample in the batch are
     *                     appended sample by sample. The values are floats and are
     *                     converted to short on insertion.
     *
     * Invalid input (too few labels, wrong channel count, unknown layer or blob
     * name) is reported on stderr and terminates the process with exit(1).
     * An empty image list is a no-op.
     */
    void CaffeClassifier::predict(std::vector<cv::Mat> originImages, std::vector<int> labels, string resultLayer,
                                  string dataLayer, vector<short> & predictions) {
        // Nothing to classify; avoid handing an empty batch to the data layer.
        if (originImages.empty()) {
            return;
        }

        // labels[i] is read for every image below, so the sizes have to line up.
        if (labels.size() < originImages.size()) {
            cerr << "Error: fewer labels than input images were passed to the CNN classifier!" << endl;
            exit(1);
        }

        vector<Datum> vecDatum;
        vecDatum.reserve(originImages.size());

        for (size_t i = 0; i < originImages.size(); ++i) {
            const cv::Mat& originImage = originImages[i];

            // resize image only when its dimensions differ from the expected input size
            Mat image;
            if (originImage.cols != imageSize.width || originImage.rows != imageSize.height) {
                resize(originImage, image, imageSize);
            } else {
                image = originImage;
            }

            // check channels
            if (channels != image.channels()) {
                cerr << "Error: the channel number of input image is invalid for CNN classifier!" << endl;
                exit(1);
            }

            // mat to datum
            Datum datum;
            CVMatToDatum(image, &datum);
            datum.set_label(labels[i]);
            vecDatum.push_back(datum);
        }

        // get the data layer
        const caffe::shared_ptr<MemoryDataLayer<float>> memDataLayer = boost::static_pointer_cast<MemoryDataLayer<float>> (caffeNet->layer_by_name(dataLayer));
        if (!memDataLayer) {
            cerr << "Error: the data layer '" << dataLayer << "' does not exist in the network!" << endl;
            exit(1);
        }

        // push new image data
        memDataLayer->AddDatumVector(vecDatum);

        // do forward pass
        vector<Blob<float>*> inputVec;
        caffeNet->Forward(inputVec);

        // get results
        const caffe::shared_ptr<Blob<float> > featureBlob = caffeNet->blob_by_name(resultLayer);
        if (!featureBlob) {
            cerr << "Error: the result blob '" << resultLayer << "' does not exist in the network!" << endl;
            exit(1);
        }

        const int batchSize = featureBlob->num();
        if (batchSize <= 0) {
            return;  // empty blob: nothing to read and no safe way to derive the per-sample size
        }
        const int dimFeatures = featureBlob->count() / batchSize;

        // Read-only access: cpu_data() does not mark the blob as modified on the CPU side.
        const float* blobData = featureBlob->cpu_data();

        // append the output of each sample in the batch
        predictions.reserve(predictions.size() + static_cast<size_t>(batchSize) * dimFeatures);
        for (int n = 0; n < batchSize; ++n) {
            const float* sampleBegin = blobData + featureBlob->offset(n);
            predictions.insert(predictions.end(), sampleBegin, sampleBegin + dimFeatures);
        }
    }
    /**
     * Builds an occlusion heat map for one image.
     *
     * A filled circle (radius FILLER_SIZE, coloured with the mean colour of the
     * input image) is drawn around each point of a regular grid, the occluded
     * copies are pushed through the network in batches of BATCH_SIZE, and the
     * network output for class `label` is painted into `heatMap` as a square
     * around the corresponding grid point.
     *
     * @param inputImage      image to analyse; it is cloned for every occlusion and
     *                        not modified. NOTE(review): the grid and the heat map
     *                        assume a 227x227 image - confirm callers resize first.
     * @param label           index into the per-sample output of `predictionLayer`
     * @param predictionLayer name of the blob holding the per-class output
     * @param dataLayer       name of the memory data layer that receives the images
     * @param heatMap         output; replaced by a 227x227 CV_32FC1 matrix
     *
     * Invalid input (wrong channel count, unknown layer or blob name, unexpected
     * batch size, label out of range) is reported on stderr and terminates the
     * process with exit(1).
     */
    void CaffeClassifier::predictHeatMap(cv::Mat& inputImage, int label, string predictionLayer, string dataLayer, cv::Mat& heatMap) {
        const int IMAGE_SIZE = 227;
        const int BATCH_SIZE = 64;
        const int STEP_SIZE = 9;
        const int START_OFFSET = STEP_SIZE / 2;
        const int FILLER_SIZE = 50;

        // NOTE(review): the initial fill value looks like a baseline score for cells
        // that are never painted - confirm where 26.932154 comes from.
        heatMap = cv::Mat(IMAGE_SIZE, IMAGE_SIZE, CV_32FC1, Scalar(26.932154));

        // check channels once: cloning and drawing never change the channel count
        if (channels != inputImage.channels()) {
            cerr << "Error: the channel number of input image is invalid for CNN classifier!" << endl;
            exit(1);
        }

        // get the data layer (the lookup does not depend on the batch)
        const caffe::shared_ptr<MemoryDataLayer<float>> memDataLayer = boost::static_pointer_cast<MemoryDataLayer<float>>(caffeNet->layer_by_name(dataLayer));
        if (!memDataLayer) {
            cerr << "Error: the data layer '" << dataLayer << "' does not exist in the network!" << endl;
            exit(1);
        }

        const cv::Scalar fillColor = cv::mean(inputImage);

        // centres of the occluded regions, laid out on a regular grid
        std::vector<Point> middlePoints;
        for (int x = START_OFFSET; x < IMAGE_SIZE; x += STEP_SIZE) {
            for (int y = START_OFFSET; y < IMAGE_SIZE; y += STEP_SIZE) {
                middlePoints.push_back(Point(x, y));
            }
        }
        const int numPoints = static_cast<int>(middlePoints.size());

        for (int i = 0; i < numPoints; i += BATCH_SIZE) {
            std::cout << (i * 100) / numPoints << "% " << std::flush;

            // number of grid points that really belong to this batch
            const int validInBatch = min(BATCH_SIZE, numPoints - i);

            vector<Datum> vecDatum;
            vecDatum.reserve(BATCH_SIZE);

            for (int j = 0; j < BATCH_SIZE; ++j) {
                if (j >= validInBatch) {
                    // The batch always holds BATCH_SIZE entries; the tail of the final
                    // batch is padded with copies of the last real sample.
                    const Datum padding = vecDatum.back();
                    vecDatum.push_back(padding);
                    continue;
                }

                // occlude the region around the grid point
                cv::Mat image = inputImage.clone();
                circle(image,
                       middlePoints[i + j],
                       FILLER_SIZE,
                       fillColor,
                       CV_FILLED);

                // mat to datum
                Datum datum;
                CVMatToDatum(image, &datum);
                vecDatum.push_back(datum);
            }

            // push new image data
            memDataLayer->AddDatumVector(vecDatum);

            // do forward pass
            vector<Blob<float>*> inputVec;
            caffeNet->Forward(inputVec);

            // get results
            const caffe::shared_ptr<Blob<float> > featureBlob = caffeNet->blob_by_name(predictionLayer);
            if (!featureBlob) {
                cerr << "Error: the prediction blob '" << predictionLayer << "' does not exist in the network!" << endl;
                exit(1);
            }
            if (featureBlob->num() != BATCH_SIZE) {
                cerr << "Error: the network batch size does not match the heat map batch size of " << BATCH_SIZE << "!" << endl;
                exit(1);
            }

            // number of output values per sample
            const int dimFeatures = featureBlob->count() / BATCH_SIZE;
            if (label < 0 || label >= dimFeatures) {
                cerr << "Error: label " << label << " is outside the " << dimFeatures << " outputs of the prediction layer!" << endl;
                exit(1);
            }

            // Read-only access: cpu_data() does not mark the blob as modified on the CPU side.
            const float* blobData = featureBlob->cpu_data();

            // paint the output of each real (non-padding) sample into the heat map
            for (int j = 0; j < validInBatch; ++j) {
                const Point& p = middlePoints[i + j];
                const float confidence = blobData[featureBlob->offset(j) + label];

                rectangle(heatMap,
                    Point(p.x - START_OFFSET, p.y - START_OFFSET),
                    Point(p.x + START_OFFSET, p.y + START_OFFSET),
                    Scalar(confidence),
                    CV_FILLED);
            }
        }
    }
/**
 * Extracts the single configured feature blob (blob_names[0]) for a set of
 * images and appends one unrolled feature vector per image to `features`.
 *
 * @param images          input images; any image whose size differs from the
 *                        mean image is resized IN PLACE to mean_width x mean_height
 * @param new_batch_size  requested batch size. It is applied only when the number
 *                        of images is a multiple of it; otherwise the current
 *                        batch size is kept if it divides the image count, else
 *                        the batch size falls back to 1.
 * @param features        output; one vector per processed image is appended
 * @return average time per image in milliseconds when `timing` is enabled,
 *         0 when it is disabled, -1 on invalid input or configuration
 */
float CaffeFeatExtractor<Dtype>::extractBatch_singleFeat_1D(vector<cv::Mat> &images, int new_batch_size, vector< vector<Dtype> > &features)
{

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Validate everything up front, before images are queued in the data layer
    // and before CUDA events are allocated, so that an early return leaves nothing behind.
    if (images.empty())
    {
        cout << "Error! No images to extract features from!" << endl;
        return -1;
    }

    if (new_batch_size <= 0)
    {
        cout << "Error! The requested batch size must be positive!" << endl;
        return -1;
    }

    if (blob_names.size()!=1)
    {
        cout<< "Error! The list of features to be extracted has not size one!" << endl;
        return -1;
    }

    // Get pointer to data layer to set the input
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    // The blob pointer is looked up once; its content is refreshed by every Forward()
    const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[0]);
    if (!feature_blob)
    {
        cout << "Error! The blob " << blob_names[0] << " does not exist in the network!" << endl;
        return -1;
    }

    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    const size_t num_images = images.size();

    // Initialize labels to zero
    vector<int> labels(num_images, 0);

    // Set batch size: the number of images must always be a multiple of the batch size in use
    const int current_batch_size = memory_data_layer->batch_size();

    if (current_batch_size!=new_batch_size && num_images%new_batch_size==0)
    {
        memory_data_layer->set_batch_size(new_batch_size);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (num_images%current_batch_size!=0)
    {
        cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)." << endl;
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (current_batch_size!=new_batch_size)
    {
        cout << "WARNING: image number is not multiple of requested batch size,leaving the old one." << endl;
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // Use the batch size that is really configured (it may differ from the
    // requested one), otherwise some images would never be forwarded.
    const int num_batches = static_cast<int>(num_images / memory_data_layer->batch_size());

    // Input preprocessing

    // The image passed to AddMatVector must be same size as the mean image
    // If not, it is resized anisotropically:
    // LANCZOS4 when any dimension shrinks, BILINEAR otherwise

    const cv::Size mean_size(mean_width, mean_height); // cv::Size takes (width, height)

    for (size_t i=0; i<num_images; i++)
    {
        cv::Mat &img = images[i];

        if (img.rows != mean_height || img.cols != mean_width)
        {
            const bool shrinking = (img.rows > mean_height || img.cols > mean_width);
            cv::resize(img, img, mean_size, 0, 0, shrinking ? CV_INTER_LANCZOS4 : CV_INTER_LINEAR);
        }
    }

    memory_data_layer->AddMatVector(images,labels);

    // Run network and retrieve features!

    features.reserve(features.size() + num_images);

    for (int b=0; b<num_batches; ++b)
    {
        feature_extraction_net->Forward();

        const int batch_size = feature_blob->num();

        const int feat_dim = feature_blob->count() / batch_size; // should be equal to: channels*width*height
        if (feat_dim!=feature_blob->channels())
        {
            cout<< "Attention! The feature is not 1D: unrolling according to Caffe's order (i.e. channel, width, height)" << endl;
        }

        // Read-only access: cpu_data() does not mark the blob as modified on the CPU side
        const Dtype *blob_data = feature_blob->cpu_data();

        for (int i=0; i<batch_size; ++i)
        {
            const Dtype *sample_begin = blob_data + feature_blob->offset(i);
            features.push_back(vector <Dtype>(sample_begin, sample_begin + feat_dim));
        }

    }

    float msecPerImage = 0.0f;

    if (timing)
    {
        // Record the stop event
        cudaEventRecord(stop, NULL);

        // Wait for the stop event to complete
        cudaEventSynchronize(stop);

        float msecTotal = 0.0f;
        cudaEventElapsedTime(&msecTotal, start, stop);

        // Release the events, they are created anew on every call
        cudaEventDestroy(start);
        cudaEventDestroy(stop);

        msecPerImage = msecTotal/static_cast<float>(num_images);
    }

    return msecPerImage;
}
/**
 * Extracts every configured feature blob (all entries of blob_names) for a
 * single image and appends one unrolled feature vector per blob to `features`.
 *
 * The batch size of the data layer is forced to 1.
 *
 * @param image     input image; resized IN PLACE to mean_width x mean_height
 *                  when its size differs from the mean image
 * @param features  output; one vector per blob is appended, in blob_names order.
 *                  On error the vectors appended so far are left in place.
 * @return total elapsed time in milliseconds when `timing` is enabled,
 *         0 when it is disabled, -1 on error
 */
float CaffeFeatExtractor<Dtype>::extract_multipleFeat_1D(cv::Mat &image, vector< vector<Dtype> > &features)
{

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Get pointer to data layer to set the input
    // (checked before any CUDA event is allocated, so this early return leaves nothing behind)
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Initialize label to zero
    const int label = 0;

    // Set batch size to 1

    if (memory_data_layer->batch_size()!=1)
    {
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // Input preprocessing

    // The image passed to AddMatVector must be same size as the mean image
    // If not, it is resized anisotropically:
    // LANCZOS4 when any dimension shrinks, BILINEAR otherwise

    if (image.rows != mean_height || image.cols != mean_width)
    {
        const bool shrinking = (image.rows > mean_height || image.cols > mean_width);

        // cv::Size takes (width, height)
        cv::resize(image, image, cv::Size(mean_width, mean_height), 0, 0, shrinking ? CV_INTER_LANCZOS4 : CV_INTER_LINEAR);
    }

    memory_data_layer->AddMatVector(vector<cv::Mat>(1, image),vector<int>(1,label));

    const size_t num_features = blob_names.size();

    // Run network and retrieve features!

    feature_extraction_net->Forward();

    // Errors inside the loop only set this flag, so that the CUDA events are
    // always released on the common exit path below.
    bool failed = false;

    for (size_t f = 0; f < num_features; ++f) {

        const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[f]);
        if (!feature_blob)
        {
            cout << "Error! The blob " << blob_names[f] << " does not exist in the network!" << endl;
            failed = true;
            break;
        }

        const int batch_size = feature_blob->num(); // should be 1
        if (batch_size!=1)
        {
            cout << "Error! Retrieved more than one feature, exiting..." << endl;
            failed = true;
            break;
        }

        const int feat_dim = feature_blob->count(); // should be equal to: count/batch_size=channels*width*height
        if (feat_dim!=feature_blob->channels())
        {
            cout<< "Attention! The feature is not 1D: unrolling according to Caffe's order (i.e. channel, width, height)" << endl;
        }

        // Read-only access: cpu_data() does not mark the blob as modified on the CPU side
        const Dtype *sample_begin = feature_blob->cpu_data() + feature_blob->offset(0);
        features.push_back(vector <Dtype>(sample_begin, sample_begin + feat_dim));

    }

    float msecTotal = 0.0f;

    if (timing)
    {
        // Record the stop event
        cudaEventRecord(stop, NULL);

        // Wait for the stop event to complete
        cudaEventSynchronize(stop);

        cudaEventElapsedTime(&msecTotal, start, stop);

        // Release the events, they are created anew on every call
        cudaEventDestroy(start);
        cudaEventDestroy(stop);
    }

    return failed ? -1.0f : msecTotal;

}
/**
 * Extracts every configured feature blob (all entries of blob_names) for a set
 * of images and appends a copy of each blob, per batch, to `features`.
 *
 * @param images          input images; any image whose size differs from the
 *                        mean image is resized IN PLACE to mean_width x mean_height
 * @param new_batch_size  requested batch size. It is applied only when the number
 *                        of images is a multiple of it; otherwise the current
 *                        batch size is kept if it divides the image count, else
 *                        the batch size falls back to 1.
 * @param features        output; for each batch, one newly allocated Blob per
 *                        entry of blob_names is appended (batch-major order).
 *                        The blobs are allocated with `new` and never freed here:
 *                        the caller owns them and has to delete them.
 * @return average time per image in milliseconds when `timing` is enabled,
 *         0 when it is disabled, -1 on invalid input or configuration
 */
float CaffeFeatExtractor<Dtype>::extractBatch_multipleFeat(vector<cv::Mat> &images, int new_batch_size, vector< Blob<Dtype>* > &features) {

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Validate everything up front, before images are queued in the data layer
    // and before CUDA events are allocated, so that an early return leaves nothing behind.
    if (images.empty())
    {
        cout << "Error! No images to extract features from!" << endl;
        return -1;
    }

    if (new_batch_size <= 0)
    {
        cout << "Error! The requested batch size must be positive!" << endl;
        return -1;
    }

    // Get pointer to data layer to set the input
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    // The blob pointers are looked up once; their content is refreshed by every Forward()
    const size_t num_features = blob_names.size();

    vector< caffe::shared_ptr<Blob<Dtype> > > feature_blobs;
    feature_blobs.reserve(num_features);

    for (size_t i = 0; i < num_features; ++i)
    {
        const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[i]);
        if (!feature_blob)
        {
            cout << "Error! The blob " << blob_names[i] << " does not exist in the network!" << endl;
            return -1;
        }
        feature_blobs.push_back(feature_blob);
    }

    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    const size_t num_images = images.size();

    // Initialize labels to zero
    vector<int> labels(num_images, 0);

    // Set batch size: the number of images must always be a multiple of the batch size in use
    const int current_batch_size = memory_data_layer->batch_size();

    if (current_batch_size!=new_batch_size && num_images%new_batch_size==0)
    {
        memory_data_layer->set_batch_size(new_batch_size);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (num_images%current_batch_size!=0)
    {
        cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)..." << endl;
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else if (current_batch_size!=new_batch_size)
    {
        cout << "WARNING: image number is not multiple of requested batch size, leaving the old one..." << endl;
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // Use the batch size that is really configured (it may differ from the
    // requested one), otherwise some images would never be forwarded.
    const int num_batches = static_cast<int>(num_images / memory_data_layer->batch_size());

    // Input preprocessing

    // The image passed to AddMatVector must be same size as the mean image
    // If not, it is resized anisotropically (BILINEAR)
    // if it is downsampled, LANCZOS4 is used for antialiasing

    const cv::Size mean_size(mean_width, mean_height); // cv::Size takes (width, height)

    for (size_t i=0; i<num_images; i++)
    {
        cv::Mat &img = images[i];

        if (img.rows != mean_height || img.cols != mean_width)
        {
            const bool shrinking = (img.rows > mean_height || img.cols > mean_width);
            cv::resize(img, img, mean_size, 0, 0, shrinking ? CV_INTER_LANCZOS4 : CV_INTER_LINEAR);
        }
    }

    memory_data_layer->AddMatVector(images,labels);

    // Run network and retrieve features!

    features.reserve(features.size() + static_cast<size_t>(num_batches) * num_features);

    for (int b=0; b<num_batches; b++)
    {
        feature_extraction_net->Forward();

        for (size_t i = 0; i < num_features; ++i) {

            const Blob<Dtype> &feature_blob = *feature_blobs[i];

            // Allocate a blob of the same shape and copy the data into it;
            // ownership passes to the caller through `features`.
            features.push_back(new Blob<Dtype>(feature_blob.num(), feature_blob.channels(), feature_blob.height(), feature_blob.width()));

            features.back()->CopyFrom(feature_blob);
        }

    }

    float msecPerImage = 0.0f;

    if (timing)
    {
        // Record the stop event
        cudaEventRecord(stop, NULL);

        // Wait for the stop event to complete
        cudaEventSynchronize(stop);

        float msecTotal = 0.0f;
        cudaEventElapsedTime(&msecTotal, start, stop);

        // Release the events, they are created anew on every call
        cudaEventDestroy(start);
        cudaEventDestroy(stop);

        msecPerImage = msecTotal/static_cast<float>(num_images);
    }

    return msecPerImage;

}