/**
 * Runs one forward pass of the network on a single image and copies the
 * requested feature blob (blob_names[0]) into `features`.
 *
 * @param image     Input image. If its size differs from the mean image
 *                  (mean_width x mean_height) it is resized IN PLACE.
 * @param features  Destination blob; it is reshaped to 1 x C x H x W and
 *                  filled with the extracted feature. Because the pointer is
 *                  passed by value, a NULL argument cannot be used to hand a
 *                  result back: the feature is then copied into a temporary
 *                  that is released before returning.
 * @return Elapsed time in milliseconds when `timing` is enabled, 0 when it is
 *         disabled, -1 on error (an error message is printed to stdout).
 */
float CaffeFeatExtractor<Dtype>::extract_singleFeat(cv::Mat &image, Blob<Dtype> *features)
{

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Validate the configuration before touching the network, so that a
    // failed call does not leave pending input data in the data layer.
    if (blob_names.size()!=1)
    {
        cout<< "Error! The list of features to be extracted has not size one!" << endl;
        return -1;
    }

    if (feature_extraction_net->layers().empty())
    {
        cout << "Error! The network has no layers!" << endl;
        return -1;
    }

    // Get pointer to data layer to set the input
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        // dynamic_pointer_cast yields an empty pointer when the types do not match
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    // From here on there is a single exit point, so that the CUDA events
    // created below are always destroyed.
    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Initialize label to zero
    int label = 0;

    // Set batch size to 1
    if (memory_data_layer->batch_size()!=1)
    {
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // Input preprocessing

    // The image passed to AddMatVector must be same size as the mean image.
    // If not, it is resized: Lanczos interpolation is used when at least one
    // dimension is reduced, bilinear interpolation otherwise.
    if (image.rows != mean_height || image.cols != mean_width)
    {
        // cv::Size takes (width, height)
        const cv::Size target_size(mean_width, mean_height);

        if (image.rows > mean_height || image.cols > mean_width)
        {
            cv::resize(image, image, target_size, 0, 0, CV_INTER_LANCZOS4);
        }
        else
        {
            cv::resize(image, image, target_size, 0, 0, CV_INTER_LINEAR);
        }
    }

    memory_data_layer->AddMatVector(vector<cv::Mat>(1, image),vector<int>(1,label));

    // Run network and retrieve features!
    // (the output blobs returned by Forward() are not needed here)
    feature_extraction_net->Forward();

    const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[0]);

    bool success = true;

    if (!feature_blob)
    {
        cout << "Error! Blob " << blob_names[0] << " not found in the network!" << endl;
        success = false;
    }
    else if (feature_blob->num()!=1) // should be 1
    {
        cout << "Error! Retrieved more than one feature, exiting..." << endl;
        success = false;
    }
    else
    {
        int channels = feature_blob->channels();
        int width = feature_blob->width();
        int height = feature_blob->height();

        // Owns the destination only when the caller passed NULL, so that the
        // temporary blob is freed on return instead of being leaked.
        caffe::shared_ptr<Blob<Dtype> > scratch;

        if (features==NULL)
        {
            scratch.reset(new Blob<Dtype>(1, channels, height, width));
            features = scratch.get();
        }
        else
        {
            features->Reshape(1, channels, height, width);
        }

        features->CopyFrom(*feature_blob);
    }

    float msecTotal = 0.0f;

    if (timing)
    {
        if (success)
        {
            // Record the stop event
            cudaEventRecord(stop, NULL);

            // Wait for the stop event to complete
            cudaEventSynchronize(stop);

            cudaEventElapsedTime(&msecTotal, start, stop);
        }

        // Release the events on every path (success or failure)
        cudaEventDestroy(start);
        cudaEventDestroy(stop);
    }

    return success ? msecTotal : -1.0f;
}
/**
 * Extracts the requested feature blob (blob_names[0]) for a set of images,
 * processing them in batches.
 *
 * @param images          Input images. Every image whose size differs from the
 *                        mean image (mean_width x mean_height) is resized IN
 *                        PLACE. Must not be empty.
 * @param new_batch_size  Requested batch size (must be > 0). It is applied only
 *                        if the number of images is a multiple of it; otherwise
 *                        the layer's current batch size is kept if it divides
 *                        the number of images, else the batch size is set to 1.
 * @param features        Output: one newly allocated blob per processed batch
 *                        is appended. The blobs are allocated with `new` and
 *                        are not freed here, so the caller must delete them.
 * @return Average elapsed time per image in milliseconds when `timing` is
 *         enabled, 0 when it is disabled, -1 on error (an error message is
 *         printed to stdout). On an error raised while iterating over the
 *         batches, the blobs already appended to `features` are left there.
 */
float CaffeFeatExtractor<Dtype>::extractBatch_singleFeat(vector<cv::Mat> &images, int new_batch_size, vector< Blob<Dtype>* > &features) {

    // Set the GPU/CPU mode for Caffe (here in order to be thread-safe)
    if (gpu_mode)
    {
        Caffe::set_mode(Caffe::GPU);
        Caffe::SetDevice(device_id);
    }
    else
    {
        Caffe::set_mode(Caffe::CPU);
    }

    // Validate the arguments and the configuration before touching the
    // network: the checks below protect the modulo/division operations that
    // follow and avoid leaving pending input data in the data layer.
    const size_t num_images = images.size();

    if (num_images==0)
    {
        cout << "Error! The list of images is empty!" << endl;
        return -1;
    }

    if (new_batch_size<=0)
    {
        cout << "Error! The requested batch size must be positive!" << endl;
        return -1;
    }

    if (blob_names.size()!=1)
    {
        cout<< "Error! The list of features to be extracted has not size one!" << endl;
        return -1;
    }

    if (feature_extraction_net->layers().empty())
    {
        cout << "Error! The network has no layers!" << endl;
        return -1;
    }

    // Get pointer to data layer to set the input
    caffe::shared_ptr<MemoryDataLayer<Dtype> > memory_data_layer = boost::dynamic_pointer_cast<caffe::MemoryDataLayer<Dtype> >(feature_extraction_net->layers()[0]);
    if (!memory_data_layer)
    {
        // dynamic_pointer_cast yields an empty pointer when the types do not match
        cout << "Error! The first layer of the network is not a MemoryDataLayer!" << endl;
        return -1;
    }

    // From here on there is a single exit point, so that the CUDA events
    // created below are always destroyed.
    cudaEvent_t start, stop;

    if (timing)
    {
        cudaEventCreate(&start);
        cudaEventCreate(&stop);
        cudaEventRecord(start, NULL);
    }

    // Initialize the labels to zero
    vector<int> labels(num_images, 0);

    // Set batch size: prefer the requested one, fall back to the current one,
    // and as a last resort process one image at a time.
    const int old_batch_size = memory_data_layer->batch_size();

    if (num_images%static_cast<size_t>(new_batch_size)==0)
    {
        if (old_batch_size!=new_batch_size)
        {
            memory_data_layer->set_batch_size(new_batch_size);
            cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
        }
    }
    else if (old_batch_size>0 && num_images%static_cast<size_t>(old_batch_size)==0)
    {
        cout << "WARNING: image number is not multiple of requested batch size, leaving the old one." << endl;
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }
    else
    {
        cout << "WARNING: image number is not multiple of batch size, setting it to 1 (performance issue)." << endl;
        memory_data_layer->set_batch_size(1);
        cout << "BATCH SIZE = " << memory_data_layer->batch_size() << endl;
    }

    // The number of forward passes depends on the batch size actually in use,
    // which may differ from the requested one (see the fallbacks above).
    const size_t num_batches = num_images/static_cast<size_t>(memory_data_layer->batch_size());

    // Input preprocessing

    // The images passed to AddMatVector must be same size as the mean image.
    // If not, they are resized: Lanczos interpolation is used when at least
    // one dimension is reduced, bilinear interpolation otherwise.
    const cv::Size target_size(mean_width, mean_height); // cv::Size takes (width, height)

    for (size_t i=0; i<num_images; i++)
    {
        cv::Mat &img = images[i];

        if (img.rows != mean_height || img.cols != mean_width)
        {
            if (img.rows > mean_height || img.cols > mean_width)
            {
                cv::resize(img, img, target_size, 0, 0, CV_INTER_LANCZOS4);
            }
            else
            {
                cv::resize(img, img, target_size, 0, 0, CV_INTER_LINEAR);
            }
        }
    }

    memory_data_layer->AddMatVector(images,labels);

    // Run network and retrieve features!
    // (the output blobs returned by Forward() are not needed here)

    bool success = true;

    for (size_t b=0; b<num_batches; b++)
    {
        feature_extraction_net->Forward();

        const caffe::shared_ptr<Blob<Dtype> > feature_blob = feature_extraction_net->blob_by_name(blob_names[0]);

        if (!feature_blob)
        {
            cout << "Error! Blob " << blob_names[0] << " not found in the network!" << endl;
            success = false;
            break;
        }

        int batch_size = feature_blob->num();
        int channels = feature_blob->channels();
        int width = feature_blob->width();
        int height = feature_blob->height();

        // Ownership of the new blob passes to the caller through `features`
        features.push_back(new Blob<Dtype>(batch_size, channels, height, width));

        features.back()->CopyFrom(*feature_blob);

    }

    float msecPerImage = 0.0f;

    if (timing)
    {
        if (success)
        {
            // Record the stop event
            cudaEventRecord(stop, NULL);

            // Wait for the stop event to complete
            cudaEventSynchronize(stop);

            float msecTotal = 0.0f;
            cudaEventElapsedTime(&msecTotal, start, stop);

            // num_images is guaranteed to be non-zero here
            msecPerImage = msecTotal/(float)num_images;
        }

        // Release the events on every path (success or failure)
        cudaEventDestroy(start);
        cudaEventDestroy(stop);
    }

    return success ? msecPerImage : -1.0f;
}