// Enqueue an asynchronous device-to-host copy of the leading elements of the
// array on the given stream.
//
// stream    - CUDA stream the copy is issued on.
// nElements - number of elements of type T to copy; requests larger than
//             m_size are silently clamped to m_size.
//
// Asserts that the array is non-empty, that the host buffer is page-locked,
// and that cudaMemcpyAsync reported cudaSuccess. No stream synchronization is
// performed here, so the host buffer contents should only be relied upon once
// the caller has synchronized with the stream.
void CudaSynchronizedMemory<T>::pullFromDeviceAsync(cudaStream_t stream, size_t nElements)
 {
   // Preconditions: something to copy, and pinned host memory for the async path.
   ASRL_ASSERT_GT(m_size,0, "The array is empty");
   ASRL_ASSERT(m_pageLocked, "Asynchronous transfer is only valid for page-locked host memory");

   // Never copy past the end of the allocation.
   const size_t count = (nElements > m_size) ? m_size : nElements;
   const size_t byteCount = count * sizeof(T);

   void * const hostDst = static_cast<void *>(m_host);
   const void * const deviceSrc = static_cast<const void *>(m_device);

   const cudaError_t status = cudaMemcpyAsync(hostDst, deviceSrc, byteCount, cudaMemcpyDeviceToHost, stream);
   ASRL_ASSERT_EQ(status,cudaSuccess, "Unable to copy " << typeid(T).name() << " array of size " << m_size << " from device. Stream " << stream << ": (" << status << "): " << cudaGetErrorString(status));
 }
  void GpuSurfDetectorInternal::saveIntegralImage(std::string const & basename)
  {
    float * iimg = m_intImg->h_get();
    std::stringstream sout;
    sout << basename << "-iimg.bin";
    std::ofstream fout(sout.str().c_str(),std::ios::binary);
    ASRL_ASSERT(fout.good(),"Unable to open file \"" << sout.str() << "\" for writing");
    int size[2];
    size[0] = m_intImg->width();
    size[1] = m_intImg->height();
    fout.write(reinterpret_cast<const char *>(&size[0]),2*sizeof(int));
    fout.write(reinterpret_cast<const char *>(iimg),m_intImg->width()*m_intImg->height() * sizeof(float));

  }