void MeshBufferReader::setMeshPyramid()
{
    TICK("setupMeshBufferRendering");
    
    visibilityMaskPyramid.resize(m_nNumMeshLevels);
    outputInfoPyramid.resize(m_nNumMeshLevels);
    outputPropPyramid.resize(m_nNumMeshLevels);

    for(int i = 0; i < m_nNumMeshLevels; ++i)
    {
        int numVertices = currentMeshPyramid.levels[i].numVertices;
        visibilityMaskPyramid[i].resize(numVertices,true);

        vector<CoordinateType> proj2D;
        proj2D.resize(2); proj2D[0] = 0; proj2D[1] = 0;

        outputInfoPyramid[i].meshData = std::move(currentMeshPyramid.levels[i]);
        // outputInfoPyramid[i].meshDataGT = currentMeshPyramid.levels[i];
        // outputInfoPyramid[i].meshDataColorDiff = currentMeshPyramid.levels[i];
        outputInfoPyramid[i].nRenderLevel = i;
        outputInfoPyramid[i].meshProj.resize(numVertices, proj2D);
        // outputInfoPyramid[i].meshProjGT = outputInfoPyramid[i].meshProj;
        outputInfoPyramid[i].visibilityMask.resize(numVertices, true);
        memset(outputInfoPyramid[i].camPose, 0, 6*sizeof(double));

        UpdateRenderingData(outputInfoPyramid[i], KK, camPose, outputInfoPyramid[i].meshData);

        //////////////////////////// outputPropPyramid

        if(meshLoadingSettings.loadProp)
        {
            outputPropPyramid[i].meshData = std::move(propMeshPyramid.levels[i]);
            // outputPropPyramid[i].meshDataGT = propMeshPyramid.levels[i];
            // outputPropPyramid[i].meshDataColorDiff = propMeshPyramid.levels[i];
            outputPropPyramid[i].nRenderLevel = i;
            outputPropPyramid[i].meshProj.resize(numVertices, proj2D);
            // outputPropPyramid[i].meshProjGT = outputPropPyramid[i].meshProj;
            outputPropPyramid[i].visibilityMask.resize(numVertices, true);
            memset(outputPropPyramid[i].camPose, 0, 6*sizeof(double));

            UpdateRenderingData(outputPropPyramid[i], KK, camPose, outputPropPyramid[i].meshData);
        }

        // update the visibility of each vertex
        if(useVisibilityMask)
        {
            TICK( "visibilityMask" + std::to_string(i) );

            UpdateVisibilityMaskGL(outputInfoPyramid[i], visibilityMaskPyramid[i], KK, camPose, m_nWidth, m_nHeight);
            if(meshLoadingSettings.loadProp)
            UpdateVisibilityMaskGL(outputPropPyramid[i], visibilityMaskPyramid[i], KK, camPose, m_nWidth, m_nHeight);

            TOCK( "visibilityMask" + std::to_string(i) );
            
        }
    }

    TOCK("setupMeshBufferRendering");
    
}
예제 #2
0
bool MainEngine::ProcessOneFrame(int nFrame)
{
    // read input
    // if(!GetInput(nFrame))
    // return false;
    

    // if(inputThreadGroup.size() > 0){
    //     inputThreadGroup.join_all();
    //     memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3);
    //     inputThreadGroup.remove_thread(pInputThread);
    //     pInputThread = inputThreadGroup.create_thread( boost::bind(&MainEngine::GetInput, this, nFrame) );
    // }
    // // for the first frame, we have to wait
    // else{
    //     pInputThread = inputThreadGroup.create_thread( boost::bind(&MainEngine::GetInput, this, nFrame) );
    //     inputThreadGroup.join_all();
    //     memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3);
    // }

    TICK("timePerFrame");

    TICK("getInput");
    if(pInputThread == NULL)
    {
        pInputThread = new boost::thread(boost::bind(&MainEngine::GetInput, this, nFrame));
        pInputThread->join();
        memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3);
    }
    else
    {
        pInputThread->join();
        memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3);
        delete pInputThread;
        pInputThread = new boost::thread(boost::bind(&MainEngine::GetInput, this, nFrame));
    }
    TOCK("getInput");

    if(!inputFlag)
    {
        cout << "getting input failure" << endl;
        return false;
    }
    
    // do tracking
    TICK("tracking");
    if(!m_pTrackingEngine->trackFrame(nFrame, m_pColorImageRGB, &pOutputInfo))
    {
        cout << "tracking failed: " << endl;
        return false;
    }
    TOCK("tracking");

    TOCK("timePerFrame");

    return true;
}
예제 #3
0
파일: pfile.c 프로젝트: Zhoutall/stasis
inline static int pfile_write_unlocked(int fd, lsn_t off, const byte *dat,
                                       lsn_t len) {
  int error = 0;
  ssize_t bytes_written = 0;
  TICK(write_hist);
  while (bytes_written < len) {

    ssize_t count = pwrite(fd,
                           dat + bytes_written,
                           len - bytes_written,
                           off + bytes_written);

    if (count == -1) {
      if (errno == EAGAIN || errno == EINTR) {
        // @see file.c for an explanation; basically; we ignore these,
        // and try again.
        count = 0;
      } else {
        if (errno == EBADF) {
          error = EBADF;
        } else {
          error = errno;
        }
        break;
      }
    }
    bytes_written += count;
    if (bytes_written != len) {
      DEBUG("pwrite spinning\n");
    }
  }
  TOCK(write_hist);
  return error;
}
MeshPyramidReader::MeshPyramidReader(MeshLoadingSettings& settings, int width,
    int height, double K[3][3], int startFrame, int numTrackingFrames): trackerInitialized(false)
{
    m_nWidth = width;
    m_nHeight = height;
    startFrameNo = startFrame;
    currentFrameNo = startFrame;

    pCurrentColorImageRGB = new unsigned char[3*width*height];
    // in this case camPose will always be zero
    for(int i = 0; i < 6; ++i)
    camPose[i] = 0;

    useVisibilityMask = settings.visibilityMask;

    setIntrinsicMatrix(K);

    TICK("loadingMesh");
    
    currentMeshPyramid = std::move(PangaeaMeshPyramid(settings.meshPath,
            settings.meshLevelFormat, currentFrameNo, settings.meshLevelList));
    if(settings.loadProp)
    {
        propMeshPyramid = std::move(PangaeaMeshPyramid(settings.meshPath,
            settings.propLevelFormat, currentFrameNo, settings.meshLevelList));
    }

    TOCK("loadingMesh");

    m_nNumMeshLevels = settings.meshLevelList.size();

}
예제 #5
0
void run_benchmarks(void) {
  int asizes[] = {2, 5, 10, 30, 500};
  int i, j;
  yatrie_t trie = (yatrie_t)NULL;
  int set_time = 0; int get_time = 0;

  BENCHMARK_INIT();

  for (i = 0; i < 5; i++) {
    int array_size = asizes[i];
    
    /* Contiguous keys */
    TICK();
    for (j = 0; j < array_size; j++)
      trie = yatrie_insert(trie, j, j);
    TOCK();
    set_time += benchmark_total_time * 500 / array_size;

    TICK();
    for (j = 0; j < array_size; j++)
      yatrie_get(trie, j);
    TOCK();
    get_time += benchmark_total_time * 500 / array_size;
    yatrie_free(trie); trie = (yatrie_t)NULL;

    /* Uniform keys */
    srand(1234567);
    TICK();
    for (j = 0; j < array_size; j++)
      trie = yatrie_insert(trie, rand(), j);
    TOCK();
    set_time += benchmark_total_time * 500 / array_size;

    TICK();
    for (j = 0; j < array_size; j++)
      yatrie_get(trie, rand());
    TOCK();
    get_time += benchmark_total_time * 500 / array_size;
    yatrie_free(trie); trie = (yatrie_t)NULL;
  }
  
  printf("%i %i\n", get_time, set_time);
}
예제 #6
0
파일: pfile.c 프로젝트: Zhoutall/stasis
static int pfile_read(stasis_handle_t *h, lsn_t off, byte *buf, lsn_t len) {
  pfile_impl *impl = (pfile_impl*)(h->impl);
  int error = 0;

  if (off < 0) {
    error = EDOM;
  } else {
    ssize_t bytes_read = 0;
    TICK(read_hist);
    while (bytes_read < len) {
      ssize_t count = pread(impl->fd,
                            buf + bytes_read,
                            len - bytes_read,
                            off + bytes_read);
      if (count == -1) {
        if (errno == EAGAIN || errno == EINTR) {
          count = 0;
        } else {
          if (errno == EBADF) {
            h->error = EBADF;
          } else {
            int err = errno;
            // The other errors either involve memory bugs (EFAULT), logic bugs
            // (EISDIR, EFIFO, EOVERFLOW), or bad hardware (EIO), so print
            // something to console, and uncleanly crash.
            perror("pfile_read encountered an unknown error code.");
            fprintf(stderr, "pread() returned -1; errno is %d\n",err);
            abort();
          }
          error = errno;
          break;
        }
      } else if(count == 0) {
        // EOF
        if(bytes_read != 0) {
          fprintf(stderr, "short read at end of storefile.  Assuming that this is due to strange recovery scenario, and continuing.\n");
        }
        error = EDOM;
        break;
      } else {
        bytes_read += count;
        if (bytes_read != len) {
          DEBUG("pread spinning\n");
        }
      }
    }
    TOCK(read_hist);
    assert(error || bytes_read == len);
  }
  return error;
}
void MeshSequenceReader::trackerUpdate(TrackerOutputInfo& outputInfo)
{
    TICK("visualRenderingUpdate");

    UpdateRenderingData(outputInfo, KK, camPose, currentMesh);
    UpdateRenderingDataFast(outputInfo, KK, currentMesh);

    if(useVisibilityMask)
    {
        UpdateVisibilityMaskGL(outputInfo, visibilityMask, KK, camPose, m_nWidth, m_nHeight);
        //UpdateVisibilityMask(outputInfo, visibilityMask, m_nWidth, m_nHeight);
        UpdateColorDiff(outputInfo, visibilityMask, colorImageSplit);
    }

    TOCK("visualRenderingUpdate");
}
bool MeshSequenceReader::setCurrentFrame(int curFrame)
{
    if(currentFrameNo != curFrame)
    {
        currentFrameNo = curFrame;

        // changing new frame time
        TICK("setCurrentFrame");

        if(!loadMesh(meshLoadingSettings.meshPath,
                meshLoadingSettings.meshFormat,currentFrameNo))
        return false;

        TOCK("setCurrentFrame");
    }
    return true;
}
예제 #9
0
int main(int argc, char *argv[])
{
  float *u = new float[N];
  float *v = new float[N];
  float alpha = 2.3;
  double time4=0;

  initializeVectors(u,v);

  TICK();
  axpyGPU(u,v,alpha,N);
  TOCK(time4);

  outputStats(time4);  

  delete [] u; 
  delete [] v;
}
bool MeshPyramidReader::setCurrentFrame(int curFrame)
{
    if(currentFrameNo != curFrame)
    {
        currentFrameNo = curFrame;

        TICK("setCurrentFrame");
        
        if(!loadMeshPyramid(meshLoadingSettings.meshPath,
                meshLoadingSettings.meshLevelFormat, currentFrameNo,
                meshLoadingSettings.meshLevelList))
        return false;

        TOCK("setCurrentFrame");
        
    }
    return true;
   
}
예제 #11
0
파일: pfile.c 프로젝트: Zhoutall/stasis
static int pfile_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
  TICK(force_range_hist);
  pfile_impl * impl = h->impl;
#ifdef HAVE_SYNC_FILE_RANGE
  // stop of zero syncs to eof.
  DEBUG("pfile_force_range calling sync_file_range %lld %lld\n",
	 start, stop-start); fflush(stdout);
  int ret = sync_file_range(impl->fd, start, stop-start,
			      SYNC_FILE_RANGE_WAIT_BEFORE |
			      SYNC_FILE_RANGE_WRITE |
			      SYNC_FILE_RANGE_WAIT_AFTER);
  if(ret) {
    int error = errno;
    assert(ret == -1);
    // With the possible exceptions of ENOMEM and ENOSPACE, all of the sync
    // errors are unrecoverable.
    h->error = EBADF;
    ret = error;
  }
#else
#ifdef HAVE_FDATASYNC
  DEBUG("pfile_force_range() is calling fdatasync()\n");
  fdatasync(impl->fd);
#else
  DEBUG("pfile_force_range() is calling fsync()\n");
  fsync(impl->fd);
#endif
  int ret = 0;
#endif
#ifdef HAVE_POSIX_FADVISE
  if(impl->sequential) {
    int err = posix_fadvise(impl->fd, start, stop-start, POSIX_FADV_DONTNEED);
    if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL (for a range of a file) to kernel failed");
  }
#endif
  TOCK(force_range_hist);
  return ret;
}
예제 #12
0
파일: pfile.c 프로젝트: Zhoutall/stasis
static int pfile_force(stasis_handle_t *h) {
  TICK(force_hist);
  pfile_impl *impl = h->impl;
  if(!(impl->file_flags & O_SYNC)) {
#ifdef HAVE_FDATASYNC
    DEBUG("pfile_force() is calling fdatasync()\n");
    fdatasync(impl->fd);
#else
    DEBUG("pfile_force() is calling fsync()\n");
    fsync(impl->fd);
#endif
  } else {
    DEBUG("File was opened with O_SYNC.  pfile_force() is a no-op\n");
  }
  if(impl->sequential) {
#ifdef HAVE_POSIX_FADVISE
    int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_DONTNEED);
    if(err) perror("Attempt to pass POSIX_FADV_SEQUENTIAL to kernel failed");
#endif
  }
  TOCK(force_hist);
  return 0;
}
예제 #13
0
bool MainFrame::ProcessOneFrame(int nFrame)
{
    // if(trackingType != DEFORMNRSFM && m_pControlPanel->m_nCurrentFrame == nFrame && m_nCurrentFrame == nFrame)
    // return true;
    isTrackingFinished =  false;
    cout << "processing frame: " << nFrame << endl;

    // // read input
    // TICK("getInput");
    // if(!GetInput(nFrame))
    // return false;
    // TOCK("getInput");

    // // do tracking
    // TICK("tracking");
    // if(!m_pTrackingEngine->trackFrame(nFrame, m_pColorImageRGB, &pOutputInfo))
    // {
    //     cout << "tracking failed: " << endl;
    //     return false;
    // }
    // TOCK("tracking");
    if(!MainEngine::ProcessOneFrame(nFrame))
    return false;

    // update imagePanel
    TICK("update2DRendering");
    m_pOverlayPane->updateImage(m_pColorImageRGB, m_nWidth, m_nHeight);
    m_pImagePane->updateImage(m_pColorImageRGB, m_nWidth, m_nHeight);
    TOCK("update2DRendering");

    isTrackingFinished =  true;

    Stopwatch::getInstance().printAll();

    return true;
}
예제 #14
0
void GlobalModel::fuse(const Eigen::Matrix4f & pose,
                       const int & time,
                       GPUTexture * rgb,
                       GPUTexture * depthRaw,
                       GPUTexture * depthFiltered,
                       GPUTexture * indexMap,
                       GPUTexture * vertConfMap,
                       GPUTexture * colorTimeMap,
                       GPUTexture * normRadMap,
                       const float depthCutoff,
                       const float confThreshold,
                       const float weighting)
{
    TICK("Fuse::Data");
    //This first part does data association and computes the vertex to merge with, storing
    //in an array that sets which vertices to update by index
    frameBuffer.Bind();

    glPushAttrib(GL_VIEWPORT_BIT);

    glViewport(0, 0, renderBuffer.width, renderBuffer.height);

    glClearColor(0, 0, 0, 0);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    dataProgram->Bind();

    dataProgram->setUniform(Uniform("cSampler", 0));
    dataProgram->setUniform(Uniform("drSampler", 1));
    dataProgram->setUniform(Uniform("drfSampler", 2));
    dataProgram->setUniform(Uniform("indexSampler", 3));
    dataProgram->setUniform(Uniform("vertConfSampler", 4));
    dataProgram->setUniform(Uniform("colorTimeSampler", 5));
    dataProgram->setUniform(Uniform("normRadSampler", 6));
    dataProgram->setUniform(Uniform("time", (float)time));
    dataProgram->setUniform(Uniform("weighting", weighting));

    dataProgram->setUniform(Uniform("cam", Eigen::Vector4f(Intrinsics::getInstance().cx(),
                                                     Intrinsics::getInstance().cy(),
                                                     1.0 / Intrinsics::getInstance().fx(),
                                                     1.0 / Intrinsics::getInstance().fy())));
    dataProgram->setUniform(Uniform("cols", (float)Resolution::getInstance().cols()));
    dataProgram->setUniform(Uniform("rows", (float)Resolution::getInstance().rows()));
    dataProgram->setUniform(Uniform("scale", (float)IndexMap::FACTOR));
    dataProgram->setUniform(Uniform("texDim", (float)TEXTURE_DIMENSION));
    dataProgram->setUniform(Uniform("pose", pose));
    dataProgram->setUniform(Uniform("maxDepth", depthCutoff));

    glEnableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER, uvo);
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0);

    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, newUnstableFid);

    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, newUnstableVbo);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, rgb->texture->tid);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, depthRaw->texture->tid);

    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, depthFiltered->texture->tid);

    glActiveTexture(GL_TEXTURE3);
    glBindTexture(GL_TEXTURE_2D, indexMap->texture->tid);

    glActiveTexture(GL_TEXTURE4);
    glBindTexture(GL_TEXTURE_2D, vertConfMap->texture->tid);

    glActiveTexture(GL_TEXTURE5);
    glBindTexture(GL_TEXTURE_2D, colorTimeMap->texture->tid);

    glActiveTexture(GL_TEXTURE6);
    glBindTexture(GL_TEXTURE_2D, normRadMap->texture->tid);

    glBeginTransformFeedback(GL_POINTS);

    glDrawArrays(GL_POINTS, 0, uvSize);

    glEndTransformFeedback();

    frameBuffer.Unbind();

    glBindTexture(GL_TEXTURE_2D, 0);

    glActiveTexture(GL_TEXTURE0);

    glDisableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);

    dataProgram->Unbind();

    glPopAttrib();

    glFinish();
    TOCK("Fuse::Data");

    TICK("Fuse::Update");
    //Next we update the vertices at the indexes stored in the update textures
    //Using a transform feedback conditional on a texture sample
    updateProgram->Bind();

    updateProgram->setUniform(Uniform("vertSamp", 0));
    updateProgram->setUniform(Uniform("colorSamp", 1));
    updateProgram->setUniform(Uniform("normSamp", 2));
    updateProgram->setUniform(Uniform("texDim", (float)TEXTURE_DIMENSION));
    updateProgram->setUniform(Uniform("time", time));

    glBindBuffer(GL_ARRAY_BUFFER, vbos[target].first);

    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, 0);

    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f)));

    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f) * 2));

    glEnable(GL_RASTERIZER_DISCARD);

    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, vbos[renderSource].second);

    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vbos[renderSource].first);

    glBeginTransformFeedback(GL_POINTS);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, updateMapVertsConfs.texture->tid);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, updateMapColorsTime.texture->tid);

    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, updateMapNormsRadii.texture->tid);

    glDrawTransformFeedback(GL_POINTS, vbos[target].second);

    glEndTransformFeedback();

    glDisable(GL_RASTERIZER_DISCARD);

    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);

    glDisableVertexAttribArray(0);
    glDisableVertexAttribArray(1);
    glDisableVertexAttribArray(2);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);

    updateProgram->Unbind();

    std::swap(target, renderSource);

    glFinish();
    TOCK("Fuse::Update");
}
void MeshSequenceReader::trackerInitSetup(TrackerOutputInfo& outputInfo)
{
    TICK("visualRenderingInit");

    outputInfo.meshData = currentMesh;
    outputInfo.meshDataGT = outputInfo.meshData;

    // get 2d projections
    double X,Y,Z;
    double u,v,w;
    vector<CoordinateType> proj2D, proj2DGT;
    proj2D.resize(2); proj2DGT.resize(2);
    for(int vertex = 0; vertex < currentMesh.numVertices; ++vertex)
    {

        X = currentMesh.vertices[vertex][0];
        Y = currentMesh.vertices[vertex][1];
        Z = currentMesh.vertices[vertex][2];

        if(KK[0][2] == 0) // this is orthographic camera
        {
           proj2D[0] = X; proj2D[1] = Y;
           proj2DGT[0] = X; proj2DGT[1] = Y;
        }
        else
        {
            u = KK[0][0] * X + KK[0][1] * Y + KK[0][2] * Z;
            v = KK[1][0] * X + KK[1][1] * Y + KK[1][2] * Z;
            w = KK[2][0] * X + KK[2][1] * Y + KK[2][2] * Z;

            if(w != 0)
            {
                u = u/w;
                v = v/w;
            }

            proj2D[0] = u; proj2D[1] = v;
            proj2DGT[0] = u; proj2DGT[1] = v;

        }

        outputInfo.meshProj.push_back(proj2D);
        outputInfo.meshProjGT.push_back(proj2DGT);

    }

    outputInfo.visibilityMask.resize(outputInfo.meshData.numVertices,true);
    // update the visiblity mask
    if(useVisibilityMask)
    {
        UpdateVisibilityMaskGL(outputInfo, visibilityMask, KK, camPose, m_nWidth, m_nHeight);
        //UpdateVisibilityMask(outputInfo, visibilityMask, m_nWidth, m_nHeight);
        outputInfo.meshDataColorDiff = outputInfo.meshData;
        UpdateColorDiff(outputInfo, visibilityMask, colorImageSplit);
    }

    // camera pose is always 0 in this case
    for(int i = 0; i < 6; ++i)
    outputInfo.camPose[i] = 0;

    trackerInitialized = true;

    TOCK("visualRenderingInit");

}
예제 #16
0
파일: CG.cpp 프로젝트: Titzi90/miniapps
/*!
  Routine to compute an approximate solution to Ax = b

  @param[in]    geom The description of the problem's geometry.
  @param[inout] A    The known system matrix
  @param[inout] data The data structure with all necessary CG vectors preallocated
  @param[in]    b    The known right hand side vector
  @param[inout] x    On entry: the initial guess; on exit: the new approximate solution
  @param[in]    max_iter  The maximum number of iterations to perform, even if tolerance is not met.
  @param[in]    tolerance The stopping criterion to assert convergence: if norm of residual is <= to tolerance.
  @param[out]   niters    The number of iterations actually performed.
  @param[out]   normr     The 2-norm of the residual vector after the last iteration.
  @param[out]   normr0    The 2-norm of the residual vector before the first iteration.
  @param[out]   times     The 7-element vector of the timing information accumulated during all of the iterations.
  @param[in]    doPreconditioning The flag to indicate whether the preconditioner should be invoked at each iteration.

  @return Returns zero on success and a non-zero value otherwise.

  @see CG_ref()
*/
int CG(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x,
    const int max_iter, const double tolerance, int & niters, double & normr, double & normr0,
    double * times, bool doPreconditioning) {

  double t_begin = mytimer();  // Start timing right away
  normr = 0.0;
  double rtz = 0.0, oldrtz = 0.0, alpha = 0.0, beta = 0.0, pAp = 0.0;


  double t0 = 0.0, t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0, t5 = 0.0;
//#ifndef HPCG_NOMPI
//  double t6 = 0.0;
//#endif
  local_int_t nrow = A.localNumberOfRows;
  Vector & r = data.r; // Residual vector
  Vector & z = data.z; // Preconditioned residual vector
  Vector & p = data.p; // Direction vector (in MPI mode ncol>=nrow)
  Vector & Ap = data.Ap;

  if (!doPreconditioning && A.geom->rank==0) HPCG_fout << "WARNING: PERFORMING UNPRECONDITIONED ITERATIONS" << std::endl;

#ifdef HPCG_DEBUG
  int print_freq = 1;
  if (print_freq>50) print_freq=50;
  if (print_freq<1)  print_freq=1;
#endif
  // p is of length ncols, copy x to p for sparse MV operation
  CopyVector(x, p);     //TODO paralel
  TICK(); ComputeSPMV(A, p, Ap); TOCK(t3); // Ap = A*p
  TICK(); ComputeWAXPBY(nrow, 1.0, b, -1.0, Ap, r, A.isWaxpbyOptimized);  TOCK(t2); // r = b - Ax (x stored in p)
  TICK(); ComputeDotProduct(nrow, r, r, normr, t4, A.isDotProductOptimized); TOCK(t1);
  normr = sqrt(normr);
#ifdef HPCG_DEBUG
  if (A.geom->rank==0) HPCG_fout << "Initial Residual = "<< normr << std::endl;
#endif

  // Record initial residual for convergence testing
  normr0 = normr;

  // Start iterations

  for (int k=1; k<=max_iter && normr/normr0 > tolerance; k++ ) {
    TICK();
    if (doPreconditioning)
      ComputeMG(A, r, z); // Apply preconditioner
    else
      CopyVector (r, z); // copy r to z (no preconditioning)
    TOCK(t5); // Preconditioner apply time

    if (k == 1) {
      TICK(); ComputeWAXPBY(nrow, 1.0, z, 0.0, z, p, A.isWaxpbyOptimized); TOCK(t2); // Copy Mr to p
      TICK(); ComputeDotProduct (nrow, r, z, rtz, t4, A.isDotProductOptimized); TOCK(t1); // rtz = r'*z
    } else {
      oldrtz = rtz;
      TICK(); ComputeDotProduct (nrow, r, z, rtz, t4, A.isDotProductOptimized); TOCK(t1); // rtz = r'*z
      beta = rtz/oldrtz;
      TICK(); ComputeWAXPBY (nrow, 1.0, z, beta, p, p, A.isWaxpbyOptimized);  TOCK(t2); // p = beta*p + z
    }

    TICK(); ComputeSPMV(A, p, Ap); TOCK(t3); // Ap = A*p
    TICK(); ComputeDotProduct(nrow, p, Ap, pAp, t4, A.isDotProductOptimized); TOCK(t1); // alpha = p'*Ap
    alpha = rtz/pAp;
    TICK(); ComputeWAXPBY(nrow, 1.0, x, alpha, p, x, A.isWaxpbyOptimized);// x = x + alpha*p
    ComputeWAXPBY(nrow, 1.0, r, -alpha, Ap, r, A.isWaxpbyOptimized);  TOCK(t2);// r = r - alpha*Ap
    TICK(); ComputeDotProduct(nrow, r, r, normr, t4, A.isDotProductOptimized); TOCK(t1);
    normr = sqrt(normr);
#ifdef HPCG_DEBUG
    if (A.geom->rank==0 && (k%print_freq == 0 || k == max_iter))
      HPCG_fout << "Iteration = "<< k << "   Scaled Residual = "<< normr/normr0 << std::endl;
#endif
    niters = k;
  }

  // Store times
  times[1] += t1; // dot-product time
  times[2] += t2; // WAXPBY time
  times[3] += t3; // SPMV time
  times[4] += t4; // AllReduce time
  times[5] += t5; // preconditioner apply time
//#ifndef HPCG_NOMPI
//  times[6] += t6; // exchange halo time
//#endif
  times[0] += mytimer() - t_begin;  // Total time. All done...
  return(0);
}
MeshBufferReader::MeshBufferReader(MeshLoadingSettings& settings, int width,
                                   int height, double K[3][3], int startFrame, int numTrackingFrames): trackerInitialized(false)
{
  m_nWidth = width;
  m_nHeight = height;
  startFrameNo = startFrame;
  currentFrameNo = startFrame;

  pCurrentColorImageRGB = new unsigned char[3*width*height];
  // in this case camPose will always be zero
  for(int i = 0; i < 6; ++i)
    camPose[i] = 0;

  useVisibilityMask = settings.visibilityMask;

  setIntrinsicMatrix(K);

  nRenderingLevel = 0;
  m_nNumMeshLevels = settings.meshLevelList.size();

  // a bit ugly
  nFrameStep = imageSourceSettings.frameStep;

  // loading meshes into buffer
  // outputInfoPyramidBuffer.resize(numTrackingFrames);
  // outputPropPyramidBuffer.resize(numTrackingFrames);
  int bufferSize = (numTrackingFrames - startFrameNo)/nFrameStep + 1;
  outputInfoPyramidBuffer.resize(bufferSize);
  outputPropPyramidBuffer.resize(bufferSize);

  m_nGoodFrames = 0;

  TICK("loadingMeshBuffer");

  for(int i = startFrameNo; i <= numTrackingFrames; i = i + nFrameStep)
    {

      // TICK("loadingOneFrame");

      if(!existenceTest(settings.meshPath, settings.meshLevelFormat,
                        i, settings.meshLevelList))
        break;

      ++m_nGoodFrames;
      currentMeshPyramid = std::move(PangaeaMeshPyramid(settings.meshPath,
                                                        settings.meshLevelFormat, i, settings.meshLevelList));

      // TOCK("loadingOneFrame");

      if(settings.loadProp)
        {
          propMeshPyramid = std::move(PangaeaMeshPyramid(settings.meshPath,
                                                         settings.propLevelFormat, i, settings.meshLevelList));
        }
      if(!settings.fastLoading)
        propMeshPyramid = currentMeshPyramid;


      // TICK("setOneFrame");

      setMeshPyramid();

      int bufferPos = (i-startFrameNo)/nFrameStep;
      outputInfoPyramidBuffer[ bufferPos ] = std::move(outputInfoPyramid);
      outputPropPyramidBuffer[ bufferPos ] = std::move(outputPropPyramid);

      // TOCK("setOneFrame");

      cout << "loading frame " << i << endl;
    }

  TOCK("loadingMeshBuffer");

}
예제 #18
0
//ToDo(robin): add support for log
int main(int argc, char ** argv) {
  CommandLineOptions options(argc, argv);
	Window * window = new Window(800, 600, APPLICATION_NAME);
	buffer points;
	buffer lines;
	int count = 1;

	std::vector<float> lines_buffer;
	std::vector<float> points_buffer;
	std::vector<ObjectInfo> obj_buffer;

	bool mouseDown = false;
	int mouseX = 0, mouseY = 0, mouseDX = 0, mouseDY = 0, screenSizeX = window->getSize().x, screenSizeY =  window->getSize().y;

	float scale[2] = {1.0, 1.0}, centerX = 0.0, centerY = 0.0, mouseDXScreen = 0.0, mouseDYScreen = 0.0;
	float radius = 50;
	unsigned int samplerate = options.samplerate();

	std::vector<SoundProcessor::v3> listener;

	std::signal(SIGTERM, terminate);
	std::signal(SIGINT, terminate);
	std::signal(SIGABRT, terminate);

	double distBetween = 0.42;

	double * mics = options.mics();

	for(int i = 0; i < options.micCount(); i++) {
		listener.push_back(SoundProcessor::v3(mics[3 * i], mics[3 * i + 1], mics[3 * i + 2]));
	}

	std::cout << "mics: [" << std::endl;
	for(auto l : listener) {
		std::cout << "[" << l.x << ", "
				  << l.y << ", "
				  << l.z << "]" << std::endl;

	}
	std::cout << "]" << std::endl;

	SoundProcessor soundProcessor(samplerate, listener);

	glew_init();

	int listener_count = init_listeners(listener, points_buffer, soundProcessor, radius);

	server = new Server(options.audioPort(), [listener](sf::TcpSocket * socket) {
			unsigned int size = listener.size();

			socket->send(&size, sizeof(int));
			std::cout << "client connected: " << socket->getRemoteAddress() << ":" << socket->getRemotePort() << std::endl;
		});

    GuiServer gserver(options.guiPort());

	count = listener.size();

	glGenBuffers(1, &points.vbo);
	glGenVertexArrays(1, &points.vao);

	glBindBuffer(GL_ARRAY_BUFFER, points.vbo);
	glBufferData(GL_ARRAY_BUFFER, 6 * count * sizeof(float), points_buffer.data(), GL_STREAM_DRAW);
	glBindVertexArray(points.vao);

	ShaderProgram * shaderProgram = new ShaderProgram("#version 130\n"
													  "uniform vec2 center;\n"
													  "uniform vec2 scale;\n"
													  "in vec3 vp;\n"
													  "in vec3 color;\n"
													  "out vec3 Color;\n"
													  "void main() {\n"
													  "   gl_Position = vec4((vp.x + center.x) / scale.x, (vp.y - center.y) / scale.y, 0.0, 1.0);\n"
													  "   gl_PointSize = vp.z / scale.x;\n"
													  "   Color = color;\n"
													  "}\n"
													  , "#version 130\n"
													  "in vec3 Color;\n"
													  "out vec4 frag_colour;\n"
													  "void main () {\n"
													  "    gl_FragColor = vec4(Color, 1.0);\n"
 													  "}");

	shaderProgram->vertexAttribPointer("color", 3, GL_FLOAT, false, 24, (void *) 12);
	shaderProgram->vertexAttribPointer("vp", 3, GL_FLOAT, false, 24, 0);

	lines_buffer.push_back(-1);
	lines_buffer.push_back(0);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);

	lines_buffer.push_back(1);
	lines_buffer.push_back(0);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);

	lines_buffer.push_back(0);
	lines_buffer.push_back(-1);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);

	lines_buffer.push_back(0);
	lines_buffer.push_back(1);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);
	lines_buffer.push_back(1);
	lines_buffer.push_back(0);

	glGenBuffers(1, &lines.vbo);
	glGenVertexArrays(1, &lines.vao);

	glBindBuffer(GL_ARRAY_BUFFER, lines.vbo);
	glBufferData(GL_ARRAY_BUFFER, lines_buffer.size() * sizeof(float), lines_buffer.data(), GL_STREAM_DRAW);
	glBindVertexArray(lines.vao);

	shaderProgram->vertexAttribPointer("color", 3, GL_FLOAT, false, 24, (void *) 12);
	shaderProgram->vertexAttribPointer("vp", 3, GL_FLOAT, false, 24, 0);

    glEnable(GL_POINT_SMOOTH);
	glEnable(GL_LINE_SMOOTH);
	glEnable(GL_PROGRAM_POINT_SIZE);

    glPointSize(20.0);
	glLineWidth(3.0);

	glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
	glEnable(GL_BLEND);

	glClearColor(1, 1, 1, 1);

	screenSizeX = window->getSize().x;
	screenSizeY = window->getSize().y;

	float freq = 100;
	auto now = std::chrono::high_resolution_clock::now();

	Stopwatch::getInstance().setCustomSignature(32435);

	int id = 0;

	while (window->open()) {
		TICK("simulation_total");
		TICK("simulation_process_events");

		auto events = window->pollEvents();

		for(auto event : events) {
			switch (event.type) {
			case sf::Event::Closed: {
				window->close();
				break;
			}
			case sf::Event::KeyPressed: {
				if(event.key.code == sf::Keyboard::Space && gserver.buffer != nullptr) {
					std::cout << id++ << ", "
							  << gserver.buffer[0] << ", "
							  << gserver.buffer[1] << ", "
							  << gserver.buffer[2] << std::endl;
				}
				break;
			}
			case sf::Event::Resized: {
                glViewport(0, 0, event.size.width, event.size.height);
				screenSizeX = event.size.width;
				screenSizeY = event.size.height;
				break;
			}
			case sf::Event::MouseButtonPressed: {
				if(event.mouseButton.button == sf::Mouse::Left) {
					mouseDown = true;

					mouseX = sf::Mouse::getPosition().x;
					mouseY = sf::Mouse::getPosition().y;
				}
				else if(event.mouseButton.button == sf::Mouse::Right) {
					float x = event.mouseButton.x, y = event.mouseButton.y, dx, dy;
					bool add = true;

					x = (2.0 * (x / screenSizeX) - 1.0) * scale[0] - centerX;
					y = -(2.0 * (y / screenSizeY) - 1.0) * scale[1] + centerY;

					for(int i = (points_buffer.size() / 6) - 1; i > listener.size() - 1; i--) {
						dx = (points_buffer[6 * i] - x) * screenSizeX * scale[0];
						dy = (points_buffer[6 * i + 1] - y) * screenSizeY * scale[1];

						if(sqrt(dx * dx + dy * dy) < radius * scale[0]) {
							soundProcessor.remove(points_buffer[6 * i], points_buffer[6 * i + 1]);
							points_buffer.erase(points_buffer.begin() + 6 * i - 1, points_buffer.begin() + 6 * i + 5);
							obj_buffer.erase(obj_buffer.begin() + i - listener.size());

							count--;
							add = false;

							break;
						}
					}

					if(add) {
						points_buffer.push_back(x);
						points_buffer.push_back(y);
						points_buffer.push_back(radius);
						points_buffer.push_back(0);
						points_buffer.push_back(0);
						points_buffer.push_back(0);

						ObjectInfo oinfo;
						SoundProcessor::SoundObject * obj = new SoundProcessor::SoundObject(x, y, freq);

						oinfo.id = soundProcessor.add(obj);

						obj_buffer.push_back(oinfo);

						std::cout << "adding with freq: " << freq << std::endl;

						//freq += 0;
						freq += FREQUENCY_INCREMENT;

						count++;
					}

					glBindBuffer(GL_ARRAY_BUFFER, points.vbo);
					glBufferData(GL_ARRAY_BUFFER, 6 * count * sizeof(float), points_buffer.data(), GL_STREAM_DRAW);
				}
				break;
			}
			case sf::Event::MouseButtonReleased: {
				if(event.mouseButton.button == sf::Mouse::Left) {
					mouseDown = false;
					centerX = centerX - mouseDXScreen;
					centerY = centerY - mouseDYScreen;
					mouseDXScreen = 0.0;
					mouseDYScreen = 0.0;
				}
				break;
			}
			case sf::Event::MouseWheelScrolled: {
				scale[0] *= (event.mouseWheelScroll.delta > 0 ? 0.9 : 1.11111111);
				scale[1] *= (event.mouseWheelScroll.delta > 0 ? 0.9 : 1.11111111);

				break;
			}
			default:
				break;
			}
		}

		glClear(GL_COLOR_BUFFER_BIT);

		if (mouseDown) {
			mouseDX = mouseX - sf::Mouse::getPosition().x;
			mouseDY = mouseY - sf::Mouse::getPosition().y;

			mouseDXScreen = ((((double) mouseDX * 2) / (double) screenSizeX)) * scale[0];
			mouseDYScreen = ((((double) mouseDY * 2) / (double) screenSizeY)) * scale[1];
		}

		TOCK("simulation_process_events");

		double time = (std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock().now() - now).count()) / 1000000000.0;
		now = std::chrono::high_resolution_clock().now();
		unsigned int samples = (float) samplerate * time;

		TICK("simulation_generate_samples");

		double * current_samples = soundProcessor.sample(samples);

		TOCK("simulation_generate_samples");

		server->send(current_samples, samples * listener.size());

		free(current_samples);

		TICK("simulation_draw");

	    glBindVertexArray(points.vao);

		shaderProgram->uniform2f("center", centerX - mouseDXScreen, centerY - mouseDYScreen);
		shaderProgram->uniform2f("scale", scale[0], scale[1]);

		glBufferData(GL_ARRAY_BUFFER, 6 * count * sizeof(float), points_buffer.data(), GL_STREAM_DRAW);

		glDrawArrays(GL_POINTS, 0, count);

		glBindVertexArray(lines.vao);

		std::vector<float> data = gserver.getPoints();

		lines_buffer.clear();
		lines_buffer.insert(lines_buffer.begin(), data.begin(), data.end());

		glBindBuffer(GL_ARRAY_BUFFER, lines.vbo);

		glBufferData(GL_ARRAY_BUFFER, data.size() * sizeof(float), data.data(), GL_STREAM_DRAW);

		// ToDo(robin): better solution!!
		//if(count > listener.size())
			glDrawArrays(GL_POINTS, 0, data.size() / 6);

		window->display();
		TOCK("simulation_draw");

		TOCK("simulation_total");

        Stopwatch::getInstance().sendAll();
    }

	terminate(0);

	return 0;
}
예제 #19
0
bool inline TrackerInterface::process()
{
    if(firstRun)
    {
        cudaSafeCall(cudaSetDevice(ConfigArgs::get().gpu));
        firstRun = false;
    }

    if(!threadPack.pauseCapture.getValue())
    {
        TICK(threadIdentifier);

        uint64_t start = Stopwatch::getCurrentSystemTime();

        bool returnVal = true;

        bool shouldEnd = endRequested.getValue();

        if(!logRead->grabNext(returnVal, currentFrame) || shouldEnd)
        {
            threadPack.pauseCapture.assignValue(true);
            threadPack.finalised.assignValue(true);

            finalise();

            while(!threadPack.cloudSliceProcessorFinished.getValueWait())
            {
                frontend->cloudSignal.notify_all();
            }

            return shouldEnd ? false : returnVal;
        }

        depth.data = (unsigned short *)logRead->decompressedDepth;
        rgb24.data = (PixelRGB *)logRead->decompressedImage;
        
        currentFrame++;

        depth.step = Resolution::get().width() * 2;
        depth.rows = Resolution::get().rows();
        depth.cols = Resolution::get().cols();

        rgb24.step = Resolution::get().width() * 3;
        rgb24.rows = Resolution::get().rows();
        rgb24.cols = Resolution::get().cols();

        depth_device.upload(depth.data, depth.step, depth.rows, depth.cols);
        colors_device.upload(rgb24.data, rgb24.step, rgb24.rows, rgb24.cols);

        TICK("processFrame");
        frontend->processFrame(depth_device,
                               colors_device,
                               logRead->decompressedImage,
                               logRead->decompressedDepth,
                               logRead->timestamp,
                               logRead->isCompressed,
                               logRead->compressedDepth,
                               logRead->compressedDepthSize,
                               logRead->compressedImage,
                               logRead->compressedImageSize);
        TOCK("processFrame");

        uint64_t duration = Stopwatch::getCurrentSystemTime() - start;

        if(threadPack.limit.getValue() && duration < 33333)
        {
            int sleepTime = std::max(int(33333 - duration), 0);
            usleep(sleepTime);
        }

        TOCK(threadIdentifier);
    }
    
    return true;
}
예제 #20
0
파일: test.c 프로젝트: B-Rich/mixminion
int
test_decode(void *code, int k, int index[], int sz, char *s)
{
    int errors;
    int reconstruct = 0 ;
    int item, i ;

    static int prev_k = 0, prev_sz = 0;
    static u_char **d_original = NULL, **d_src = NULL ;

    if (sz < 1 || sz > 8192) {
	fprintf(stderr, "test_decode: size %d invalid, must be 1..8K\n",
		sz);
	return 1 ;
    }
    if (k < 1 || k > GF_SIZE + 1) {
	fprintf(stderr, "test_decode: k %d invalid, must be 1..%d\n",
		k, GF_SIZE + 1 );
	return 2 ;
    }
    if (prev_k != k || prev_sz != sz) {
	if (d_original != NULL) {
	    for (i = 0 ; i < prev_k ; i++ ) {
		free(d_original[i]);
		free(d_src[i]);
	    }
	    free(d_original);
	    free(d_src);
	    d_original = NULL ;
	    d_src = NULL ;
	}
    }
    prev_k = k ;
    prev_sz = sz ;
    if (d_original == NULL) {
	d_original = my_malloc(k * sizeof(void *), "d_original ptr");
	d_src = my_malloc(k * sizeof(void *), "d_src ptr");

	for (i = 0 ; i < k ; i++ ) {
	    d_original[i] = my_malloc(sz, "d_original data");
	    d_src[i] = my_malloc(sz, "d_src data");
	}
	/*
	 * build sample data
	 */
	for (i = 0 ; i < k ; i++ ) {
	    for (item=0; item < sz; item++)
		d_original[i][item] = ((item ^ i) + 3) & GF_SIZE;
	}
    }

    errors = 0 ;

    for( i = 0 ; i < k ; i++ )
	if (index[i] >= k ) reconstruct ++ ;

    TICK(ticks[2]);
    for( i = 0 ; i < k ; i++ )
	fec_encode(code, d_original, d_src[i], index[i], sz );
    TOCK(ticks[2]);

    TICK(ticks[1]);
    if (fec_decode(code, d_src, index, sz)) {
	fprintf(stderr, "detected singular matrix for %s  \n", s);
	return 1 ;
    }
    TOCK(ticks[1]);

    for (i=0; i<k; i++)
	if (bcmp(d_original[i], d_src[i], sz )) {
	    errors++;
	    fprintf(stderr, "error reconstructing block %d\n", i);
	}
    if (errors)
	fprintf(stderr, "Errors reconstructing %d blocks out of %d\n",
	    errors, k);

    fprintf(stderr,
	"  k %3d, l %3d  c_enc %10.6f MB/s c_dec %10.6f MB/s     \r",
	k, reconstruct,
	(double)(k * sz * reconstruct)/(double)ticks[2],
	(double)(k * sz * reconstruct)/(double)ticks[1]);
    return errors ;
}
예제 #21
0
파일: cg_solve.hpp 프로젝트: Mantevo/miniFE
void
cg_solve(OperatorType& A,
         const VectorType& b,
         VectorType& x,
         Matvec matvec,
         typename OperatorType::LocalOrdinalType max_iter,
         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance,
         typename OperatorType::LocalOrdinalType& num_iters,
         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr,
         timer_type* my_cg_times)
{
  typedef typename OperatorType::ScalarType ScalarType;
  typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType;
  typedef typename OperatorType::LocalOrdinalType LocalOrdinalType;
  typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type;

  timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0;
  timer_type total_time = mytimer();

  int myproc = 0;
#ifdef HAVE_MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
#endif

  if (!A.has_local_indices) {
    std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means "
       << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve."
       << std::endl;
    return;
  }

  char* str;
  int ngpu = 2;
  int local_rank = 0;
  int device = 0;
  int skip_gpu = 99999;
  if((str = getenv("CUDA_NGPU")) != NULL) {
    ngpu = atoi(str);
  }
  if((str = getenv("CUDA_SKIP_GPU")) != NULL) {
    skip_gpu = atoi(str);
  }
  if((str = getenv("SLURM_LOCALID")) != NULL) {
    local_rank = atoi(str);
    device = local_rank % ngpu;
    if(device >= skip_gpu) device++;
  }
  if((str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) != NULL) {
    local_rank = atoi(str);
    device = local_rank % ngpu;
    if(device >= skip_gpu) device++;
  }
  if((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) != NULL) {
    local_rank = atoi(str);
    device = local_rank % ngpu;
    if(device >= skip_gpu) device++;
  }

  size_t nrows = A.rows.size();
  LocalOrdinalType ncols = A.num_cols;

  NVAMG_SAFE_CALL(NVAMG_initialize());
  NVAMG_SAFE_CALL(NVAMG_initialize_plugins());
  NVAMG_matrix_handle matrix;
  NVAMG_vector_handle rhs;
  NVAMG_vector_handle soln;
  NVAMG_resources_handle rsrc = NULL;
  NVAMG_solver_handle solver = NULL;
  NVAMG_config_handle config;
  NVAMG_SAFE_CALL(NVAMG_config_create_from_file(&config,"NVAMG_CONFIG" ));

  MPI_Comm nvamg_comm;
  MPI_Comm_dup(MPI_COMM_WORLD, &nvamg_comm);
  int devices[] = {device};

  NVAMG_resources_create(&rsrc, config, &nvamg_comm, 1, devices);
  NVAMG_SAFE_CALL(NVAMG_solver_create(&solver, rsrc, NVAMG_mode_dDDI, config));
  NVAMG_SAFE_CALL(NVAMG_matrix_create(&matrix, rsrc, NVAMG_mode_dDDI));
  NVAMG_SAFE_CALL(NVAMG_vector_create(&rhs, rsrc, NVAMG_mode_dDDI));
  NVAMG_SAFE_CALL(NVAMG_vector_create(&soln, rsrc, NVAMG_mode_dDDI));

  //Generating communication Maps for NVAMG
  if(A.neighbors.size()>0) {
    int** send_map = new int*[A.neighbors.size()];
    int** recv_map = new int*[A.neighbors.size()];
    int send_offset = 0;
    int recv_offset = A.row_offsets.size()-1;;
    for(int i = 0; i<A.neighbors.size();i++) {
      send_map[i] = &A.elements_to_send[send_offset];
      send_offset += A.send_length[i];
      recv_map[i] = new int[A.recv_length[i]];
      for(int j=0; j<A.recv_length[i]; j++)
        recv_map[i][j] = recv_offset+j;
      recv_offset += A.recv_length[i];
    }
    const int** send_map_c = (const int**) send_map;
    const int** recv_map_c = (const int**) recv_map;
    NVAMG_SAFE_CALL(NVAMG_matrix_comm_from_maps_one_ring(
      matrix, 1, A.neighbors.size(),A.neighbors.data(),
      A.send_length.data(), send_map_c,
      A.recv_length.data(), recv_map_c));
    NVAMG_SAFE_CALL(NVAMG_vector_bind(rhs,matrix));
    NVAMG_SAFE_CALL(NVAMG_vector_bind(soln,matrix));
    for(int i=0; i<A.neighbors.size(); i++)
      delete [] recv_map[i];

  }

  for(int i=0;i<x.coefs.size();i++) x.coefs[i]=1;

  VectorType r(b.startIndex, nrows);
  VectorType p(0, ncols);
  VectorType Ap(b.startIndex, nrows);

  normr = 0;
  magnitude_type rtrans = 0;
  magnitude_type oldrtrans = 0;

  LocalOrdinalType print_freq = max_iter/10;
  if (print_freq>50) print_freq = 50;
  if (print_freq<1)  print_freq = 1;

  ScalarType one = 1.0;
  ScalarType zero = 0.0;

  TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY);

  TICK();
  matvec(A, p, Ap);
  TOCK(tMATVEC);

  TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY);

  TICK(); rtrans = dot_r2(r); TOCK(tDOT);

  normr = std::sqrt(rtrans);

  if (myproc == 0) {
    std::cout << "Initial Residual = "<< normr << std::endl;
  }
  {

    //Matrix upload needs to happen before vector, otherwise it crashes
    NVAMG_SAFE_CALL(NVAMG_matrix_upload_all(matrix,A.row_offsets.size()-1, A.packed_coefs.size(),1,1, &A.row_offsets[0],&A.packed_cols[0],&A.packed_coefs[0], NULL));
    NVAMG_SAFE_CALL(NVAMG_vector_upload(soln, p.coefs.size(), 1, &p.coefs[0]));
    NVAMG_SAFE_CALL(NVAMG_vector_upload(rhs, b.coefs.size(), 1, &b.coefs[0]));

    int n = 0;
    int bsize_x = 0, bsize_y = 0;

    NVAMG_SAFE_CALL(NVAMG_solver_setup(solver, matrix));
    NVAMG_SAFE_CALL(NVAMG_solver_solve(solver, rhs, soln));
    NVAMG_SAFE_CALL(NVAMG_vector_download(soln, &x.coefs[0]));

    int niter;
    NVAMG_SAFE_CALL(NVAMG_solver_get_iterations_number(solver, &niter));

    TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY);
    TICK();
    matvec(A, p, Ap);
    TOCK(tMATVEC);

    TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY);

    TICK(); rtrans = dot_r2(r); TOCK(tDOT);

    normr = std::sqrt(rtrans);

    if (myproc == 0) {
      std::cout << "Final Residual = "<< normr << " after " << niter << " iterations" << std::endl;
    }
   }

  my_cg_times[WAXPY] = tWAXPY;
  my_cg_times[DOT] = tDOT;
  my_cg_times[MATVEC] = tMATVEC;
  my_cg_times[MATVECDOT] = tMATVECDOT;
  my_cg_times[TOTAL] = mytimer() - total_time;
}
예제 #22
0
파일: cg_solve.hpp 프로젝트: Mantevo/miniFE
void
cg_solve(OperatorType& A,
         const VectorType& b,
         VectorType& x,
         Matvec matvec,
         typename OperatorType::LocalOrdinalType max_iter,
         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance,
         typename OperatorType::LocalOrdinalType& num_iters,
         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr,
         timer_type* my_cg_times)
{
  typedef typename OperatorType::ScalarType ScalarType;
  typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType;
  typedef typename OperatorType::LocalOrdinalType LocalOrdinalType;
  typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type;

  timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0;
  timer_type total_time = mytimer();

  int myproc = 0;
#ifdef HAVE_MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
#endif

  if (!A.has_local_indices) {
    std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means "
       << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve."
       << std::endl;
    return;
  }

  size_t nrows = A.rows.size();
  LocalOrdinalType ncols = A.num_cols;

  VectorType r(b.startIndex, nrows, 256);
  VectorType p(0, ncols, 512);
  VectorType Ap(b.startIndex, nrows, 64);

  normr = 0;
  magnitude_type rtrans = 0;
  magnitude_type oldrtrans = 0;

  LocalOrdinalType print_freq = max_iter/10;
  if (print_freq>50) print_freq = 50;
  if (print_freq<1)  print_freq = 1;

  ScalarType one = 1.0;
  ScalarType zero = 0.0;

  TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY);

//  print_vec(p.coefs, "p");

  TICK();
  matvec(A, p, Ap);
  TOCK(tMATVEC);

  TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY);

  TICK(); rtrans = dot_r2(r); TOCK(tDOT);

//std::cout << "rtrans="<<rtrans<<std::endl;

  normr = std::sqrt(rtrans);

  if (myproc == 0) {
    std::cout << "Initial Residual = "<< normr << std::endl;
  }

  magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon();

#ifdef MINIFE_DEBUG
  std::ostream& os = outstream();
  os << "brkdown_tol = " << brkdown_tol << std::endl;
#endif

#ifdef MINIFE_DEBUG_OPENMP
  std::cout << "Starting CG Solve Phase..." << std::endl;
#endif

  for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) {
    if (k == 1) {
      //TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY);
	TICK(); daxpby(one, r, zero, p); TOCK(tWAXPY);
    }
    else {
      oldrtrans = rtrans;
      TICK(); rtrans = dot_r2(r); TOCK(tDOT);
      const magnitude_type beta = rtrans/oldrtrans;
      TICK(); daxpby(one, r, beta, p); TOCK(tWAXPY);
    }

    normr = sqrt(rtrans);

    if (myproc == 0 && (k%print_freq==0 || k==max_iter)) {
      std::cout << "Iteration = "<<k<<"   Residual = "<<normr<<std::endl;
    }

    magnitude_type alpha = 0;
    magnitude_type p_ap_dot = 0;

    TICK(); matvec(A, p, Ap); TOCK(tMATVEC);
    TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT);

#ifdef MINIFE_DEBUG
    os << "iter " << k << ", p_ap_dot = " << p_ap_dot;
    os.flush();
#endif
    if (p_ap_dot < brkdown_tol) {
      if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) {
        std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl;
#ifdef MINIFE_DEBUG
        os << "ERROR, numerical breakdown!"<<std::endl;
#endif
        //update the timers before jumping out.
        my_cg_times[WAXPY] = tWAXPY;
        my_cg_times[DOT] = tDOT;
        my_cg_times[MATVEC] = tMATVEC;
        my_cg_times[TOTAL] = mytimer() - total_time;
        return;
      }
      else brkdown_tol = 0.1 * p_ap_dot;
    }
    alpha = rtrans/p_ap_dot;
#ifdef MINIFE_DEBUG
    os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl;
#endif

    TICK(); daxpby(alpha, p, one, x);
            daxpby(-alpha, Ap, one, r); TOCK(tWAXPY);

    num_iters = k;
  }

  my_cg_times[WAXPY] = tWAXPY;
  my_cg_times[DOT] = tDOT;
  my_cg_times[MATVEC] = tMATVEC;
  my_cg_times[MATVECDOT] = tMATVECDOT;
  my_cg_times[TOTAL] = mytimer() - total_time;
}
예제 #23
0
void MainController::run()
{
    while(!pangolin::ShouldQuit() && !((!logReader->hasMore()) && quiet) && !(eFusion->getTick() == end && quiet))
    {
        if(!gui->pause->Get() || pangolin::Pushed(*gui->step))
        {
            if((logReader->hasMore() || rewind) && eFusion->getTick() < end)
            {
                TICK("LogRead");
                if(rewind)
                {
                    if(!logReader->hasMore())
                    {
                        logReader->getBack();
                    }
                    else
                    {
                        logReader->getNext();
                    }

                    if(logReader->rewound())
                    {
                        logReader->currentFrame = 0;
                    }
                }
                else
                {
                    logReader->getNext();
                }
                TOCK("LogRead");

                if(eFusion->getTick() < start)
                {
                    eFusion->setTick(start);
                    logReader->fastForward(start);
                }

                float weightMultiplier = framesToSkip + 1;

                if(framesToSkip > 0)
                {
                    eFusion->setTick(eFusion->getTick() + framesToSkip);
                    logReader->fastForward(logReader->currentFrame + framesToSkip);
                    framesToSkip = 0;
                }

                Eigen::Matrix4f * currentPose = 0;

                if(groundTruthOdometry)
                {
                    currentPose = new Eigen::Matrix4f;
                    currentPose->setIdentity();
                    *currentPose = groundTruthOdometry->getIncrementalTransformation(logReader->timestamp);
                }

                eFusion->processFrame(logReader->rgb, logReader->depth, logReader->timestamp, currentPose, weightMultiplier);

                if(currentPose)
                {
                    delete currentPose;
                }

                if(frameskip && Stopwatch::getInstance().getTimings().at("Run") > 1000.f / 30.f)
                {
                    framesToSkip = int(Stopwatch::getInstance().getTimings().at("Run") / (1000.f / 30.f));
                }
            }
        }
        else
        {
            eFusion->predict();
        }

        TICK("GUI");

        if(gui->followPose->Get())
        {
            pangolin::OpenGlMatrix mv;

            Eigen::Matrix4f currPose = eFusion->getCurrPose();
            Eigen::Matrix3f currRot = currPose.topLeftCorner(3, 3);

            Eigen::Quaternionf currQuat(currRot);
            Eigen::Vector3f forwardVector(0, 0, 1);
            Eigen::Vector3f upVector(0, iclnuim ? 1 : -1, 0);

            Eigen::Vector3f forward = (currQuat * forwardVector).normalized();
            Eigen::Vector3f up = (currQuat * upVector).normalized();

            Eigen::Vector3f eye(currPose(0, 3), currPose(1, 3), currPose(2, 3));

            eye -= forward;

            Eigen::Vector3f at = eye + forward;

            Eigen::Vector3f z = (eye - at).normalized();  // Forward
            Eigen::Vector3f x = up.cross(z).normalized(); // Right
            Eigen::Vector3f y = z.cross(x);

            Eigen::Matrix4d m;
            m << x(0),  x(1),  x(2),  -(x.dot(eye)),
                 y(0),  y(1),  y(2),  -(y.dot(eye)),
                 z(0),  z(1),  z(2),  -(z.dot(eye)),
                    0,     0,     0,              1;

            memcpy(&mv.m[0], m.data(), sizeof(Eigen::Matrix4d));

            gui->s_cam.SetModelViewMatrix(mv);
        }

        gui->preCall();

        std::stringstream stri;
        stri << eFusion->getModelToModel().lastICPCount;
        gui->trackInliers->Ref().Set(stri.str());

        std::stringstream stre;
        stre << (isnan(eFusion->getModelToModel().lastICPError) ? 0 : eFusion->getModelToModel().lastICPError);
        gui->trackRes->Ref().Set(stre.str());

        if(!gui->pause->Get())
        {
            gui->resLog.Log((isnan(eFusion->getModelToModel().lastICPError) ? std::numeric_limits<float>::max() : eFusion->getModelToModel().lastICPError), icpErrThresh);
            gui->inLog.Log(eFusion->getModelToModel().lastICPCount, icpCountThresh);
        }

        Eigen::Matrix4f pose = eFusion->getCurrPose();

        if(gui->drawRawCloud->Get() || gui->drawFilteredCloud->Get())
        {
            eFusion->computeFeedbackBuffers();
        }

        if(gui->drawRawCloud->Get())
        {
            eFusion->getFeedbackBuffers().at(FeedbackBuffer::RAW)->render(gui->s_cam.GetProjectionModelViewMatrix(), pose, gui->drawNormals->Get(), gui->drawColors->Get());
        }

        if(gui->drawFilteredCloud->Get())
        {
            eFusion->getFeedbackBuffers().at(FeedbackBuffer::FILTERED)->render(gui->s_cam.GetProjectionModelViewMatrix(), pose, gui->drawNormals->Get(), gui->drawColors->Get());
        }

        if(gui->drawGlobalModel->Get())
        {
            glFinish();
            TICK("Global");

            if(gui->drawFxaa->Get())
            {
                gui->drawFXAA(gui->s_cam.GetProjectionModelViewMatrix(),
                              gui->s_cam.GetModelViewMatrix(),
                              eFusion->getGlobalModel().model(),
                              eFusion->getConfidenceThreshold(),
                              eFusion->getTick(),
                              eFusion->getTimeDelta(),
                              iclnuim);
            }
            else
            {
                eFusion->getGlobalModel().renderPointCloud(gui->s_cam.GetProjectionModelViewMatrix(),
                                                           eFusion->getConfidenceThreshold(),
                                                           gui->drawUnstable->Get(),
                                                           gui->drawNormals->Get(),
                                                           gui->drawColors->Get(),
                                                           gui->drawPoints->Get(),
                                                           gui->drawWindow->Get(),
                                                           gui->drawTimes->Get(),
                                                           eFusion->getTick(),
                                                           eFusion->getTimeDelta());
            }
            glFinish();
            TOCK("Global");
        }

        if(eFusion->getLost())
        {
            glColor3f(1, 1, 0);
        }
        else
        {
            glColor3f(1, 0, 1);
        }
        gui->drawFrustum(pose);
        glColor3f(1, 1, 1);

        if(gui->drawFerns->Get())
        {
            glColor3f(0, 0, 0);
            for(size_t i = 0; i < eFusion->getFerns().frames.size(); i++)
            {
                if((int)i == eFusion->getFerns().lastClosest)
                    continue;

                gui->drawFrustum(eFusion->getFerns().frames.at(i)->pose);
            }
            glColor3f(1, 1, 1);
        }

        if(gui->drawDefGraph->Get())
        {
            const std::vector<GraphNode*> & graph = eFusion->getLocalDeformation().getGraph();

            for(size_t i = 0; i < graph.size(); i++)
            {
                pangolin::glDrawCross(graph.at(i)->position(0),
                                      graph.at(i)->position(1),
                                      graph.at(i)->position(2),
                                      0.1);

                for(size_t j = 0; j < graph.at(i)->neighbours.size(); j++)
                {
                    pangolin::glDrawLine(graph.at(i)->position(0),
                                         graph.at(i)->position(1),
                                         graph.at(i)->position(2),
                                         graph.at(graph.at(i)->neighbours.at(j))->position(0),
                                         graph.at(graph.at(i)->neighbours.at(j))->position(1),
                                         graph.at(graph.at(i)->neighbours.at(j))->position(2));
                }
            }
        }

        if(eFusion->getFerns().lastClosest != -1)
        {
            glColor3f(1, 0, 0);
            gui->drawFrustum(eFusion->getFerns().frames.at(eFusion->getFerns().lastClosest)->pose);
            glColor3f(1, 1, 1);
        }

        const std::vector<PoseMatch> & poseMatches = eFusion->getPoseMatches();

        int maxDiff = 0;
        for(size_t i = 0; i < poseMatches.size(); i++)
        {
            if(poseMatches.at(i).secondId - poseMatches.at(i).firstId > maxDiff)
            {
                maxDiff = poseMatches.at(i).secondId - poseMatches.at(i).firstId;
            }
        }

        for(size_t i = 0; i < poseMatches.size(); i++)
        {
            if(gui->drawDeforms->Get())
            {
                if(poseMatches.at(i).fern)
                {
                    glColor3f(1, 0, 0);
                }
                else
                {
                    glColor3f(0, 1, 0);
                }
                for(size_t j = 0; j < poseMatches.at(i).constraints.size(); j++)
                {
                    pangolin::glDrawLine(poseMatches.at(i).constraints.at(j).sourcePoint(0), poseMatches.at(i).constraints.at(j).sourcePoint(1), poseMatches.at(i).constraints.at(j).sourcePoint(2),
                                         poseMatches.at(i).constraints.at(j).targetPoint(0), poseMatches.at(i).constraints.at(j).targetPoint(1), poseMatches.at(i).constraints.at(j).targetPoint(2));
                }
            }
        }
        glColor3f(1, 1, 1);

        eFusion->normaliseDepth(0.3f, gui->depthCutoff->Get());

        for(std::map<std::string, GPUTexture*>::const_iterator it = eFusion->getTextures().begin(); it != eFusion->getTextures().end(); ++it)
        {
            if(it->second->draw)
            {
                gui->displayImg(it->first, it->second);
            }
        }

        eFusion->getIndexMap().renderDepth(gui->depthCutoff->Get());

        gui->displayImg("ModelImg", eFusion->getIndexMap().imageTex());
        gui->displayImg("Model", eFusion->getIndexMap().drawTex());

        std::stringstream strs;
        strs << eFusion->getGlobalModel().lastCount();

        gui->totalPoints->operator=(strs.str());

        std::stringstream strs2;
        strs2 << eFusion->getLocalDeformation().getGraph().size();

        gui->totalNodes->operator=(strs2.str());

        std::stringstream strs3;
        strs3 << eFusion->getFerns().frames.size();

        gui->totalFerns->operator=(strs3.str());

        std::stringstream strs4;
        strs4 << eFusion->getDeforms();

        gui->totalDefs->operator=(strs4.str());

        std::stringstream strs5;
        strs5 << eFusion->getTick() << "/" << logReader->getNumFrames();

        gui->logProgress->operator=(strs5.str());

        std::stringstream strs6;
        strs6 << eFusion->getFernDeforms();

        gui->totalFernDefs->operator=(strs6.str());

        gui->postCall();

        logReader->flipColors = gui->flipColors->Get();
        eFusion->setRgbOnly(gui->rgbOnly->Get());
        eFusion->setPyramid(gui->pyramid->Get());
        eFusion->setFastOdom(gui->fastOdom->Get());
        eFusion->setConfidenceThreshold(gui->confidenceThreshold->Get());
        eFusion->setDepthCutoff(gui->depthCutoff->Get());
        eFusion->setIcpWeight(gui->icpWeight->Get());
        eFusion->setSo3(gui->so3->Get());
        eFusion->setFrameToFrameRGB(gui->frameToFrameRGB->Get());

        resetButton = pangolin::Pushed(*gui->reset);

        if(gui->autoSettings)
        {
            static bool last = gui->autoSettings->Get();

            if(gui->autoSettings->Get() != last)
            {
                last = gui->autoSettings->Get();
                static_cast<LiveLogReader *>(logReader)->setAuto(last);
            }
        }

        Stopwatch::getInstance().sendAll();

        if(resetButton)
        {
            break;
        }

        if(pangolin::Pushed(*gui->save))
        {
            eFusion->savePly();
        }

        TOCK("GUI");
    }
}
예제 #24
0
void RGBDOdometry::getIncrementalTransformation(Eigen::Vector3f & trans,
                                                Eigen::Matrix<float, 3, 3, Eigen::RowMajor> & rot,
                                                const bool & rgbOnly,
                                                const float & icpWeight,
                                                const bool & pyramid,
                                                const bool & fastOdom,
                                                const bool & so3)
{
    bool icp = !rgbOnly && icpWeight > 0;
    bool rgb = rgbOnly || icpWeight < 100;

    Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rprev = rot;
    Eigen::Vector3f tprev = trans;

    Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rcurr = Rprev;
    Eigen::Vector3f tcurr = tprev;

    if(rgb)
    {
        for(int i = 0; i < NUM_PYRS; i++)
        {
            computeDerivativeImages(nextImage[i], nextdIdx[i], nextdIdy[i]);
        }
    }

    Eigen::Matrix<double, 3, 3, Eigen::RowMajor> resultR = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Identity();

    if(so3)
    {
        int pyramidLevel = 2;

        Eigen::Matrix<float, 3, 3, Eigen::RowMajor> R_lr = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>::Identity();

        Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Zero();

        K(0, 0) = intr(pyramidLevel).fx;
        K(1, 1) = intr(pyramidLevel).fy;
        K(0, 2) = intr(pyramidLevel).cx;
        K(1, 2) = intr(pyramidLevel).cy;
        K(2, 2) = 1;

        float lastError = std::numeric_limits<float>::max() / 2;
        float lastCount = std::numeric_limits<float>::max() / 2;

        Eigen::Matrix<double, 3, 3, Eigen::RowMajor> lastResultR = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Identity();

        for(int i = 0; i < 10; i++)
        {
            Eigen::Matrix<float, 3, 3, Eigen::RowMajor> jtj;
            Eigen::Matrix<float, 3, 1> jtr;

            Eigen::Matrix<double, 3, 3, Eigen::RowMajor> homography = K * resultR * K.inverse();

            mat33 imageBasis;
            memcpy(&imageBasis.data[0], homography.cast<float>().eval().data(), sizeof(mat33));

            Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K_inv = K.inverse();
            mat33 kinv;
            memcpy(&kinv.data[0], K_inv.cast<float>().eval().data(), sizeof(mat33));

            Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K_R_lr = K * resultR;
            mat33 krlr;
            memcpy(&krlr.data[0], K_R_lr.cast<float>().eval().data(), sizeof(mat33));

            float residual[2];

            TICK("so3Step");
            so3Step(lastNextImage[pyramidLevel],
                    nextImage[pyramidLevel],
                    imageBasis,
                    kinv,
                    krlr,
                    sumDataSO3,
                    outDataSO3,
                    jtj.data(),
                    jtr.data(),
                    &residual[0],
                    GPUConfig::getInstance().so3StepThreads,
                    GPUConfig::getInstance().so3StepBlocks);
            TOCK("so3Step");

            lastSO3Error = sqrt(residual[0]) / residual[1];
            lastSO3Count = residual[1];

            //Converged
            if(lastSO3Error < lastError && lastCount == lastSO3Count)
            {
                break;
            }
            else if(lastSO3Error > lastError + 0.001) //Diverging
            {
                lastSO3Error = lastError;
                lastSO3Count = lastCount;
                resultR = lastResultR;
                break;
            }

            lastError = lastSO3Error;
            lastCount = lastSO3Count;
            lastResultR = resultR;

            Eigen::Vector3f delta = jtj.ldlt().solve(jtr);

            Eigen::Matrix<double, 3, 3, Eigen::RowMajor> rotUpdate = OdometryProvider::rodrigues(delta.cast<double>());

            R_lr = rotUpdate.cast<float>() * R_lr;

            for(int x = 0; x < 3; x++)
            {
                for(int y = 0; y < 3; y++)
                {
                    resultR(x, y) = R_lr(x, y);
                }
            }
        }
    }

    iterations[0] = fastOdom ? 3 : 10;
    iterations[1] = pyramid ? 5 : 0;
    iterations[2] = pyramid ? 4 : 0;

    Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rprev_inv = Rprev.inverse();
    mat33 device_Rprev_inv = Rprev_inv;
    float3 device_tprev = *reinterpret_cast<float3*>(tprev.data());

    Eigen::Matrix<double, 4, 4, Eigen::RowMajor> resultRt = Eigen::Matrix<double, 4, 4, Eigen::RowMajor>::Identity();

    if(so3)
    {
        for(int x = 0; x < 3; x++)
        {
            for(int y = 0; y < 3; y++)
            {
                resultRt(x, y) = resultR(x, y);
            }
        }
    }

    for(int i = NUM_PYRS - 1; i >= 0; i--)
    {
        if(rgb)
        {
            projectToPointCloud(lastDepth[i], pointClouds[i], intr, i);
        }

        Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Zero();

        K(0, 0) = intr(i).fx;
        K(1, 1) = intr(i).fy;
        K(0, 2) = intr(i).cx;
        K(1, 2) = intr(i).cy;
        K(2, 2) = 1;

        lastRGBError = std::numeric_limits<float>::max();

        for(int j = 0; j < iterations[i]; j++)
        {
            Eigen::Matrix<double, 4, 4, Eigen::RowMajor> Rt = resultRt.inverse();

            Eigen::Matrix<double, 3, 3, Eigen::RowMajor> R = Rt.topLeftCorner(3, 3);

            Eigen::Matrix<double, 3, 3, Eigen::RowMajor> KRK_inv = K * R * K.inverse();
            mat33 krkInv;
            memcpy(&krkInv.data[0], KRK_inv.cast<float>().eval().data(), sizeof(mat33));

            Eigen::Vector3d Kt = Rt.topRightCorner(3, 1);
            Kt = K * Kt;
            float3 kt = {(float)Kt(0), (float)Kt(1), (float)Kt(2)};

            int sigma = 0;
            int rgbSize = 0;

            if(rgb)
            {
                TICK("computeRgbResidual");
                computeRgbResidual(pow(minimumGradientMagnitudes[i], 2.0) / pow(sobelScale, 2.0),
                                   nextdIdx[i],
                                   nextdIdy[i],
                                   lastDepth[i],
                                   nextDepth[i],
                                   lastImage[i],
                                   nextImage[i],
                                   corresImg[i],
                                   sumResidualRGB,
                                   maxDepthDeltaRGB,
                                   kt,
                                   krkInv,
                                   sigma,
                                   rgbSize,
                                   GPUConfig::getInstance().rgbResThreads,
                                   GPUConfig::getInstance().rgbResBlocks);
                TOCK("computeRgbResidual");
            }

            float sigmaVal = std::sqrt((float)sigma / rgbSize == 0 ? 1 : rgbSize);
            float rgbError = std::sqrt(sigma) / (rgbSize == 0 ? 1 : rgbSize);

            if(rgbOnly && rgbError > lastRGBError)
            {
                break;
            }

            lastRGBError = rgbError;
            lastRGBCount = rgbSize;

            if(rgbOnly)
            {
                sigmaVal = -1; //Signals the internal optimisation to weight evenly
            }

            Eigen::Matrix<float, 6, 6, Eigen::RowMajor> A_icp;
            Eigen::Matrix<float, 6, 1> b_icp;

            mat33 device_Rcurr = Rcurr;
            float3 device_tcurr = *reinterpret_cast<float3*>(tcurr.data());

            DeviceArray2D<float>& vmap_curr = vmaps_curr_[i];
            DeviceArray2D<float>& nmap_curr = nmaps_curr_[i];

            DeviceArray2D<float>& vmap_g_prev = vmaps_g_prev_[i];
            DeviceArray2D<float>& nmap_g_prev = nmaps_g_prev_[i];

            float residual[2];

            if(icp)
            {
                TICK("icpStep");
                icpStep(device_Rcurr,
                        device_tcurr,
                        vmap_curr,
                        nmap_curr,
                        device_Rprev_inv,
                        device_tprev,
                        intr(i),
                        vmap_g_prev,
                        nmap_g_prev,
                        distThres_,
                        angleThres_,
                        sumDataSE3,
                        outDataSE3,
                        A_icp.data(),
                        b_icp.data(),
                        &residual[0],
                        GPUConfig::getInstance().icpStepThreads,
                        GPUConfig::getInstance().icpStepBlocks);
                TOCK("icpStep");
            }

            lastICPError = sqrt(residual[0]) / residual[1];
            lastICPCount = residual[1];

            Eigen::Matrix<float, 6, 6, Eigen::RowMajor> A_rgbd;
            Eigen::Matrix<float, 6, 1> b_rgbd;

            if(rgb)
            {
                TICK("rgbStep");
                rgbStep(corresImg[i],
                        sigmaVal,
                        pointClouds[i],
                        intr(i).fx,
                        intr(i).fy,
                        nextdIdx[i],
                        nextdIdy[i],
                        sobelScale,
                        sumDataSE3,
                        outDataSE3,
                        A_rgbd.data(),
                        b_rgbd.data(),
                        GPUConfig::getInstance().rgbStepThreads,
                        GPUConfig::getInstance().rgbStepBlocks);
                TOCK("rgbStep");
            }

            Eigen::Matrix<double, 6, 1> result;
            Eigen::Matrix<double, 6, 6, Eigen::RowMajor> dA_rgbd = A_rgbd.cast<double>();
            Eigen::Matrix<double, 6, 6, Eigen::RowMajor> dA_icp = A_icp.cast<double>();
            Eigen::Matrix<double, 6, 1> db_rgbd = b_rgbd.cast<double>();
            Eigen::Matrix<double, 6, 1> db_icp = b_icp.cast<double>();

            if(icp && rgb)
            {
                double w = icpWeight;
                lastA = dA_rgbd + w * w * dA_icp;
                lastb = db_rgbd + w * db_icp;
                result = lastA.ldlt().solve(lastb);
            }
            else if(icp)
            {
                lastA = dA_icp;
                lastb = db_icp;
                result = lastA.ldlt().solve(lastb);
            }
            else if(rgb)
            {
                lastA = dA_rgbd;
                lastb = db_rgbd;
                result = lastA.ldlt().solve(lastb);
            }
            else
            {
                assert(false && "Control shouldn't reach here");
            }

            Eigen::Isometry3f rgbOdom;

            OdometryProvider::computeUpdateSE3(resultRt, result, rgbOdom);

            Eigen::Isometry3f currentT;
            currentT.setIdentity();
            currentT.rotate(Rprev);
            currentT.translation() = tprev;

            currentT = currentT * rgbOdom.inverse();

            tcurr = currentT.translation();
            Rcurr = currentT.rotation();
        }
    }

    if(rgb && (tcurr - tprev).norm() > 0.3)
    {
        Rcurr = Rprev;
        tcurr = tprev;
    }

    if(so3)
    {
        for(int i = 0; i < NUM_PYRS; i++)
        {
            std::swap(lastNextImage[i], nextImage[i]);
        }
    }

    trans = tcurr;
    rot = Rcurr;
}
예제 #25
0
void GlobalModel::clean(const Eigen::Matrix4f & pose,
                        const int & time,
                        GPUTexture * indexMap,
                        GPUTexture * vertConfMap,
                        GPUTexture * colorTimeMap,
                        GPUTexture * normRadMap,
                        GPUTexture * depthMap,
                        const float confThreshold,
                        std::vector<float> & graph,
                        const int timeDelta,
                        const float maxDepth,
                        const bool isFern)
{
    assert(graph.size() / 16 < MAX_NODES);

    if(graph.size() > 0)
    {
        //Can be optimised by only uploading new nodes with offset
        glBindTexture(GL_TEXTURE_2D, deformationNodes.texture->tid);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, graph.size(), 1, GL_LUMINANCE, GL_FLOAT, graph.data());
    }

    TICK("Fuse::Copy");
    //Next we copy the new unstable vertices from the newUnstableFid transform feedback into the global map
    unstableProgram->Bind();
    unstableProgram->setUniform(Uniform("time", time));
    unstableProgram->setUniform(Uniform("confThreshold", confThreshold));
    unstableProgram->setUniform(Uniform("scale", (float)IndexMap::FACTOR));
    unstableProgram->setUniform(Uniform("indexSampler", 0));
    unstableProgram->setUniform(Uniform("vertConfSampler", 1));
    unstableProgram->setUniform(Uniform("colorTimeSampler", 2));
    unstableProgram->setUniform(Uniform("normRadSampler", 3));
    unstableProgram->setUniform(Uniform("nodeSampler", 4));
    unstableProgram->setUniform(Uniform("depthSampler", 5));
    unstableProgram->setUniform(Uniform("nodes", (float)(graph.size() / 16)));
    unstableProgram->setUniform(Uniform("nodeCols", (float)NODE_TEXTURE_DIMENSION));
    unstableProgram->setUniform(Uniform("timeDelta", timeDelta));
    unstableProgram->setUniform(Uniform("maxDepth", maxDepth));
    unstableProgram->setUniform(Uniform("isFern", (int)isFern));

    Eigen::Matrix4f t_inv = pose.inverse();
    unstableProgram->setUniform(Uniform("t_inv", t_inv));

    unstableProgram->setUniform(Uniform("cam", Eigen::Vector4f(Intrinsics::getInstance().cx(),
                                                         Intrinsics::getInstance().cy(),
                                                         Intrinsics::getInstance().fx(),
                                                         Intrinsics::getInstance().fy())));
    unstableProgram->setUniform(Uniform("cols", (float)Resolution::getInstance().cols()));
    unstableProgram->setUniform(Uniform("rows", (float)Resolution::getInstance().rows()));

    glBindBuffer(GL_ARRAY_BUFFER, vbos[target].first);

    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, 0);

    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f)));

    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f) * 2));

    glEnable(GL_RASTERIZER_DISCARD);

    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, vbos[renderSource].second);

    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vbos[renderSource].first);

    glBeginTransformFeedback(GL_POINTS);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, indexMap->texture->tid);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, vertConfMap->texture->tid);

    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, colorTimeMap->texture->tid);

    glActiveTexture(GL_TEXTURE3);
    glBindTexture(GL_TEXTURE_2D, normRadMap->texture->tid);

    glActiveTexture(GL_TEXTURE4);
    glBindTexture(GL_TEXTURE_2D, deformationNodes.texture->tid);

    glActiveTexture(GL_TEXTURE5);
    glBindTexture(GL_TEXTURE_2D, depthMap->texture->tid);

    glBeginQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, countQuery);

    glDrawTransformFeedback(GL_POINTS, vbos[target].second);

    glBindBuffer(GL_ARRAY_BUFFER, newUnstableVbo);

    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, 0);

    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f)));

    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f) * 2));

    glDrawTransformFeedback(GL_POINTS, newUnstableFid);

    glEndQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN);

    glGetQueryObjectuiv(countQuery, GL_QUERY_RESULT, &count);

    glEndTransformFeedback();

    glDisable(GL_RASTERIZER_DISCARD);

    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);

    glDisableVertexAttribArray(0);
    glDisableVertexAttribArray(1);
    glDisableVertexAttribArray(2);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);

    unstableProgram->Unbind();

    std::swap(target, renderSource);

    glFinish();
    TOCK("Fuse::Copy");
}
예제 #26
0
파일: cg_solve.hpp 프로젝트: Mantevo/miniFE
void
cg_solve(OperatorType& A,
         const VectorType& b,
         VectorType& x,
         Matvec matvec,
         typename OperatorType::LocalOrdinalType max_iter,
         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance,
         typename OperatorType::LocalOrdinalType& num_iters,
         typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr,
         timer_type* my_cg_times)
{
  typedef typename OperatorType::ScalarType ScalarType;
  typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType;
  typedef typename OperatorType::LocalOrdinalType LocalOrdinalType;
  typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type;

  timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0;
  timer_type total_time = mytimer();

  int myproc = 0;
#ifdef HAVE_MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
#endif

  if (!A.has_local_indices) {
    std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means "
       << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve."
       << std::endl;
    return;
  }

  size_t nrows = A.rows.size();
  LocalOrdinalType ncols = A.num_cols;

  nvtxRangeId_t r1=nvtxRangeStartA("Allocation of Temporary Vectors");
  VectorType r(b.startIndex, nrows);
  VectorType p(0, ncols);
  VectorType Ap(b.startIndex, nrows);
  nvtxRangeEnd(r1);

#ifdef HAVE_MPI
#ifndef GPUDIRECT
  //TODO move outside?
  cudaHostRegister(&p.coefs[0],ncols*sizeof(typename VectorType::ScalarType),0);
  cudaCheckError();
  if(A.send_buffer.size()>0) cudaHostRegister(&A.send_buffer[0],A.send_buffer.size()*sizeof(typename VectorType::ScalarType),0);
  cudaCheckError();
#endif
#endif

  normr = 0;
  magnitude_type rtrans = 0;
  magnitude_type oldrtrans = 0;

  LocalOrdinalType print_freq = max_iter/10;
  if (print_freq>50) print_freq = 50;
  if (print_freq<1)  print_freq = 1;

  ScalarType one = 1.0;
  ScalarType zero = 0.0;

  TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY);

  TICK();
  matvec(A, p, Ap);
  TOCK(tMATVEC);

  TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY);

  TICK(); rtrans = dot(r, r); TOCK(tDOT);

  normr = std::sqrt(rtrans);

  if (myproc == 0) {
    std::cout << "Initial Residual = "<< normr << std::endl;
  }

  magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon();

#ifdef MINIFE_DEBUG
  std::ostream& os = outstream();
  os << "brkdown_tol = " << brkdown_tol << std::endl;
#endif

  for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) {
    if (k == 1) {
      TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY);
    }
    else {
      oldrtrans = rtrans;
      TICK(); rtrans = dot(r, r); TOCK(tDOT);
      magnitude_type beta = rtrans/oldrtrans;
      TICK(); waxpby(one, r, beta, p, p); TOCK(tWAXPY);
    }

    normr = std::sqrt(rtrans);

    if (myproc == 0 && (k%print_freq==0 || k==max_iter)) {
      std::cout << "Iteration = "<<k<<"   Residual = "<<normr<<std::endl;
    }

    magnitude_type alpha = 0;
    magnitude_type p_ap_dot = 0;

    TICK(); matvec(A, p, Ap); TOCK(tMATVEC);

    TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT);

#ifdef MINIFE_DEBUG
    os << "iter " << k << ", p_ap_dot = " << p_ap_dot;
    os.flush();
#endif
    //TODO remove false below
    if (false && p_ap_dot < brkdown_tol) {
      if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) {
        std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl;
#ifdef MINIFE_DEBUG
        os << "ERROR, numerical breakdown!"<<std::endl;
#endif
        //update the timers before jumping out.
        my_cg_times[WAXPY] = tWAXPY;
        my_cg_times[DOT] = tDOT;
        my_cg_times[MATVEC] = tMATVEC;
        my_cg_times[TOTAL] = mytimer() - total_time;
        return;
      }
      else brkdown_tol = 0.1 * p_ap_dot;
    }
    alpha = rtrans/p_ap_dot;
#ifdef MINIFE_DEBUG
    os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl;
#endif

    TICK(); waxpby(one, x, alpha, p, x);
            waxpby(one, r, -alpha, Ap, r); TOCK(tWAXPY);
    num_iters = k;
  }
  
#ifdef HAVE_MPI
#ifndef GPUDIRECT
  //TODO move outside?
  cudaHostUnregister(&p.coefs[0]);
  cudaCheckError();
  if(A.send_buffer.size()>0) cudaHostUnregister(&A.send_buffer[0]);
  cudaCheckError();
#endif
#endif

  my_cg_times[WAXPY] = tWAXPY;
  my_cg_times[DOT] = tDOT;
  my_cg_times[MATVEC] = tMATVEC;
  my_cg_times[MATVECDOT] = tMATVECDOT;
  my_cg_times[TOTAL] = mytimer() - total_time;
}