void MeshBufferReader::setMeshPyramid() { TICK("setupMeshBufferRendering"); visibilityMaskPyramid.resize(m_nNumMeshLevels); outputInfoPyramid.resize(m_nNumMeshLevels); outputPropPyramid.resize(m_nNumMeshLevels); for(int i = 0; i < m_nNumMeshLevels; ++i) { int numVertices = currentMeshPyramid.levels[i].numVertices; visibilityMaskPyramid[i].resize(numVertices,true); vector<CoordinateType> proj2D; proj2D.resize(2); proj2D[0] = 0; proj2D[1] = 0; outputInfoPyramid[i].meshData = std::move(currentMeshPyramid.levels[i]); // outputInfoPyramid[i].meshDataGT = currentMeshPyramid.levels[i]; // outputInfoPyramid[i].meshDataColorDiff = currentMeshPyramid.levels[i]; outputInfoPyramid[i].nRenderLevel = i; outputInfoPyramid[i].meshProj.resize(numVertices, proj2D); // outputInfoPyramid[i].meshProjGT = outputInfoPyramid[i].meshProj; outputInfoPyramid[i].visibilityMask.resize(numVertices, true); memset(outputInfoPyramid[i].camPose, 0, 6*sizeof(double)); UpdateRenderingData(outputInfoPyramid[i], KK, camPose, outputInfoPyramid[i].meshData); //////////////////////////// outputPropPyramid if(meshLoadingSettings.loadProp) { outputPropPyramid[i].meshData = std::move(propMeshPyramid.levels[i]); // outputPropPyramid[i].meshDataGT = propMeshPyramid.levels[i]; // outputPropPyramid[i].meshDataColorDiff = propMeshPyramid.levels[i]; outputPropPyramid[i].nRenderLevel = i; outputPropPyramid[i].meshProj.resize(numVertices, proj2D); // outputPropPyramid[i].meshProjGT = outputPropPyramid[i].meshProj; outputPropPyramid[i].visibilityMask.resize(numVertices, true); memset(outputPropPyramid[i].camPose, 0, 6*sizeof(double)); UpdateRenderingData(outputPropPyramid[i], KK, camPose, outputPropPyramid[i].meshData); } // update the visibility of each vertex if(useVisibilityMask) { TICK( "visibilityMask" + std::to_string(i) ); UpdateVisibilityMaskGL(outputInfoPyramid[i], visibilityMaskPyramid[i], KK, camPose, m_nWidth, m_nHeight); if(meshLoadingSettings.loadProp) UpdateVisibilityMaskGL(outputPropPyramid[i], 
visibilityMaskPyramid[i], KK, camPose, m_nWidth, m_nHeight); TOCK( "visibilityMask" + std::to_string(i) ); } } TOCK("setupMeshBufferRendering"); }
bool MainEngine::ProcessOneFrame(int nFrame) { // read input // if(!GetInput(nFrame)) // return false; // if(inputThreadGroup.size() > 0){ // inputThreadGroup.join_all(); // memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3); // inputThreadGroup.remove_thread(pInputThread); // pInputThread = inputThreadGroup.create_thread( boost::bind(&MainEngine::GetInput, this, nFrame) ); // } // // for the first frame, we have to wait // else{ // pInputThread = inputThreadGroup.create_thread( boost::bind(&MainEngine::GetInput, this, nFrame) ); // inputThreadGroup.join_all(); // memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3); // } TICK("timePerFrame"); TICK("getInput"); if(pInputThread == NULL) { pInputThread = new boost::thread(boost::bind(&MainEngine::GetInput, this, nFrame)); pInputThread->join(); memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3); } else { pInputThread->join(); memcpy(m_pColorImageRGB, m_pColorImageRGBBuffer, m_nWidth * m_nHeight * 3); delete pInputThread; pInputThread = new boost::thread(boost::bind(&MainEngine::GetInput, this, nFrame)); } TOCK("getInput"); if(!inputFlag) { cout << "getting input failure" << endl; return false; } // do tracking TICK("tracking"); if(!m_pTrackingEngine->trackFrame(nFrame, m_pColorImageRGB, &pOutputInfo)) { cout << "tracking failed: " << endl; return false; } TOCK("tracking"); TOCK("timePerFrame"); return true; }
/**
 * Write len bytes from dat to fd at offset off, retrying short writes.
 *
 * EAGAIN and EINTR are treated as "nothing written" and retried; any other
 * pwrite() failure stops the loop.
 *
 * @return 0 on success, otherwise the errno value of the failing pwrite().
 */
inline static int pfile_write_unlocked(int fd, lsn_t off, const byte *dat,
                                       lsn_t len) {
  int error = 0;
  ssize_t bytes_written = 0;
  TICK(write_hist);
  while (bytes_written < len) {
    ssize_t count = pwrite(fd, dat + bytes_written, len - bytes_written,
                           off + bytes_written);
    if (count == -1) {
      if (errno != EAGAIN && errno != EINTR) {
        /* Unrecoverable: report errno (EBADF included) to the caller. */
        error = errno;
        break;
      }
      /* @see file.c for an explanation; basically, we ignore these and
         try again. */
      count = 0;
    }
    bytes_written += count;
    if (bytes_written != len) {
      DEBUG("pwrite spinning\n");
    }
  }
  TOCK(write_hist);
  return error;
}
// Sets up the reader for a width x height image stream with intrinsics K and
// loads the mesh pyramid (plus the prop pyramid when settings.loadProp is
// set) for frame startFrame. numTrackingFrames is not used here.
MeshPyramidReader::MeshPyramidReader(MeshLoadingSettings& settings, int width,
                                     int height, double K[3][3], int startFrame,
                                     int numTrackingFrames):
    trackerInitialized(false)
{
    m_nWidth = width;
    m_nHeight = height;

    startFrameNo = startFrame;
    currentFrameNo = startFrame;

    // One RGB byte triple per pixel.
    pCurrentColorImageRGB = new unsigned char[3*width*height];

    // in this case camPose will always be zero
    for(int k = 0; k < 6; ++k)
    {
        camPose[k] = 0;
    }

    useVisibilityMask = settings.visibilityMask;

    setIntrinsicMatrix(K);

    TICK("loadingMesh");

    // Assigning from a temporary already selects move assignment where one
    // exists, so no explicit std::move is required.
    currentMeshPyramid = PangaeaMeshPyramid(settings.meshPath,
                                            settings.meshLevelFormat,
                                            currentFrameNo,
                                            settings.meshLevelList);

    if(settings.loadProp)
    {
        propMeshPyramid = PangaeaMeshPyramid(settings.meshPath,
                                             settings.propLevelFormat,
                                             currentFrameNo,
                                             settings.meshLevelList);
    }

    TOCK("loadingMesh");

    m_nNumMeshLevels = settings.meshLevelList.size();
}
/*
 * Times yatrie insertions and lookups for several trie sizes, once with
 * contiguous keys (0..n-1) and once with rand() keys, and prints the
 * accumulated lookup time followed by the accumulated insert time.
 * Each measurement is scaled by 500 / n before being accumulated.
 */
void run_benchmarks(void)
{
    int sizes[] = {2, 5, 10, 30, 500};
    const int num_sizes = (int)(sizeof(sizes) / sizeof(sizes[0]));
    int s, key;
    yatrie_t trie = (yatrie_t)NULL;
    int set_time = 0;
    int get_time = 0;

    BENCHMARK_INIT();

    for (s = 0; s < num_sizes; s++) {
        int n = sizes[s];

        /* Contiguous keys */
        TICK();
        for (key = 0; key < n; key++) {
            trie = yatrie_insert(trie, key, key);
        }
        TOCK();
        set_time += benchmark_total_time * 500 / n;

        TICK();
        for (key = 0; key < n; key++) {
            yatrie_get(trie, key);
        }
        TOCK();
        get_time += benchmark_total_time * 500 / n;

        yatrie_free(trie);
        trie = (yatrie_t)NULL;

        /* Uniform keys */
        srand(1234567);
        TICK();
        for (key = 0; key < n; key++) {
            trie = yatrie_insert(trie, rand(), key);
        }
        TOCK();
        set_time += benchmark_total_time * 500 / n;

        /* NOTE(review): rand() is not re-seeded here, so the keys looked up
           continue the sequence instead of repeating the inserted keys --
           confirm that is intended. */
        TICK();
        for (key = 0; key < n; key++) {
            yatrie_get(trie, rand());
        }
        TOCK();
        get_time += benchmark_total_time * 500 / n;

        yatrie_free(trie);
        trie = (yatrie_t)NULL;
    }

    printf("%i %i\n", get_time, set_time);
}
/**
 * Read len bytes at offset off from the handle's file into buf, retrying
 * short reads.
 *
 * @return 0 on success; EDOM for a negative offset or when end-of-file is
 *         hit before len bytes were read; the errno value when pread()
 *         fails with EBADF (h->error is set as well). Any other pread()
 *         failure, apart from the retried EAGAIN/EINTR, aborts the process.
 */
static int pfile_read(stasis_handle_t *h, lsn_t off, byte *buf, lsn_t len) {
  pfile_impl *impl = (pfile_impl*)(h->impl);
  int error = 0;
  ssize_t bytes_read = 0;

  if (off < 0) {
    return EDOM;
  }

  TICK(read_hist);
  while (bytes_read < len) {
    ssize_t count = pread(impl->fd, buf + bytes_read, len - bytes_read,
                          off + bytes_read);

    if (count == -1) {
      if (errno == EAGAIN || errno == EINTR) {
        /* Transient failure: nothing was read, try again. */
        continue;
      }
      if (errno != EBADF) {
        int err = errno;
        // The other errors either involve memory bugs (EFAULT), logic bugs
        // (EISDIR, EFIFO, EOVERFLOW), or bad hardware (EIO), so print
        // something to console, and uncleanly crash.
        perror("pfile_read encountered an unknown error code.");
        fprintf(stderr, "pread() returned -1; errno is %d\n",err);
        abort();
      }
      h->error = EBADF;
      error = errno;
      break;
    }

    if (count == 0) {
      /* EOF */
      if (bytes_read != 0) {
        fprintf(stderr, "short read at end of storefile. Assuming that this is due to strange recovery scenario, and continuing.\n");
      }
      error = EDOM;
      break;
    }

    bytes_read += count;
    if (bytes_read != len) {
      DEBUG("pread spinning\n");
    }
  }
  TOCK(read_hist);

  assert(error || bytes_read == len);
  return error;
}
// Refreshes outputInfo from currentMesh and, when the visibility mask is
// enabled, recomputes the mask and the color difference.
void MeshSequenceReader::trackerUpdate(TrackerOutputInfo& outputInfo)
{
    TICK("visualRenderingUpdate");

    UpdateRenderingData(outputInfo, KK, camPose, currentMesh);
    UpdateRenderingDataFast(outputInfo, KK, currentMesh);

    if(!useVisibilityMask)
    {
        TOCK("visualRenderingUpdate");
        return;
    }

    UpdateVisibilityMaskGL(outputInfo, visibilityMask, KK, camPose, m_nWidth, m_nHeight);
    UpdateColorDiff(outputInfo, visibilityMask, colorImageSplit);

    TOCK("visualRenderingUpdate");
}
bool MeshSequenceReader::setCurrentFrame(int curFrame) { if(currentFrameNo != curFrame) { currentFrameNo = curFrame; // changing new frame time TICK("setCurrentFrame"); if(!loadMesh(meshLoadingSettings.meshPath, meshLoadingSettings.meshFormat,currentFrameNo)) return false; TOCK("setCurrentFrame"); } return true; }
// Allocates two N-element vectors, runs axpyGPU on them once and reports
// the time measured between TICK() and TOCK().
int main(int argc, char *argv[])
{
    float alpha = 2.3;
    double time4 = 0;

    float *u = new float[N];
    float *v = new float[N];

    initializeVectors(u, v);

    TICK();
    axpyGPU(u, v, alpha, N);
    TOCK(time4);

    outputStats(time4);

    delete [] u;
    delete [] v;

    return 0;
}
// Switches to frame curFrame, loading its mesh pyramid when the frame
// changes. Returns false when the pyramid cannot be loaded, true otherwise
// (also when curFrame is already the current frame).
bool MeshPyramidReader::setCurrentFrame(int curFrame)
{
    if(currentFrameNo == curFrame)
    {
        return true;
    }

    currentFrameNo = curFrame;

    TICK("setCurrentFrame");

    // NOTE(review): on failure the timer is left without a TOCK and
    // currentFrameNo has already been updated -- confirm this is intended.
    const bool loaded = loadMeshPyramid(meshLoadingSettings.meshPath,
                                        meshLoadingSettings.meshLevelFormat,
                                        currentFrameNo,
                                        meshLoadingSettings.meshLevelList);
    if(!loaded)
    {
        return false;
    }

    TOCK("setCurrentFrame");

    return true;
}
/**
 * Force the byte range [start, stop) of the handle's file to disk.
 *
 * With HAVE_SYNC_FILE_RANGE the range is passed to sync_file_range();
 * otherwise the whole file is synced with fdatasync() or fsync(). When the
 * handle is marked sequential (and HAVE_POSIX_FADVISE is defined) the range
 * is then handed to the kernel with POSIX_FADV_DONTNEED.
 *
 * @return 0 on success. If sync_file_range() fails, its errno value is
 *         returned and h->error is set to EBADF. A posix_fadvise() failure
 *         is only reported on stderr.
 */
static int pfile_force_range(stasis_handle_t *h, lsn_t start, lsn_t stop) {
  TICK(force_range_hist);
  pfile_impl * impl = h->impl;
#ifdef HAVE_SYNC_FILE_RANGE
  // stop of zero syncs to eof.
  // NOTE(review): the length passed is stop-start, so that only holds when
  // start is zero as well -- confirm against callers.
  DEBUG("pfile_force_range calling sync_file_range %lld %lld\n",
        start, stop-start); fflush(stdout);
  int ret = sync_file_range(impl->fd, start, stop-start,
                            SYNC_FILE_RANGE_WAIT_BEFORE |
                            SYNC_FILE_RANGE_WRITE |
                            SYNC_FILE_RANGE_WAIT_AFTER);
  if(ret) {
    int error = errno;
    assert(ret == -1);
    // With the possible exceptions of ENOMEM and ENOSPACE, all of the sync
    // errors are unrecoverable.
    h->error = EBADF;
    ret = error;
  }
#else
#ifdef HAVE_FDATASYNC
  DEBUG("pfile_force_range() is calling fdatasync()\n");
  // NOTE(review): the return value of fdatasync() is ignored here.
  fdatasync(impl->fd);
#else
  DEBUG("pfile_force_range() is calling fsync()\n");
  // NOTE(review): the return value of fsync() is ignored here.
  fsync(impl->fd);
#endif
  int ret = 0;
#endif
#ifdef HAVE_POSIX_FADVISE
  if(impl->sequential) {
    int err = posix_fadvise(impl->fd, start, stop-start, POSIX_FADV_DONTNEED);
    if(err) {
      // posix_fadvise() returns the error number and leaves errno alone, so
      // publish it through errno before letting perror() describe it.
      errno = err;
      perror("Attempt to pass POSIX_FADV_DONTNEED (for a range of a file) to kernel failed");
    }
  }
#endif
  TOCK(force_range_hist);
  return ret;
}
/**
 * Force the handle's file to disk.
 *
 * Files opened without O_SYNC are synced with fdatasync() (or fsync() when
 * HAVE_FDATASYNC is not defined); for O_SYNC files nothing is synced. When
 * the handle is marked sequential (and HAVE_POSIX_FADVISE is defined) the
 * whole file is then handed to the kernel with POSIX_FADV_DONTNEED.
 *
 * @return Always 0; a posix_fadvise() failure is only reported on stderr.
 */
static int pfile_force(stasis_handle_t *h) {
  TICK(force_hist);
  pfile_impl *impl = h->impl;
  if(!(impl->file_flags & O_SYNC)) {
#ifdef HAVE_FDATASYNC
    DEBUG("pfile_force() is calling fdatasync()\n");
    // NOTE(review): the return value of fdatasync() is ignored here.
    fdatasync(impl->fd);
#else
    DEBUG("pfile_force() is calling fsync()\n");
    // NOTE(review): the return value of fsync() is ignored here.
    fsync(impl->fd);
#endif
  } else {
    DEBUG("File was opened with O_SYNC.  pfile_force() is a no-op\n");
  }
  if(impl->sequential) {
#ifdef HAVE_POSIX_FADVISE
    int err = posix_fadvise(impl->fd, 0, 0, POSIX_FADV_DONTNEED);
    if(err) {
      // posix_fadvise() returns the error number and leaves errno alone, so
      // publish it through errno before letting perror() describe it.
      errno = err;
      perror("Attempt to pass POSIX_FADV_DONTNEED to kernel failed");
    }
#endif
  }
  TOCK(force_hist);
  return 0;
}
bool MainFrame::ProcessOneFrame(int nFrame) { // if(trackingType != DEFORMNRSFM && m_pControlPanel->m_nCurrentFrame == nFrame && m_nCurrentFrame == nFrame) // return true; isTrackingFinished = false; cout << "processing frame: " << nFrame << endl; // // read input // TICK("getInput"); // if(!GetInput(nFrame)) // return false; // TOCK("getInput"); // // do tracking // TICK("tracking"); // if(!m_pTrackingEngine->trackFrame(nFrame, m_pColorImageRGB, &pOutputInfo)) // { // cout << "tracking failed: " << endl; // return false; // } // TOCK("tracking"); if(!MainEngine::ProcessOneFrame(nFrame)) return false; // update imagePanel TICK("update2DRendering"); m_pOverlayPane->updateImage(m_pColorImageRGB, m_nWidth, m_nHeight); m_pImagePane->updateImage(m_pColorImageRGB, m_nWidth, m_nHeight); TOCK("update2DRendering"); isTrackingFinished = true; Stopwatch::getInstance().printAll(); return true; }
// Fuses one frame into the model in two timed GPU passes.
//
// Pass 1 ("Fuse::Data"): renders uvSize points with dataProgram into
// frameBuffer while capturing transform feedback into newUnstableVbo. The
// seven input textures are bound to texture units 0..6 in the order
// rgb, depthRaw, depthFiltered, indexMap, vertConfMap, colorTimeMap,
// normRadMap.
//
// Pass 2 ("Fuse::Update"): runs updateProgram over vbos[target] with
// rasterization discarded, capturing transform feedback into
// vbos[renderSource], then swaps target and renderSource.
//
// pose, time, weighting and depthCutoff are forwarded as shader uniforms.
// confThreshold is not referenced in this function.
// Each pass ends with glFinish() before its timer is stopped.
void GlobalModel::fuse(const Eigen::Matrix4f & pose,
                       const int & time,
                       GPUTexture * rgb,
                       GPUTexture * depthRaw,
                       GPUTexture * depthFiltered,
                       GPUTexture * indexMap,
                       GPUTexture * vertConfMap,
                       GPUTexture * colorTimeMap,
                       GPUTexture * normRadMap,
                       const float depthCutoff,
                       const float confThreshold,
                       const float weighting)
{
    TICK("Fuse::Data");
    //This first part does data association and computes the vertex to merge with, storing
    //in an array that sets which vertices to update by index
    frameBuffer.Bind();

    // Save the viewport so it can be restored by glPopAttrib() below.
    glPushAttrib(GL_VIEWPORT_BIT);

    glViewport(0, 0, renderBuffer.width, renderBuffer.height);

    glClearColor(0, 0, 0, 0);

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    dataProgram->Bind();

    // Sampler uniforms: the integer is the texture unit bound further down.
    dataProgram->setUniform(Uniform("cSampler", 0));
    dataProgram->setUniform(Uniform("drSampler", 1));
    dataProgram->setUniform(Uniform("drfSampler", 2));
    dataProgram->setUniform(Uniform("indexSampler", 3));
    dataProgram->setUniform(Uniform("vertConfSampler", 4));
    dataProgram->setUniform(Uniform("colorTimeSampler", 5));
    dataProgram->setUniform(Uniform("normRadSampler", 6));
    dataProgram->setUniform(Uniform("time", (float)time));
    dataProgram->setUniform(Uniform("weighting", weighting));

    // Camera uniform: (cx, cy, 1/fx, 1/fy).
    dataProgram->setUniform(Uniform("cam", Eigen::Vector4f(Intrinsics::getInstance().cx(),
                                                           Intrinsics::getInstance().cy(),
                                                           1.0 / Intrinsics::getInstance().fx(),
                                                           1.0 / Intrinsics::getInstance().fy())));
    dataProgram->setUniform(Uniform("cols", (float)Resolution::getInstance().cols()));
    dataProgram->setUniform(Uniform("rows", (float)Resolution::getInstance().rows()));
    dataProgram->setUniform(Uniform("scale", (float)IndexMap::FACTOR));
    dataProgram->setUniform(Uniform("texDim", (float)TEXTURE_DIMENSION));
    dataProgram->setUniform(Uniform("pose", pose));
    dataProgram->setUniform(Uniform("maxDepth", depthCutoff));

    // Attribute 0: two floats per vertex, read from the uvo buffer.
    glEnableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER, uvo);
    glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, 0);

    // Transform feedback output of this pass goes to newUnstableVbo.
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, newUnstableFid);
    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, newUnstableVbo);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, rgb->texture->tid);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, depthRaw->texture->tid);

    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, depthFiltered->texture->tid);

    glActiveTexture(GL_TEXTURE3);
    glBindTexture(GL_TEXTURE_2D, indexMap->texture->tid);

    glActiveTexture(GL_TEXTURE4);
    glBindTexture(GL_TEXTURE_2D, vertConfMap->texture->tid);

    glActiveTexture(GL_TEXTURE5);
    glBindTexture(GL_TEXTURE_2D, colorTimeMap->texture->tid);

    glActiveTexture(GL_TEXTURE6);
    glBindTexture(GL_TEXTURE_2D, normRadMap->texture->tid);

    glBeginTransformFeedback(GL_POINTS);

    glDrawArrays(GL_POINTS, 0, uvSize);

    glEndTransformFeedback();

    // Undo the bindings made for this pass.
    frameBuffer.Unbind();

    glBindTexture(GL_TEXTURE_2D, 0);

    glActiveTexture(GL_TEXTURE0);

    glDisableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);

    dataProgram->Unbind();

    glPopAttrib();

    glFinish();
    TOCK("Fuse::Data");

    TICK("Fuse::Update");
    //Next we update the vertices at the indexes stored in the update textures
    //Using a transform feedback conditional on a texture sample
    updateProgram->Bind();

    updateProgram->setUniform(Uniform("vertSamp", 0));
    updateProgram->setUniform(Uniform("colorSamp", 1));
    updateProgram->setUniform(Uniform("normSamp", 2));
    updateProgram->setUniform(Uniform("texDim", (float)TEXTURE_DIMENSION));
    // Unlike the data pass, time is passed here without a float cast.
    updateProgram->setUniform(Uniform("time", time));

    // Input vertices come from vbos[target]: three vec4 attributes with a
    // stride of Vertex::SIZE, at offsets of 0, 1 and 2 Eigen::Vector4f.
    glBindBuffer(GL_ARRAY_BUFFER, vbos[target].first);

    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, 0);

    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f)));

    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f) * 2));

    // Nothing is rasterized in this pass; only transform feedback is kept.
    glEnable(GL_RASTERIZER_DISCARD);

    // Output of this pass goes to vbos[renderSource].
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, vbos[renderSource].second);

    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vbos[renderSource].first);

    glBeginTransformFeedback(GL_POINTS);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, updateMapVertsConfs.texture->tid);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, updateMapColorsTime.texture->tid);

    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, updateMapNormsRadii.texture->tid);

    // The draw is driven by the transform feedback object of vbos[target].
    glDrawTransformFeedback(GL_POINTS, vbos[target].second);

    glEndTransformFeedback();

    glDisable(GL_RASTERIZER_DISCARD);

    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);

    glDisableVertexAttribArray(0);
    glDisableVertexAttribArray(1);
    glDisableVertexAttribArray(2);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);

    updateProgram->Unbind();

    // The buffer just written becomes the next call's input.
    std::swap(target, renderSource);

    glFinish();
    TOCK("Fuse::Update");
}
// Fills outputInfo for the first frame: copies currentMesh into meshData and
// meshDataGT, appends one 2D projection per vertex to meshProj and
// meshProjGT, sets up the visibility mask (and the color difference when the
// mask is enabled), zeroes the camera pose and marks the tracker as
// initialized.
void MeshSequenceReader::trackerInitSetup(TrackerOutputInfo& outputInfo)
{
    TICK("visualRenderingInit");

    outputInfo.meshData = currentMesh;
    outputInfo.meshDataGT = outputInfo.meshData;

    // get 2d projections
    // KK[0][2] == 0 is treated as an orthographic camera.
    const bool orthographic = (KK[0][2] == 0);

    // Reused for every vertex; the ground-truth projection is identical.
    vector<CoordinateType> projected;
    projected.resize(2);

    for(int idx = 0; idx < currentMesh.numVertices; ++idx)
    {
        const double X = currentMesh.vertices[idx][0];
        const double Y = currentMesh.vertices[idx][1];
        const double Z = currentMesh.vertices[idx][2];

        double u = X;
        double v = Y;

        if(!orthographic)
        {
            u = KK[0][0] * X + KK[0][1] * Y + KK[0][2] * Z;
            v = KK[1][0] * X + KK[1][1] * Y + KK[1][2] * Z;
            const double w = KK[2][0] * X + KK[2][1] * Y + KK[2][2] * Z;

            // Leave u and v undivided when w is exactly zero.
            if(w != 0)
            {
                u = u/w;
                v = v/w;
            }
        }

        projected[0] = u;
        projected[1] = v;

        outputInfo.meshProj.push_back(projected);
        outputInfo.meshProjGT.push_back(projected);
    }

    outputInfo.visibilityMask.resize(outputInfo.meshData.numVertices, true);

    // update the visibility mask
    if(useVisibilityMask)
    {
        UpdateVisibilityMaskGL(outputInfo, visibilityMask, KK, camPose, m_nWidth, m_nHeight);
        outputInfo.meshDataColorDiff = outputInfo.meshData;
        UpdateColorDiff(outputInfo, visibilityMask, colorImageSplit);
    }

    // camera pose is always 0 in this case
    for(int k = 0; k < 6; ++k)
    {
        outputInfo.camPose[k] = 0;
    }

    trackerInitialized = true;

    TOCK("visualRenderingInit");
}
/*! Routine to compute an approximate solution to Ax = b @param[in] geom The description of the problem's geometry. @param[inout] A The known system matrix @param[inout] data The data structure with all necessary CG vectors preallocated @param[in] b The known right hand side vector @param[inout] x On entry: the initial guess; on exit: the new approximate solution @param[in] max_iter The maximum number of iterations to perform, even if tolerance is not met. @param[in] tolerance The stopping criterion to assert convergence: if norm of residual is <= to tolerance. @param[out] niters The number of iterations actually performed. @param[out] normr The 2-norm of the residual vector after the last iteration. @param[out] normr0 The 2-norm of the residual vector before the first iteration. @param[out] times The 7-element vector of the timing information accumulated during all of the iterations. @param[in] doPreconditioning The flag to indicate whether the preconditioner should be invoked at each iteration. @return Returns zero on success and a non-zero value otherwise. 
@see CG_ref() */ int CG(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x, const int max_iter, const double tolerance, int & niters, double & normr, double & normr0, double * times, bool doPreconditioning) { double t_begin = mytimer(); // Start timing right away normr = 0.0; double rtz = 0.0, oldrtz = 0.0, alpha = 0.0, beta = 0.0, pAp = 0.0; double t0 = 0.0, t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0, t5 = 0.0; //#ifndef HPCG_NOMPI // double t6 = 0.0; //#endif local_int_t nrow = A.localNumberOfRows; Vector & r = data.r; // Residual vector Vector & z = data.z; // Preconditioned residual vector Vector & p = data.p; // Direction vector (in MPI mode ncol>=nrow) Vector & Ap = data.Ap; if (!doPreconditioning && A.geom->rank==0) HPCG_fout << "WARNING: PERFORMING UNPRECONDITIONED ITERATIONS" << std::endl; #ifdef HPCG_DEBUG int print_freq = 1; if (print_freq>50) print_freq=50; if (print_freq<1) print_freq=1; #endif // p is of length ncols, copy x to p for sparse MV operation CopyVector(x, p); //TODO paralel TICK(); ComputeSPMV(A, p, Ap); TOCK(t3); // Ap = A*p TICK(); ComputeWAXPBY(nrow, 1.0, b, -1.0, Ap, r, A.isWaxpbyOptimized); TOCK(t2); // r = b - Ax (x stored in p) TICK(); ComputeDotProduct(nrow, r, r, normr, t4, A.isDotProductOptimized); TOCK(t1); normr = sqrt(normr); #ifdef HPCG_DEBUG if (A.geom->rank==0) HPCG_fout << "Initial Residual = "<< normr << std::endl; #endif // Record initial residual for convergence testing normr0 = normr; // Start iterations for (int k=1; k<=max_iter && normr/normr0 > tolerance; k++ ) { TICK(); if (doPreconditioning) ComputeMG(A, r, z); // Apply preconditioner else CopyVector (r, z); // copy r to z (no preconditioning) TOCK(t5); // Preconditioner apply time if (k == 1) { TICK(); ComputeWAXPBY(nrow, 1.0, z, 0.0, z, p, A.isWaxpbyOptimized); TOCK(t2); // Copy Mr to p TICK(); ComputeDotProduct (nrow, r, z, rtz, t4, A.isDotProductOptimized); TOCK(t1); // rtz = r'*z } else { oldrtz = rtz; TICK(); ComputeDotProduct (nrow, r, z, rtz, 
t4, A.isDotProductOptimized); TOCK(t1); // rtz = r'*z beta = rtz/oldrtz; TICK(); ComputeWAXPBY (nrow, 1.0, z, beta, p, p, A.isWaxpbyOptimized); TOCK(t2); // p = beta*p + z } TICK(); ComputeSPMV(A, p, Ap); TOCK(t3); // Ap = A*p TICK(); ComputeDotProduct(nrow, p, Ap, pAp, t4, A.isDotProductOptimized); TOCK(t1); // alpha = p'*Ap alpha = rtz/pAp; TICK(); ComputeWAXPBY(nrow, 1.0, x, alpha, p, x, A.isWaxpbyOptimized);// x = x + alpha*p ComputeWAXPBY(nrow, 1.0, r, -alpha, Ap, r, A.isWaxpbyOptimized); TOCK(t2);// r = r - alpha*Ap TICK(); ComputeDotProduct(nrow, r, r, normr, t4, A.isDotProductOptimized); TOCK(t1); normr = sqrt(normr); #ifdef HPCG_DEBUG if (A.geom->rank==0 && (k%print_freq == 0 || k == max_iter)) HPCG_fout << "Iteration = "<< k << " Scaled Residual = "<< normr/normr0 << std::endl; #endif niters = k; } // Store times times[1] += t1; // dot-product time times[2] += t2; // WAXPBY time times[3] += t3; // SPMV time times[4] += t4; // AllReduce time times[5] += t5; // preconditioner apply time //#ifndef HPCG_NOMPI // times[6] += t6; // exchange halo time //#endif times[0] += mytimer() - t_begin; // Total time. All done... return(0); }
// Pre-loads the mesh pyramids of frames startFrame, startFrame + step, ...
// up to numTrackingFrames into outputInfoPyramidBuffer and
// outputPropPyramidBuffer. Loading stops at the first frame for which
// existenceTest() fails; m_nGoodFrames counts the frames that were loaded.
MeshBufferReader::MeshBufferReader(MeshLoadingSettings& settings, int width,
                                   int height, double K[3][3], int startFrame,
                                   int numTrackingFrames):
    trackerInitialized(false)
{
    m_nWidth = width;
    m_nHeight = height;

    startFrameNo = startFrame;
    currentFrameNo = startFrame;

    // One RGB byte triple per pixel.
    pCurrentColorImageRGB = new unsigned char[3*width*height];

    // in this case camPose will always be zero
    for(int k = 0; k < 6; ++k)
    {
        camPose[k] = 0;
    }

    useVisibilityMask = settings.visibilityMask;

    setIntrinsicMatrix(K);

    nRenderingLevel = 0;
    m_nNumMeshLevels = settings.meshLevelList.size();

    // a bit ugly: the frame step is taken from imageSourceSettings rather
    // than from the settings argument.
    nFrameStep = imageSourceSettings.frameStep;

    // loading meshes into buffer: one slot per frame that may be visited
    const int bufferSize = (numTrackingFrames - startFrameNo)/nFrameStep + 1;
    outputInfoPyramidBuffer.resize(bufferSize);
    outputPropPyramidBuffer.resize(bufferSize);

    m_nGoodFrames = 0;

    TICK("loadingMeshBuffer");

    for(int frame = startFrameNo; frame <= numTrackingFrames; frame += nFrameStep)
    {
        const bool frameExists = existenceTest(settings.meshPath,
                                               settings.meshLevelFormat,
                                               frame,
                                               settings.meshLevelList);
        if(!frameExists)
        {
            break;
        }

        ++m_nGoodFrames;

        // Assigning from a temporary already selects move assignment where
        // one exists, so no explicit std::move is required.
        currentMeshPyramid = PangaeaMeshPyramid(settings.meshPath,
                                                settings.meshLevelFormat,
                                                frame,
                                                settings.meshLevelList);

        if(settings.loadProp)
        {
            propMeshPyramid = PangaeaMeshPyramid(settings.meshPath,
                                                 settings.propLevelFormat,
                                                 frame,
                                                 settings.meshLevelList);
        }

        if(!settings.fastLoading)
        {
            propMeshPyramid = currentMeshPyramid;
        }

        setMeshPyramid();

        // Hand the freshly built pyramids over to this frame's buffer slot.
        const int slot = (frame - startFrameNo)/nFrameStep;
        outputInfoPyramidBuffer[ slot ] = std::move(outputInfoPyramid);
        outputPropPyramidBuffer[ slot ] = std::move(outputPropPyramid);

        cout << "loading frame " << frame << endl;
    }

    TOCK("loadingMeshBuffer");
}
//ToDo(robin): add support for log int main(int argc, char ** argv) { CommandLineOptions options(argc, argv); Window * window = new Window(800, 600, APPLICATION_NAME); buffer points; buffer lines; int count = 1; std::vector<float> lines_buffer; std::vector<float> points_buffer; std::vector<ObjectInfo> obj_buffer; bool mouseDown = false; int mouseX = 0, mouseY = 0, mouseDX = 0, mouseDY = 0, screenSizeX = window->getSize().x, screenSizeY = window->getSize().y; float scale[2] = {1.0, 1.0}, centerX = 0.0, centerY = 0.0, mouseDXScreen = 0.0, mouseDYScreen = 0.0; float radius = 50; unsigned int samplerate = options.samplerate(); std::vector<SoundProcessor::v3> listener; std::signal(SIGTERM, terminate); std::signal(SIGINT, terminate); std::signal(SIGABRT, terminate); double distBetween = 0.42; double * mics = options.mics(); for(int i = 0; i < options.micCount(); i++) { listener.push_back(SoundProcessor::v3(mics[3 * i], mics[3 * i + 1], mics[3 * i + 2])); } std::cout << "mics: [" << std::endl; for(auto l : listener) { std::cout << "[" << l.x << ", " << l.y << ", " << l.z << "]" << std::endl; } std::cout << "]" << std::endl; SoundProcessor soundProcessor(samplerate, listener); glew_init(); int listener_count = init_listeners(listener, points_buffer, soundProcessor, radius); server = new Server(options.audioPort(), [listener](sf::TcpSocket * socket) { unsigned int size = listener.size(); socket->send(&size, sizeof(int)); std::cout << "client connected: " << socket->getRemoteAddress() << ":" << socket->getRemotePort() << std::endl; }); GuiServer gserver(options.guiPort()); count = listener.size(); glGenBuffers(1, &points.vbo); glGenVertexArrays(1, &points.vao); glBindBuffer(GL_ARRAY_BUFFER, points.vbo); glBufferData(GL_ARRAY_BUFFER, 6 * count * sizeof(float), points_buffer.data(), GL_STREAM_DRAW); glBindVertexArray(points.vao); ShaderProgram * shaderProgram = new ShaderProgram("#version 130\n" "uniform vec2 center;\n" "uniform vec2 scale;\n" "in vec3 vp;\n" "in vec3 
color;\n" "out vec3 Color;\n" "void main() {\n" " gl_Position = vec4((vp.x + center.x) / scale.x, (vp.y - center.y) / scale.y, 0.0, 1.0);\n" " gl_PointSize = vp.z / scale.x;\n" " Color = color;\n" "}\n" , "#version 130\n" "in vec3 Color;\n" "out vec4 frag_colour;\n" "void main () {\n" " gl_FragColor = vec4(Color, 1.0);\n" "}"); shaderProgram->vertexAttribPointer("color", 3, GL_FLOAT, false, 24, (void *) 12); shaderProgram->vertexAttribPointer("vp", 3, GL_FLOAT, false, 24, 0); lines_buffer.push_back(-1); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(0); lines_buffer.push_back(-1); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(1); lines_buffer.push_back(0); lines_buffer.push_back(1); lines_buffer.push_back(0); glGenBuffers(1, &lines.vbo); glGenVertexArrays(1, &lines.vao); glBindBuffer(GL_ARRAY_BUFFER, lines.vbo); glBufferData(GL_ARRAY_BUFFER, lines_buffer.size() * sizeof(float), lines_buffer.data(), GL_STREAM_DRAW); glBindVertexArray(lines.vao); shaderProgram->vertexAttribPointer("color", 3, GL_FLOAT, false, 24, (void *) 12); shaderProgram->vertexAttribPointer("vp", 3, GL_FLOAT, false, 24, 0); glEnable(GL_POINT_SMOOTH); glEnable(GL_LINE_SMOOTH); glEnable(GL_PROGRAM_POINT_SIZE); glPointSize(20.0); glLineWidth(3.0); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glEnable(GL_BLEND); glClearColor(1, 1, 1, 1); screenSizeX = window->getSize().x; screenSizeY = window->getSize().y; float freq = 100; auto now = std::chrono::high_resolution_clock::now(); Stopwatch::getInstance().setCustomSignature(32435); int id = 0; while (window->open()) { TICK("simulation_total"); 
TICK("simulation_process_events"); auto events = window->pollEvents(); for(auto event : events) { switch (event.type) { case sf::Event::Closed: { window->close(); break; } case sf::Event::KeyPressed: { if(event.key.code == sf::Keyboard::Space && gserver.buffer != nullptr) { std::cout << id++ << ", " << gserver.buffer[0] << ", " << gserver.buffer[1] << ", " << gserver.buffer[2] << std::endl; } break; } case sf::Event::Resized: { glViewport(0, 0, event.size.width, event.size.height); screenSizeX = event.size.width; screenSizeY = event.size.height; break; } case sf::Event::MouseButtonPressed: { if(event.mouseButton.button == sf::Mouse::Left) { mouseDown = true; mouseX = sf::Mouse::getPosition().x; mouseY = sf::Mouse::getPosition().y; } else if(event.mouseButton.button == sf::Mouse::Right) { float x = event.mouseButton.x, y = event.mouseButton.y, dx, dy; bool add = true; x = (2.0 * (x / screenSizeX) - 1.0) * scale[0] - centerX; y = -(2.0 * (y / screenSizeY) - 1.0) * scale[1] + centerY; for(int i = (points_buffer.size() / 6) - 1; i > listener.size() - 1; i--) { dx = (points_buffer[6 * i] - x) * screenSizeX * scale[0]; dy = (points_buffer[6 * i + 1] - y) * screenSizeY * scale[1]; if(sqrt(dx * dx + dy * dy) < radius * scale[0]) { soundProcessor.remove(points_buffer[6 * i], points_buffer[6 * i + 1]); points_buffer.erase(points_buffer.begin() + 6 * i - 1, points_buffer.begin() + 6 * i + 5); obj_buffer.erase(obj_buffer.begin() + i - listener.size()); count--; add = false; break; } } if(add) { points_buffer.push_back(x); points_buffer.push_back(y); points_buffer.push_back(radius); points_buffer.push_back(0); points_buffer.push_back(0); points_buffer.push_back(0); ObjectInfo oinfo; SoundProcessor::SoundObject * obj = new SoundProcessor::SoundObject(x, y, freq); oinfo.id = soundProcessor.add(obj); obj_buffer.push_back(oinfo); std::cout << "adding with freq: " << freq << std::endl; //freq += 0; freq += FREQUENCY_INCREMENT; count++; } glBindBuffer(GL_ARRAY_BUFFER, points.vbo); 
glBufferData(GL_ARRAY_BUFFER, 6 * count * sizeof(float), points_buffer.data(), GL_STREAM_DRAW); } break; } case sf::Event::MouseButtonReleased: { if(event.mouseButton.button == sf::Mouse::Left) { mouseDown = false; centerX = centerX - mouseDXScreen; centerY = centerY - mouseDYScreen; mouseDXScreen = 0.0; mouseDYScreen = 0.0; } break; } case sf::Event::MouseWheelScrolled: { scale[0] *= (event.mouseWheelScroll.delta > 0 ? 0.9 : 1.11111111); scale[1] *= (event.mouseWheelScroll.delta > 0 ? 0.9 : 1.11111111); break; } default: break; } } glClear(GL_COLOR_BUFFER_BIT); if (mouseDown) { mouseDX = mouseX - sf::Mouse::getPosition().x; mouseDY = mouseY - sf::Mouse::getPosition().y; mouseDXScreen = ((((double) mouseDX * 2) / (double) screenSizeX)) * scale[0]; mouseDYScreen = ((((double) mouseDY * 2) / (double) screenSizeY)) * scale[1]; } TOCK("simulation_process_events"); double time = (std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock().now() - now).count()) / 1000000000.0; now = std::chrono::high_resolution_clock().now(); unsigned int samples = (float) samplerate * time; TICK("simulation_generate_samples"); double * current_samples = soundProcessor.sample(samples); TOCK("simulation_generate_samples"); server->send(current_samples, samples * listener.size()); free(current_samples); TICK("simulation_draw"); glBindVertexArray(points.vao); shaderProgram->uniform2f("center", centerX - mouseDXScreen, centerY - mouseDYScreen); shaderProgram->uniform2f("scale", scale[0], scale[1]); glBufferData(GL_ARRAY_BUFFER, 6 * count * sizeof(float), points_buffer.data(), GL_STREAM_DRAW); glDrawArrays(GL_POINTS, 0, count); glBindVertexArray(lines.vao); std::vector<float> data = gserver.getPoints(); lines_buffer.clear(); lines_buffer.insert(lines_buffer.begin(), data.begin(), data.end()); glBindBuffer(GL_ARRAY_BUFFER, lines.vbo); glBufferData(GL_ARRAY_BUFFER, data.size() * sizeof(float), data.data(), GL_STREAM_DRAW); // ToDo(robin): better solution!! 
//if(count > listener.size()) glDrawArrays(GL_POINTS, 0, data.size() / 6); window->display(); TOCK("simulation_draw"); TOCK("simulation_total"); Stopwatch::getInstance().sendAll(); } terminate(0); return 0; }
bool inline TrackerInterface::process() { if(firstRun) { cudaSafeCall(cudaSetDevice(ConfigArgs::get().gpu)); firstRun = false; } if(!threadPack.pauseCapture.getValue()) { TICK(threadIdentifier); uint64_t start = Stopwatch::getCurrentSystemTime(); bool returnVal = true; bool shouldEnd = endRequested.getValue(); if(!logRead->grabNext(returnVal, currentFrame) || shouldEnd) { threadPack.pauseCapture.assignValue(true); threadPack.finalised.assignValue(true); finalise(); while(!threadPack.cloudSliceProcessorFinished.getValueWait()) { frontend->cloudSignal.notify_all(); } return shouldEnd ? false : returnVal; } depth.data = (unsigned short *)logRead->decompressedDepth; rgb24.data = (PixelRGB *)logRead->decompressedImage; currentFrame++; depth.step = Resolution::get().width() * 2; depth.rows = Resolution::get().rows(); depth.cols = Resolution::get().cols(); rgb24.step = Resolution::get().width() * 3; rgb24.rows = Resolution::get().rows(); rgb24.cols = Resolution::get().cols(); depth_device.upload(depth.data, depth.step, depth.rows, depth.cols); colors_device.upload(rgb24.data, rgb24.step, rgb24.rows, rgb24.cols); TICK("processFrame"); frontend->processFrame(depth_device, colors_device, logRead->decompressedImage, logRead->decompressedDepth, logRead->timestamp, logRead->isCompressed, logRead->compressedDepth, logRead->compressedDepthSize, logRead->compressedImage, logRead->compressedImageSize); TOCK("processFrame"); uint64_t duration = Stopwatch::getCurrentSystemTime() - start; if(threadPack.limit.getValue() && duration < 33333) { int sleepTime = std::max(int(33333 - duration), 0); usleep(sleepTime); } TOCK(threadIdentifier); } return true; }
int test_decode(void *code, int k, int index[], int sz, char *s) { int errors; int reconstruct = 0 ; int item, i ; static int prev_k = 0, prev_sz = 0; static u_char **d_original = NULL, **d_src = NULL ; if (sz < 1 || sz > 8192) { fprintf(stderr, "test_decode: size %d invalid, must be 1..8K\n", sz); return 1 ; } if (k < 1 || k > GF_SIZE + 1) { fprintf(stderr, "test_decode: k %d invalid, must be 1..%d\n", k, GF_SIZE + 1 ); return 2 ; } if (prev_k != k || prev_sz != sz) { if (d_original != NULL) { for (i = 0 ; i < prev_k ; i++ ) { free(d_original[i]); free(d_src[i]); } free(d_original); free(d_src); d_original = NULL ; d_src = NULL ; } } prev_k = k ; prev_sz = sz ; if (d_original == NULL) { d_original = my_malloc(k * sizeof(void *), "d_original ptr"); d_src = my_malloc(k * sizeof(void *), "d_src ptr"); for (i = 0 ; i < k ; i++ ) { d_original[i] = my_malloc(sz, "d_original data"); d_src[i] = my_malloc(sz, "d_src data"); } /* * build sample data */ for (i = 0 ; i < k ; i++ ) { for (item=0; item < sz; item++) d_original[i][item] = ((item ^ i) + 3) & GF_SIZE; } } errors = 0 ; for( i = 0 ; i < k ; i++ ) if (index[i] >= k ) reconstruct ++ ; TICK(ticks[2]); for( i = 0 ; i < k ; i++ ) fec_encode(code, d_original, d_src[i], index[i], sz ); TOCK(ticks[2]); TICK(ticks[1]); if (fec_decode(code, d_src, index, sz)) { fprintf(stderr, "detected singular matrix for %s \n", s); return 1 ; } TOCK(ticks[1]); for (i=0; i<k; i++) if (bcmp(d_original[i], d_src[i], sz )) { errors++; fprintf(stderr, "error reconstructing block %d\n", i); } if (errors) fprintf(stderr, "Errors reconstructing %d blocks out of %d\n", errors, k); fprintf(stderr, " k %3d, l %3d c_enc %10.6f MB/s c_dec %10.6f MB/s \r", k, reconstruct, (double)(k * sz * reconstruct)/(double)ticks[2], (double)(k * sz * reconstruct)/(double)ticks[1]); return errors ; }
void cg_solve(OperatorType& A, const VectorType& b, VectorType& x, Matvec matvec, typename OperatorType::LocalOrdinalType max_iter, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance, typename OperatorType::LocalOrdinalType& num_iters, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr, timer_type* my_cg_times) { typedef typename OperatorType::ScalarType ScalarType; typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type; timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; timer_type total_time = mytimer(); int myproc = 0; #ifdef HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &myproc); #endif if (!A.has_local_indices) { std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." 
<< std::endl; return; } char* str; int ngpu = 2; int local_rank = 0; int device = 0; int skip_gpu = 99999; if((str = getenv("CUDA_NGPU")) != NULL) { ngpu = atoi(str); } if((str = getenv("CUDA_SKIP_GPU")) != NULL) { skip_gpu = atoi(str); } if((str = getenv("SLURM_LOCALID")) != NULL) { local_rank = atoi(str); device = local_rank % ngpu; if(device >= skip_gpu) device++; } if((str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) != NULL) { local_rank = atoi(str); device = local_rank % ngpu; if(device >= skip_gpu) device++; } if((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) != NULL) { local_rank = atoi(str); device = local_rank % ngpu; if(device >= skip_gpu) device++; } size_t nrows = A.rows.size(); LocalOrdinalType ncols = A.num_cols; NVAMG_SAFE_CALL(NVAMG_initialize()); NVAMG_SAFE_CALL(NVAMG_initialize_plugins()); NVAMG_matrix_handle matrix; NVAMG_vector_handle rhs; NVAMG_vector_handle soln; NVAMG_resources_handle rsrc = NULL; NVAMG_solver_handle solver = NULL; NVAMG_config_handle config; NVAMG_SAFE_CALL(NVAMG_config_create_from_file(&config,"NVAMG_CONFIG" )); MPI_Comm nvamg_comm; MPI_Comm_dup(MPI_COMM_WORLD, &nvamg_comm); int devices[] = {device}; NVAMG_resources_create(&rsrc, config, &nvamg_comm, 1, devices); NVAMG_SAFE_CALL(NVAMG_solver_create(&solver, rsrc, NVAMG_mode_dDDI, config)); NVAMG_SAFE_CALL(NVAMG_matrix_create(&matrix, rsrc, NVAMG_mode_dDDI)); NVAMG_SAFE_CALL(NVAMG_vector_create(&rhs, rsrc, NVAMG_mode_dDDI)); NVAMG_SAFE_CALL(NVAMG_vector_create(&soln, rsrc, NVAMG_mode_dDDI)); //Generating communication Maps for NVAMG if(A.neighbors.size()>0) { int** send_map = new int*[A.neighbors.size()]; int** recv_map = new int*[A.neighbors.size()]; int send_offset = 0; int recv_offset = A.row_offsets.size()-1;; for(int i = 0; i<A.neighbors.size();i++) { send_map[i] = &A.elements_to_send[send_offset]; send_offset += A.send_length[i]; recv_map[i] = new int[A.recv_length[i]]; for(int j=0; j<A.recv_length[i]; j++) recv_map[i][j] = recv_offset+j; recv_offset += A.recv_length[i]; } 
const int** send_map_c = (const int**) send_map; const int** recv_map_c = (const int**) recv_map; NVAMG_SAFE_CALL(NVAMG_matrix_comm_from_maps_one_ring( matrix, 1, A.neighbors.size(),A.neighbors.data(), A.send_length.data(), send_map_c, A.recv_length.data(), recv_map_c)); NVAMG_SAFE_CALL(NVAMG_vector_bind(rhs,matrix)); NVAMG_SAFE_CALL(NVAMG_vector_bind(soln,matrix)); for(int i=0; i<A.neighbors.size(); i++) delete [] recv_map[i]; } for(int i=0;i<x.coefs.size();i++) x.coefs[i]=1; VectorType r(b.startIndex, nrows); VectorType p(0, ncols); VectorType Ap(b.startIndex, nrows); normr = 0; magnitude_type rtrans = 0; magnitude_type oldrtrans = 0; LocalOrdinalType print_freq = max_iter/10; if (print_freq>50) print_freq = 50; if (print_freq<1) print_freq = 1; ScalarType one = 1.0; ScalarType zero = 0.0; TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); TICK(); rtrans = dot_r2(r); TOCK(tDOT); normr = std::sqrt(rtrans); if (myproc == 0) { std::cout << "Initial Residual = "<< normr << std::endl; } { //Matrix upload needs to happen before vector, otherwise it crashes NVAMG_SAFE_CALL(NVAMG_matrix_upload_all(matrix,A.row_offsets.size()-1, A.packed_coefs.size(),1,1, &A.row_offsets[0],&A.packed_cols[0],&A.packed_coefs[0], NULL)); NVAMG_SAFE_CALL(NVAMG_vector_upload(soln, p.coefs.size(), 1, &p.coefs[0])); NVAMG_SAFE_CALL(NVAMG_vector_upload(rhs, b.coefs.size(), 1, &b.coefs[0])); int n = 0; int bsize_x = 0, bsize_y = 0; NVAMG_SAFE_CALL(NVAMG_solver_setup(solver, matrix)); NVAMG_SAFE_CALL(NVAMG_solver_solve(solver, rhs, soln)); NVAMG_SAFE_CALL(NVAMG_vector_download(soln, &x.coefs[0])); int niter; NVAMG_SAFE_CALL(NVAMG_solver_get_iterations_number(solver, &niter)); TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); TICK(); rtrans = dot_r2(r); TOCK(tDOT); normr = std::sqrt(rtrans); if (myproc == 0) { std::cout 
<< "Final Residual = "<< normr << " after " << niter << " iterations" << std::endl; } } my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[MATVECDOT] = tMATVECDOT; my_cg_times[TOTAL] = mytimer() - total_time; }
void cg_solve(OperatorType& A, const VectorType& b, VectorType& x, Matvec matvec, typename OperatorType::LocalOrdinalType max_iter, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance, typename OperatorType::LocalOrdinalType& num_iters, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr, timer_type* my_cg_times) { typedef typename OperatorType::ScalarType ScalarType; typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type; timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; timer_type total_time = mytimer(); int myproc = 0; #ifdef HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &myproc); #endif if (!A.has_local_indices) { std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." 
<< std::endl; return; } size_t nrows = A.rows.size(); LocalOrdinalType ncols = A.num_cols; VectorType r(b.startIndex, nrows, 256); VectorType p(0, ncols, 512); VectorType Ap(b.startIndex, nrows, 64); normr = 0; magnitude_type rtrans = 0; magnitude_type oldrtrans = 0; LocalOrdinalType print_freq = max_iter/10; if (print_freq>50) print_freq = 50; if (print_freq<1) print_freq = 1; ScalarType one = 1.0; ScalarType zero = 0.0; TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); // print_vec(p.coefs, "p"); TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); TICK(); rtrans = dot_r2(r); TOCK(tDOT); //std::cout << "rtrans="<<rtrans<<std::endl; normr = std::sqrt(rtrans); if (myproc == 0) { std::cout << "Initial Residual = "<< normr << std::endl; } magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon(); #ifdef MINIFE_DEBUG std::ostream& os = outstream(); os << "brkdown_tol = " << brkdown_tol << std::endl; #endif #ifdef MINIFE_DEBUG_OPENMP std::cout << "Starting CG Solve Phase..." 
<< std::endl; #endif for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) { if (k == 1) { //TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY); TICK(); daxpby(one, r, zero, p); TOCK(tWAXPY); } else { oldrtrans = rtrans; TICK(); rtrans = dot_r2(r); TOCK(tDOT); const magnitude_type beta = rtrans/oldrtrans; TICK(); daxpby(one, r, beta, p); TOCK(tWAXPY); } normr = sqrt(rtrans); if (myproc == 0 && (k%print_freq==0 || k==max_iter)) { std::cout << "Iteration = "<<k<<" Residual = "<<normr<<std::endl; } magnitude_type alpha = 0; magnitude_type p_ap_dot = 0; TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT); #ifdef MINIFE_DEBUG os << "iter " << k << ", p_ap_dot = " << p_ap_dot; os.flush(); #endif if (p_ap_dot < brkdown_tol) { if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) { std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl; #ifdef MINIFE_DEBUG os << "ERROR, numerical breakdown!"<<std::endl; #endif //update the timers before jumping out. my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[TOTAL] = mytimer() - total_time; return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = rtrans/p_ap_dot; #ifdef MINIFE_DEBUG os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl; #endif TICK(); daxpby(alpha, p, one, x); daxpby(-alpha, Ap, one, r); TOCK(tWAXPY); num_iters = k; } my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[MATVECDOT] = tMATVECDOT; my_cg_times[TOTAL] = mytimer() - total_time; }
void MainController::run() { while(!pangolin::ShouldQuit() && !((!logReader->hasMore()) && quiet) && !(eFusion->getTick() == end && quiet)) { if(!gui->pause->Get() || pangolin::Pushed(*gui->step)) { if((logReader->hasMore() || rewind) && eFusion->getTick() < end) { TICK("LogRead"); if(rewind) { if(!logReader->hasMore()) { logReader->getBack(); } else { logReader->getNext(); } if(logReader->rewound()) { logReader->currentFrame = 0; } } else { logReader->getNext(); } TOCK("LogRead"); if(eFusion->getTick() < start) { eFusion->setTick(start); logReader->fastForward(start); } float weightMultiplier = framesToSkip + 1; if(framesToSkip > 0) { eFusion->setTick(eFusion->getTick() + framesToSkip); logReader->fastForward(logReader->currentFrame + framesToSkip); framesToSkip = 0; } Eigen::Matrix4f * currentPose = 0; if(groundTruthOdometry) { currentPose = new Eigen::Matrix4f; currentPose->setIdentity(); *currentPose = groundTruthOdometry->getIncrementalTransformation(logReader->timestamp); } eFusion->processFrame(logReader->rgb, logReader->depth, logReader->timestamp, currentPose, weightMultiplier); if(currentPose) { delete currentPose; } if(frameskip && Stopwatch::getInstance().getTimings().at("Run") > 1000.f / 30.f) { framesToSkip = int(Stopwatch::getInstance().getTimings().at("Run") / (1000.f / 30.f)); } } } else { eFusion->predict(); } TICK("GUI"); if(gui->followPose->Get()) { pangolin::OpenGlMatrix mv; Eigen::Matrix4f currPose = eFusion->getCurrPose(); Eigen::Matrix3f currRot = currPose.topLeftCorner(3, 3); Eigen::Quaternionf currQuat(currRot); Eigen::Vector3f forwardVector(0, 0, 1); Eigen::Vector3f upVector(0, iclnuim ? 
1 : -1, 0); Eigen::Vector3f forward = (currQuat * forwardVector).normalized(); Eigen::Vector3f up = (currQuat * upVector).normalized(); Eigen::Vector3f eye(currPose(0, 3), currPose(1, 3), currPose(2, 3)); eye -= forward; Eigen::Vector3f at = eye + forward; Eigen::Vector3f z = (eye - at).normalized(); // Forward Eigen::Vector3f x = up.cross(z).normalized(); // Right Eigen::Vector3f y = z.cross(x); Eigen::Matrix4d m; m << x(0), x(1), x(2), -(x.dot(eye)), y(0), y(1), y(2), -(y.dot(eye)), z(0), z(1), z(2), -(z.dot(eye)), 0, 0, 0, 1; memcpy(&mv.m[0], m.data(), sizeof(Eigen::Matrix4d)); gui->s_cam.SetModelViewMatrix(mv); } gui->preCall(); std::stringstream stri; stri << eFusion->getModelToModel().lastICPCount; gui->trackInliers->Ref().Set(stri.str()); std::stringstream stre; stre << (isnan(eFusion->getModelToModel().lastICPError) ? 0 : eFusion->getModelToModel().lastICPError); gui->trackRes->Ref().Set(stre.str()); if(!gui->pause->Get()) { gui->resLog.Log((isnan(eFusion->getModelToModel().lastICPError) ? 
std::numeric_limits<float>::max() : eFusion->getModelToModel().lastICPError), icpErrThresh); gui->inLog.Log(eFusion->getModelToModel().lastICPCount, icpCountThresh); } Eigen::Matrix4f pose = eFusion->getCurrPose(); if(gui->drawRawCloud->Get() || gui->drawFilteredCloud->Get()) { eFusion->computeFeedbackBuffers(); } if(gui->drawRawCloud->Get()) { eFusion->getFeedbackBuffers().at(FeedbackBuffer::RAW)->render(gui->s_cam.GetProjectionModelViewMatrix(), pose, gui->drawNormals->Get(), gui->drawColors->Get()); } if(gui->drawFilteredCloud->Get()) { eFusion->getFeedbackBuffers().at(FeedbackBuffer::FILTERED)->render(gui->s_cam.GetProjectionModelViewMatrix(), pose, gui->drawNormals->Get(), gui->drawColors->Get()); } if(gui->drawGlobalModel->Get()) { glFinish(); TICK("Global"); if(gui->drawFxaa->Get()) { gui->drawFXAA(gui->s_cam.GetProjectionModelViewMatrix(), gui->s_cam.GetModelViewMatrix(), eFusion->getGlobalModel().model(), eFusion->getConfidenceThreshold(), eFusion->getTick(), eFusion->getTimeDelta(), iclnuim); } else { eFusion->getGlobalModel().renderPointCloud(gui->s_cam.GetProjectionModelViewMatrix(), eFusion->getConfidenceThreshold(), gui->drawUnstable->Get(), gui->drawNormals->Get(), gui->drawColors->Get(), gui->drawPoints->Get(), gui->drawWindow->Get(), gui->drawTimes->Get(), eFusion->getTick(), eFusion->getTimeDelta()); } glFinish(); TOCK("Global"); } if(eFusion->getLost()) { glColor3f(1, 1, 0); } else { glColor3f(1, 0, 1); } gui->drawFrustum(pose); glColor3f(1, 1, 1); if(gui->drawFerns->Get()) { glColor3f(0, 0, 0); for(size_t i = 0; i < eFusion->getFerns().frames.size(); i++) { if((int)i == eFusion->getFerns().lastClosest) continue; gui->drawFrustum(eFusion->getFerns().frames.at(i)->pose); } glColor3f(1, 1, 1); } if(gui->drawDefGraph->Get()) { const std::vector<GraphNode*> & graph = eFusion->getLocalDeformation().getGraph(); for(size_t i = 0; i < graph.size(); i++) { pangolin::glDrawCross(graph.at(i)->position(0), graph.at(i)->position(1), graph.at(i)->position(2), 
0.1); for(size_t j = 0; j < graph.at(i)->neighbours.size(); j++) { pangolin::glDrawLine(graph.at(i)->position(0), graph.at(i)->position(1), graph.at(i)->position(2), graph.at(graph.at(i)->neighbours.at(j))->position(0), graph.at(graph.at(i)->neighbours.at(j))->position(1), graph.at(graph.at(i)->neighbours.at(j))->position(2)); } } } if(eFusion->getFerns().lastClosest != -1) { glColor3f(1, 0, 0); gui->drawFrustum(eFusion->getFerns().frames.at(eFusion->getFerns().lastClosest)->pose); glColor3f(1, 1, 1); } const std::vector<PoseMatch> & poseMatches = eFusion->getPoseMatches(); int maxDiff = 0; for(size_t i = 0; i < poseMatches.size(); i++) { if(poseMatches.at(i).secondId - poseMatches.at(i).firstId > maxDiff) { maxDiff = poseMatches.at(i).secondId - poseMatches.at(i).firstId; } } for(size_t i = 0; i < poseMatches.size(); i++) { if(gui->drawDeforms->Get()) { if(poseMatches.at(i).fern) { glColor3f(1, 0, 0); } else { glColor3f(0, 1, 0); } for(size_t j = 0; j < poseMatches.at(i).constraints.size(); j++) { pangolin::glDrawLine(poseMatches.at(i).constraints.at(j).sourcePoint(0), poseMatches.at(i).constraints.at(j).sourcePoint(1), poseMatches.at(i).constraints.at(j).sourcePoint(2), poseMatches.at(i).constraints.at(j).targetPoint(0), poseMatches.at(i).constraints.at(j).targetPoint(1), poseMatches.at(i).constraints.at(j).targetPoint(2)); } } } glColor3f(1, 1, 1); eFusion->normaliseDepth(0.3f, gui->depthCutoff->Get()); for(std::map<std::string, GPUTexture*>::const_iterator it = eFusion->getTextures().begin(); it != eFusion->getTextures().end(); ++it) { if(it->second->draw) { gui->displayImg(it->first, it->second); } } eFusion->getIndexMap().renderDepth(gui->depthCutoff->Get()); gui->displayImg("ModelImg", eFusion->getIndexMap().imageTex()); gui->displayImg("Model", eFusion->getIndexMap().drawTex()); std::stringstream strs; strs << eFusion->getGlobalModel().lastCount(); gui->totalPoints->operator=(strs.str()); std::stringstream strs2; strs2 << 
eFusion->getLocalDeformation().getGraph().size(); gui->totalNodes->operator=(strs2.str()); std::stringstream strs3; strs3 << eFusion->getFerns().frames.size(); gui->totalFerns->operator=(strs3.str()); std::stringstream strs4; strs4 << eFusion->getDeforms(); gui->totalDefs->operator=(strs4.str()); std::stringstream strs5; strs5 << eFusion->getTick() << "/" << logReader->getNumFrames(); gui->logProgress->operator=(strs5.str()); std::stringstream strs6; strs6 << eFusion->getFernDeforms(); gui->totalFernDefs->operator=(strs6.str()); gui->postCall(); logReader->flipColors = gui->flipColors->Get(); eFusion->setRgbOnly(gui->rgbOnly->Get()); eFusion->setPyramid(gui->pyramid->Get()); eFusion->setFastOdom(gui->fastOdom->Get()); eFusion->setConfidenceThreshold(gui->confidenceThreshold->Get()); eFusion->setDepthCutoff(gui->depthCutoff->Get()); eFusion->setIcpWeight(gui->icpWeight->Get()); eFusion->setSo3(gui->so3->Get()); eFusion->setFrameToFrameRGB(gui->frameToFrameRGB->Get()); resetButton = pangolin::Pushed(*gui->reset); if(gui->autoSettings) { static bool last = gui->autoSettings->Get(); if(gui->autoSettings->Get() != last) { last = gui->autoSettings->Get(); static_cast<LiveLogReader *>(logReader)->setAuto(last); } } Stopwatch::getInstance().sendAll(); if(resetButton) { break; } if(pangolin::Pushed(*gui->save)) { eFusion->savePly(); } TOCK("GUI"); } }
void RGBDOdometry::getIncrementalTransformation(Eigen::Vector3f & trans, Eigen::Matrix<float, 3, 3, Eigen::RowMajor> & rot, const bool & rgbOnly, const float & icpWeight, const bool & pyramid, const bool & fastOdom, const bool & so3) { bool icp = !rgbOnly && icpWeight > 0; bool rgb = rgbOnly || icpWeight < 100; Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rprev = rot; Eigen::Vector3f tprev = trans; Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rcurr = Rprev; Eigen::Vector3f tcurr = tprev; if(rgb) { for(int i = 0; i < NUM_PYRS; i++) { computeDerivativeImages(nextImage[i], nextdIdx[i], nextdIdy[i]); } } Eigen::Matrix<double, 3, 3, Eigen::RowMajor> resultR = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Identity(); if(so3) { int pyramidLevel = 2; Eigen::Matrix<float, 3, 3, Eigen::RowMajor> R_lr = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>::Identity(); Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Zero(); K(0, 0) = intr(pyramidLevel).fx; K(1, 1) = intr(pyramidLevel).fy; K(0, 2) = intr(pyramidLevel).cx; K(1, 2) = intr(pyramidLevel).cy; K(2, 2) = 1; float lastError = std::numeric_limits<float>::max() / 2; float lastCount = std::numeric_limits<float>::max() / 2; Eigen::Matrix<double, 3, 3, Eigen::RowMajor> lastResultR = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Identity(); for(int i = 0; i < 10; i++) { Eigen::Matrix<float, 3, 3, Eigen::RowMajor> jtj; Eigen::Matrix<float, 3, 1> jtr; Eigen::Matrix<double, 3, 3, Eigen::RowMajor> homography = K * resultR * K.inverse(); mat33 imageBasis; memcpy(&imageBasis.data[0], homography.cast<float>().eval().data(), sizeof(mat33)); Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K_inv = K.inverse(); mat33 kinv; memcpy(&kinv.data[0], K_inv.cast<float>().eval().data(), sizeof(mat33)); Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K_R_lr = K * resultR; mat33 krlr; memcpy(&krlr.data[0], K_R_lr.cast<float>().eval().data(), sizeof(mat33)); float residual[2]; TICK("so3Step"); 
so3Step(lastNextImage[pyramidLevel], nextImage[pyramidLevel], imageBasis, kinv, krlr, sumDataSO3, outDataSO3, jtj.data(), jtr.data(), &residual[0], GPUConfig::getInstance().so3StepThreads, GPUConfig::getInstance().so3StepBlocks); TOCK("so3Step"); lastSO3Error = sqrt(residual[0]) / residual[1]; lastSO3Count = residual[1]; //Converged if(lastSO3Error < lastError && lastCount == lastSO3Count) { break; } else if(lastSO3Error > lastError + 0.001) //Diverging { lastSO3Error = lastError; lastSO3Count = lastCount; resultR = lastResultR; break; } lastError = lastSO3Error; lastCount = lastSO3Count; lastResultR = resultR; Eigen::Vector3f delta = jtj.ldlt().solve(jtr); Eigen::Matrix<double, 3, 3, Eigen::RowMajor> rotUpdate = OdometryProvider::rodrigues(delta.cast<double>()); R_lr = rotUpdate.cast<float>() * R_lr; for(int x = 0; x < 3; x++) { for(int y = 0; y < 3; y++) { resultR(x, y) = R_lr(x, y); } } } } iterations[0] = fastOdom ? 3 : 10; iterations[1] = pyramid ? 5 : 0; iterations[2] = pyramid ? 4 : 0; Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rprev_inv = Rprev.inverse(); mat33 device_Rprev_inv = Rprev_inv; float3 device_tprev = *reinterpret_cast<float3*>(tprev.data()); Eigen::Matrix<double, 4, 4, Eigen::RowMajor> resultRt = Eigen::Matrix<double, 4, 4, Eigen::RowMajor>::Identity(); if(so3) { for(int x = 0; x < 3; x++) { for(int y = 0; y < 3; y++) { resultRt(x, y) = resultR(x, y); } } } for(int i = NUM_PYRS - 1; i >= 0; i--) { if(rgb) { projectToPointCloud(lastDepth[i], pointClouds[i], intr, i); } Eigen::Matrix<double, 3, 3, Eigen::RowMajor> K = Eigen::Matrix<double, 3, 3, Eigen::RowMajor>::Zero(); K(0, 0) = intr(i).fx; K(1, 1) = intr(i).fy; K(0, 2) = intr(i).cx; K(1, 2) = intr(i).cy; K(2, 2) = 1; lastRGBError = std::numeric_limits<float>::max(); for(int j = 0; j < iterations[i]; j++) { Eigen::Matrix<double, 4, 4, Eigen::RowMajor> Rt = resultRt.inverse(); Eigen::Matrix<double, 3, 3, Eigen::RowMajor> R = Rt.topLeftCorner(3, 3); Eigen::Matrix<double, 3, 3, Eigen::RowMajor> 
KRK_inv = K * R * K.inverse(); mat33 krkInv; memcpy(&krkInv.data[0], KRK_inv.cast<float>().eval().data(), sizeof(mat33)); Eigen::Vector3d Kt = Rt.topRightCorner(3, 1); Kt = K * Kt; float3 kt = {(float)Kt(0), (float)Kt(1), (float)Kt(2)}; int sigma = 0; int rgbSize = 0; if(rgb) { TICK("computeRgbResidual"); computeRgbResidual(pow(minimumGradientMagnitudes[i], 2.0) / pow(sobelScale, 2.0), nextdIdx[i], nextdIdy[i], lastDepth[i], nextDepth[i], lastImage[i], nextImage[i], corresImg[i], sumResidualRGB, maxDepthDeltaRGB, kt, krkInv, sigma, rgbSize, GPUConfig::getInstance().rgbResThreads, GPUConfig::getInstance().rgbResBlocks); TOCK("computeRgbResidual"); } float sigmaVal = std::sqrt((float)sigma / rgbSize == 0 ? 1 : rgbSize); float rgbError = std::sqrt(sigma) / (rgbSize == 0 ? 1 : rgbSize); if(rgbOnly && rgbError > lastRGBError) { break; } lastRGBError = rgbError; lastRGBCount = rgbSize; if(rgbOnly) { sigmaVal = -1; //Signals the internal optimisation to weight evenly } Eigen::Matrix<float, 6, 6, Eigen::RowMajor> A_icp; Eigen::Matrix<float, 6, 1> b_icp; mat33 device_Rcurr = Rcurr; float3 device_tcurr = *reinterpret_cast<float3*>(tcurr.data()); DeviceArray2D<float>& vmap_curr = vmaps_curr_[i]; DeviceArray2D<float>& nmap_curr = nmaps_curr_[i]; DeviceArray2D<float>& vmap_g_prev = vmaps_g_prev_[i]; DeviceArray2D<float>& nmap_g_prev = nmaps_g_prev_[i]; float residual[2]; if(icp) { TICK("icpStep"); icpStep(device_Rcurr, device_tcurr, vmap_curr, nmap_curr, device_Rprev_inv, device_tprev, intr(i), vmap_g_prev, nmap_g_prev, distThres_, angleThres_, sumDataSE3, outDataSE3, A_icp.data(), b_icp.data(), &residual[0], GPUConfig::getInstance().icpStepThreads, GPUConfig::getInstance().icpStepBlocks); TOCK("icpStep"); } lastICPError = sqrt(residual[0]) / residual[1]; lastICPCount = residual[1]; Eigen::Matrix<float, 6, 6, Eigen::RowMajor> A_rgbd; Eigen::Matrix<float, 6, 1> b_rgbd; if(rgb) { TICK("rgbStep"); rgbStep(corresImg[i], sigmaVal, pointClouds[i], intr(i).fx, intr(i).fy, nextdIdx[i], 
nextdIdy[i], sobelScale, sumDataSE3, outDataSE3, A_rgbd.data(), b_rgbd.data(), GPUConfig::getInstance().rgbStepThreads, GPUConfig::getInstance().rgbStepBlocks); TOCK("rgbStep"); } Eigen::Matrix<double, 6, 1> result; Eigen::Matrix<double, 6, 6, Eigen::RowMajor> dA_rgbd = A_rgbd.cast<double>(); Eigen::Matrix<double, 6, 6, Eigen::RowMajor> dA_icp = A_icp.cast<double>(); Eigen::Matrix<double, 6, 1> db_rgbd = b_rgbd.cast<double>(); Eigen::Matrix<double, 6, 1> db_icp = b_icp.cast<double>(); if(icp && rgb) { double w = icpWeight; lastA = dA_rgbd + w * w * dA_icp; lastb = db_rgbd + w * db_icp; result = lastA.ldlt().solve(lastb); } else if(icp) { lastA = dA_icp; lastb = db_icp; result = lastA.ldlt().solve(lastb); } else if(rgb) { lastA = dA_rgbd; lastb = db_rgbd; result = lastA.ldlt().solve(lastb); } else { assert(false && "Control shouldn't reach here"); } Eigen::Isometry3f rgbOdom; OdometryProvider::computeUpdateSE3(resultRt, result, rgbOdom); Eigen::Isometry3f currentT; currentT.setIdentity(); currentT.rotate(Rprev); currentT.translation() = tprev; currentT = currentT * rgbOdom.inverse(); tcurr = currentT.translation(); Rcurr = currentT.rotation(); } } if(rgb && (tcurr - tprev).norm() > 0.3) { Rcurr = Rprev; tcurr = tprev; } if(so3) { for(int i = 0; i < NUM_PYRS; i++) { std::swap(lastNextImage[i], nextImage[i]); } } trans = tcurr; rot = Rcurr; }
/**
 * Runs the `unstableProgram` shader over the current model buffer and over the
 * new-unstable buffer, capturing the output via transform feedback into the
 * other model buffer, then swaps the two buffers.
 *
 * @param pose           camera pose; its inverse is passed to the shader as "t_inv"
 * @param time           passed to the shader as "time"
 * @param indexMap       texture bound to unit 0 ("indexSampler")
 * @param vertConfMap    texture bound to unit 1 ("vertConfSampler")
 * @param colorTimeMap   texture bound to unit 2 ("colorTimeSampler")
 * @param normRadMap     texture bound to unit 3 ("normRadSampler")
 * @param depthMap       texture bound to unit 5 ("depthSampler")
 * @param confThreshold  passed to the shader as "confThreshold"
 * @param graph          flat node data uploaded to the deformationNodes texture
 *                       (unit 4, "nodeSampler"); graph.size() / 16 is passed as
 *                       the node count and must be below MAX_NODES
 * @param timeDelta      passed to the shader as "timeDelta"
 * @param maxDepth       passed to the shader as "maxDepth"
 * @param isFern         passed to the shader as integer "isFern"
 *
 * On return `count` holds the number of primitives written by the two draws,
 * and `target` / `renderSource` have been exchanged. What the shader keeps or
 * discards is decided in the shader source, which is not visible here.
 */
void GlobalModel::clean(const Eigen::Matrix4f & pose,
                        const int & time,
                        GPUTexture * indexMap,
                        GPUTexture * vertConfMap,
                        GPUTexture * colorTimeMap,
                        GPUTexture * normRadMap,
                        GPUTexture * depthMap,
                        const float confThreshold,
                        std::vector<float> & graph,
                        const int timeDelta,
                        const float maxDepth,
                        const bool isFern)
{
    assert(graph.size() / 16 < MAX_NODES);

    if(graph.size() > 0)
    {
        //Can be optimised by only uploading new nodes with offset
        glBindTexture(GL_TEXTURE_2D, deformationNodes.texture->tid);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, graph.size(), 1, GL_LUMINANCE, GL_FLOAT, graph.data());
    }

    TICK("Fuse::Copy");
    //Next we copy the new unstable vertices from the newUnstableFid transform feedback into the global map
    unstableProgram->Bind();
    unstableProgram->setUniform(Uniform("time", time));
    unstableProgram->setUniform(Uniform("confThreshold", confThreshold));
    unstableProgram->setUniform(Uniform("scale", (float)IndexMap::FACTOR));
    // Sampler uniforms name the texture units bound further down (0..5).
    unstableProgram->setUniform(Uniform("indexSampler", 0));
    unstableProgram->setUniform(Uniform("vertConfSampler", 1));
    unstableProgram->setUniform(Uniform("colorTimeSampler", 2));
    unstableProgram->setUniform(Uniform("normRadSampler", 3));
    unstableProgram->setUniform(Uniform("nodeSampler", 4));
    unstableProgram->setUniform(Uniform("depthSampler", 5));
    unstableProgram->setUniform(Uniform("nodes", (float)(graph.size() / 16)));
    unstableProgram->setUniform(Uniform("nodeCols", (float)NODE_TEXTURE_DIMENSION));
    unstableProgram->setUniform(Uniform("timeDelta", timeDelta));
    unstableProgram->setUniform(Uniform("maxDepth", maxDepth));
    unstableProgram->setUniform(Uniform("isFern", (int)isFern));

    Eigen::Matrix4f t_inv = pose.inverse();
    unstableProgram->setUniform(Uniform("t_inv", t_inv));

    // Intrinsics are packed as (cx, cy, fx, fy).
    unstableProgram->setUniform(Uniform("cam", Eigen::Vector4f(Intrinsics::getInstance().cx(),
                                                         Intrinsics::getInstance().cy(),
                                                         Intrinsics::getInstance().fx(),
                                                         Intrinsics::getInstance().fy())));
    unstableProgram->setUniform(Uniform("cols", (float)Resolution::getInstance().cols()));
    unstableProgram->setUniform(Uniform("rows", (float)Resolution::getInstance().rows()));

    // Input 1: the current model buffer, read as three consecutive vec4
    // attributes per vertex.
    glBindBuffer(GL_ARRAY_BUFFER, vbos[target].first);

    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, 0);

    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f)));

    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f) * 2));

    // Nothing is rasterised; the output is captured into vbos[renderSource].
    glEnable(GL_RASTERIZER_DISCARD);

    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, vbos[renderSource].second);

    glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vbos[renderSource].first);

    glBeginTransformFeedback(GL_POINTS);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, indexMap->texture->tid);

    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, vertConfMap->texture->tid);

    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, colorTimeMap->texture->tid);

    glActiveTexture(GL_TEXTURE3);
    glBindTexture(GL_TEXTURE_2D, normRadMap->texture->tid);

    glActiveTexture(GL_TEXTURE4);
    glBindTexture(GL_TEXTURE_2D, deformationNodes.texture->tid);

    glActiveTexture(GL_TEXTURE5);
    glBindTexture(GL_TEXTURE_2D, depthMap->texture->tid);

    // The query spans both draws, so `count` is the combined number of
    // primitives written.
    glBeginQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, countQuery);

    // Draw 1: as many vertices as the transform feedback object of the
    // current model buffer recorded.
    glDrawTransformFeedback(GL_POINTS, vbos[target].second);

    // Input 2: the new-unstable buffer, with the same attribute layout.
    glBindBuffer(GL_ARRAY_BUFFER, newUnstableVbo);

    glEnableVertexAttribArray(0);
    glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, 0);

    glEnableVertexAttribArray(1);
    glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f)));

    glEnableVertexAttribArray(2);
    glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, Vertex::SIZE, reinterpret_cast<GLvoid*>(sizeof(Eigen::Vector4f) * 2));

    // Draw 2: as many vertices as newUnstableFid recorded.
    glDrawTransformFeedback(GL_POINTS, newUnstableFid);

    glEndQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN);

    glGetQueryObjectuiv(countQuery, GL_QUERY_RESULT, &count);

    glEndTransformFeedback();

    glDisable(GL_RASTERIZER_DISCARD);

    // Unit 5 is still the active unit here, so this unbinds only that unit's
    // texture before unit 0 is made active again.
    glBindTexture(GL_TEXTURE_2D, 0);
    glActiveTexture(GL_TEXTURE0);

    glDisableVertexAttribArray(0);
    glDisableVertexAttribArray(1);
    glDisableVertexAttribArray(2);
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);

    unstableProgram->Unbind();

    // The buffer just written becomes the new `target`.
    std::swap(target, renderSource);

    // Wait for the GPU so the "Fuse::Copy" timing includes the actual work.
    glFinish();
    TOCK("Fuse::Copy");
}
void cg_solve(OperatorType& A, const VectorType& b, VectorType& x, Matvec matvec, typename OperatorType::LocalOrdinalType max_iter, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& tolerance, typename OperatorType::LocalOrdinalType& num_iters, typename TypeTraits<typename OperatorType::ScalarType>::magnitude_type& normr, timer_type* my_cg_times) { typedef typename OperatorType::ScalarType ScalarType; typedef typename OperatorType::GlobalOrdinalType GlobalOrdinalType; typedef typename OperatorType::LocalOrdinalType LocalOrdinalType; typedef typename TypeTraits<ScalarType>::magnitude_type magnitude_type; timer_type t0 = 0, tWAXPY = 0, tDOT = 0, tMATVEC = 0, tMATVECDOT = 0; timer_type total_time = mytimer(); int myproc = 0; #ifdef HAVE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &myproc); #endif if (!A.has_local_indices) { std::cerr << "miniFE::cg_solve ERROR, A.has_local_indices is false, needs to be true. This probably means " << "miniFE::make_local_matrix(A) was not called prior to calling miniFE::cg_solve." << std::endl; return; } size_t nrows = A.rows.size(); LocalOrdinalType ncols = A.num_cols; nvtxRangeId_t r1=nvtxRangeStartA("Allocation of Temporary Vectors"); VectorType r(b.startIndex, nrows); VectorType p(0, ncols); VectorType Ap(b.startIndex, nrows); nvtxRangeEnd(r1); #ifdef HAVE_MPI #ifndef GPUDIRECT //TODO move outside? 
cudaHostRegister(&p.coefs[0],ncols*sizeof(typename VectorType::ScalarType),0); cudaCheckError(); if(A.send_buffer.size()>0) cudaHostRegister(&A.send_buffer[0],A.send_buffer.size()*sizeof(typename VectorType::ScalarType),0); cudaCheckError(); #endif #endif normr = 0; magnitude_type rtrans = 0; magnitude_type oldrtrans = 0; LocalOrdinalType print_freq = max_iter/10; if (print_freq>50) print_freq = 50; if (print_freq<1) print_freq = 1; ScalarType one = 1.0; ScalarType zero = 0.0; TICK(); waxpby(one, x, zero, x, p); TOCK(tWAXPY); TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); waxpby(one, b, -one, Ap, r); TOCK(tWAXPY); TICK(); rtrans = dot(r, r); TOCK(tDOT); normr = std::sqrt(rtrans); if (myproc == 0) { std::cout << "Initial Residual = "<< normr << std::endl; } magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon(); #ifdef MINIFE_DEBUG std::ostream& os = outstream(); os << "brkdown_tol = " << brkdown_tol << std::endl; #endif for(LocalOrdinalType k=1; k <= max_iter && normr > tolerance; ++k) { if (k == 1) { TICK(); waxpby(one, r, zero, r, p); TOCK(tWAXPY); } else { oldrtrans = rtrans; TICK(); rtrans = dot(r, r); TOCK(tDOT); magnitude_type beta = rtrans/oldrtrans; TICK(); waxpby(one, r, beta, p, p); TOCK(tWAXPY); } normr = std::sqrt(rtrans); if (myproc == 0 && (k%print_freq==0 || k==max_iter)) { std::cout << "Iteration = "<<k<<" Residual = "<<normr<<std::endl; } magnitude_type alpha = 0; magnitude_type p_ap_dot = 0; TICK(); matvec(A, p, Ap); TOCK(tMATVEC); TICK(); p_ap_dot = dot(Ap, p); TOCK(tDOT); #ifdef MINIFE_DEBUG os << "iter " << k << ", p_ap_dot = " << p_ap_dot; os.flush(); #endif //TODO remove false below if (false && p_ap_dot < brkdown_tol) { if (p_ap_dot < 0 || breakdown(p_ap_dot, Ap, p)) { std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!"<<std::endl; #ifdef MINIFE_DEBUG os << "ERROR, numerical breakdown!"<<std::endl; #endif //update the timers before jumping out. 
my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[TOTAL] = mytimer() - total_time; return; } else brkdown_tol = 0.1 * p_ap_dot; } alpha = rtrans/p_ap_dot; #ifdef MINIFE_DEBUG os << ", rtrans = " << rtrans << ", alpha = " << alpha << std::endl; #endif TICK(); waxpby(one, x, alpha, p, x); waxpby(one, r, -alpha, Ap, r); TOCK(tWAXPY); num_iters = k; } #ifdef HAVE_MPI #ifndef GPUDIRECT //TODO move outside? cudaHostUnregister(&p.coefs[0]); cudaCheckError(); if(A.send_buffer.size()>0) cudaHostUnregister(&A.send_buffer[0]); cudaCheckError(); #endif #endif my_cg_times[WAXPY] = tWAXPY; my_cg_times[DOT] = tDOT; my_cg_times[MATVEC] = tMATVEC; my_cg_times[MATVECDOT] = tMATVECDOT; my_cg_times[TOTAL] = mytimer() - total_time; }