// Per-cell kernel: adds the thickness-update load term
//   rho*g * grad(dH) * wBF
// (integrated over the quadrature points) to the first two components of the
// incoming residual. The third component, when present, is copied unchanged.
//
// The contribution is accumulated directly into this cell's Residual entries.
// The previous implementation staged it in the non-local scratch buffer
// `res(node, comp)`, which has no cell index: every cell of the parallel_for
// wrote the same entries, i.e. a data race on any parallel execution space.
// Only the summation order (rounding level) differs from the staged version.
//
// tag  : dispatch tag, not used inside the kernel
// cell : index of the cell processed by this invocation
KOKKOS_INLINE_FUNCTION
void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
operator() (const StokesFOImplicitThicknessUpdateResid_Tag& tag, const int& cell) const
{
  const double rho_g = rho*g;

  // Start from the incoming residual.
  for (int node=0; node < numNodes; ++node) {
    Residual(cell,node,0) = InputResidual(cell,node,0);
    Residual(cell,node,1) = InputResidual(cell,node,1);
    if (numVecDims==3)
      Residual(cell,node,2) = InputResidual(cell,node,2);
  }

  for (int qp=0; qp < numQPs; ++qp) {
    // Gradient of the thickness increment at this quadrature point,
    // interpolated from the nodal values.
    ScalarT dHdiffdx = 0;//Ugrad(cell,qp,2,0);
    ScalarT dHdiffdy = 0;//Ugrad(cell,qp,2,1);
    for (int node=0; node < numNodes; ++node) {
      dHdiffdx += dH(cell,node) * gradBF(cell,node, qp,0);
      dHdiffdy += dH(cell,node) * gradBF(cell,node, qp,1);
    }

    // Only entries owned by `cell` are touched, so cells do not interfere.
    for (int node=0; node < numNodes; ++node) {
      Residual(cell,node,0) += rho_g*dHdiffdx*wBF(cell,node,qp);
      Residual(cell,node,1) += rho_g*dHdiffdy*wBF(cell,node,qp);
    }
  }
}
/// <summary> /// Computes the thresholded gradient (Snatos97). /// </summary> /// <returns>The focus measure value</returns> double BasicFM::computeGRAT() { if (checkInput()) { cv::Mat dH = mSrcImg(cv::Range::all(), cv::Range(1, mSrcImg.cols)) - mSrcImg(cv::Range::all(), cv::Range(0, mSrcImg.cols - 1)); cv::Mat dV = mSrcImg(cv::Range(1, mSrcImg.rows), cv::Range::all()) - mSrcImg(cv::Range(0, mSrcImg.rows - 1), cv::Range::all()); //dH = cv::abs(dH); //dV = cv::abs(dV); //cv::Mat FM = cv::max(dH, dV); cv::Mat FM = cv::max(dH(cv::Range(0, dH.rows - 1), cv::Range::all()), dV(cv::Range::all(), cv::Range(0, dV.cols - 1))); double thr = 0; cv::Mat mask = FM >= thr; mask.convertTo(mask, CV_32FC1, 255.0); FM = FM.mul(mask); cv::Scalar fm = cv::sum(FM) / cv::sum(mask); //normalize mVal = fm[0] / 255.0; } return mVal; }
// Evaluates the residual for every cell of the workset:
//   Residual = InputResidual + integral( rho*g * grad(dH) * wBF )
// for the first two components; the third component (if any) is passed through.
// Without ALBANY_KOKKOS_UNDER_DEVELOPMENT the cells are processed serially here,
// otherwise the work is dispatched to the functor's operator() via parallel_for.
void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
#ifndef ALBANY_KOKKOS_UNDER_DEVELOPMENT
  typedef Intrepid2::FunctionSpaceTools FST;

  // Scratch storage for the nodal load of the current cell (2 components per node).
  Intrepid2::FieldContainer_Kokkos<ScalarT, PHX::Layout, PHX::Device> nodalLoad(numNodes,2);

  const double rhoG = rho*g;

  for (std::size_t c = 0; c < workset.numCells; ++c) {
    // Reset the scratch buffer before accumulating this cell.
    nodalLoad.initialize();

    for (std::size_t q = 0; q < numQPs; ++q) {
      // Interpolate the gradient of the thickness increment at the quadrature point.
      ScalarT gradDHx = 0;//Ugrad(cell,qp,2,0);
      ScalarT gradDHy = 0;//Ugrad(cell,qp,2,1);
      for (std::size_t a = 0; a < numNodes; ++a) {
        gradDHx += dH(c,a) * gradBF(c,a,q,0);
        gradDHy += dH(c,a) * gradBF(c,a,q,1);
      }

      // Weight by the test functions and accumulate per node.
      for (std::size_t a = 0; a < numNodes; ++a) {
        nodalLoad(a,0) += rhoG*gradDHx*wBF(c,a,q);
        nodalLoad(a,1) += rhoG*gradDHy*wBF(c,a,q);
      }
    }

    // Combine with the incoming residual.
    for (std::size_t a = 0; a < numNodes; ++a) {
      Residual(c,a,0) = InputResidual(c,a,0)+nodalLoad(a,0);
      Residual(c,a,1) = InputResidual(c,a,1)+nodalLoad(a,1);
      if (numVecDims==3) {
        Residual(c,a,2) = InputResidual(c,a,2);
      }
    }
  }
#else
  Kokkos::parallel_for(StokesFOImplicitThicknessUpdateResid_Policy(0,workset.numCells),*this);
#endif
}
// Rebuilds the output coordinates of every node in the workset.
// The x/y components are copied from coordVecIn. The z component is placed at
//   bed + sigma(level) * max(h, minH)
// where h = H0 + dH is the updated thickness, bed = topSurface - H0, and sigma
// is the cumulative layer ratio of the node's level in the layered mesh.
//
// The unused solution-vector view, DOF-manager lookup and equation-count locals
// of the earlier version were removed: they did work on every call without
// contributing to the result.
void UpdateZCoordinateMovingTop<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const Albany::LayeredMeshNumbering<LO>& layeredMeshNumbering = *workset.disc->getLayeredMeshNumbering();

  int numLayers = layeredMeshNumbering.numLayers;
  const Teuchos::ArrayRCP<Teuchos::ArrayRCP<GO> >& wsElNodeID = workset.disc->getWsElNodeID()[workset.wsIndex];

  // Cumulative layer ratios: sigmaLevel[0] = 0 and sigmaLevel[numLayers] = 1.
  const Teuchos::ArrayRCP<double>& layers_ratio = layeredMeshNumbering.layers_ratio;
  Teuchos::ArrayRCP<double> sigmaLevel(numLayers+1);
  sigmaLevel[0] = 0.;
  sigmaLevel[numLayers] = 1.;
  for(int i=1; i<numLayers; ++i)
    sigmaLevel[i] = sigmaLevel[i-1] + layers_ratio[i-1];

  for (std::size_t cell=0; cell < workset.numCells; ++cell ) {
    const Teuchos::ArrayRCP<GO>& elNodeID = wsElNodeID[cell];

    for (std::size_t node = 0; node < this->numNodes; ++node) {
      // Locate the node inside the layered mesh (column id and level).
      LO lnodeId = workset.disc->getOverlapNodeMapT()->getLocalElement(elNodeID[node]);
      LO base_id, ilevel;
      layeredMeshNumbering.getIndices(lnodeId, base_id, ilevel);

      MeshScalarT h = H0(cell,node)+dH(cell,node);
      MeshScalarT bed = topSurface(cell,node)- H0(cell,node);

      for(std::size_t icomp=0; icomp< numDims; icomp++) {
        typename PHAL::Ref<MeshScalarT>::type val = coordVecOut(cell,node,icomp);
        // Only z (icomp == 2) moves; the thickness is clamped from below by minH.
        val = (icomp==2) ?
                 (h>minH) ? MeshScalarT(bed + sigmaLevel[ ilevel]*h)
                          : MeshScalarT(bed + sigmaLevel[ ilevel]*minH)
                 : coordVecIn(cell,node,icomp);
      }
    }
  }
}
/// <summary> /// Computes Brenner's focus measure and determines the ratio of the median/mean. /// </summary> /// <returns>The focus measure value</returns> double BasicFM::computeROGR() { if (checkInput()) { cv::Mat dH = mSrcImg(cv::Range::all(), cv::Range(1, mSrcImg.cols)) - mSrcImg(cv::Range::all(), cv::Range(0, mSrcImg.cols - 1)); cv::Mat dV = mSrcImg(cv::Range(1, mSrcImg.rows), cv::Range::all()) - mSrcImg(cv::Range(0, mSrcImg.rows - 1), cv::Range::all()); dH = cv::abs(dH); dV = cv::abs(dV); cv::Mat FM = cv::max(dH(cv::Range(0, dH.rows - 1), cv::Range::all()), dV(cv::Range::all(), cv::Range(0, dV.cols - 1))); FM = FM.mul(FM); cv::Scalar m = cv::mean(FM); cv::Mat tmp; FM.convertTo(tmp, CV_32F); double r = 255.0*255.0; //mVal = r > 0 ? m[0] / r : m[0]; mVal = m[0] / r; } return mVal; }
/*
    Restarted GMRES (modified Gram-Schmidt orthogonalisation) for A x = b in
    single-complex precision.

    A           system matrix
    b           right-hand side
    x           solution vector; it is zeroed first, i.e. the iteration always
                starts from x = 0
    solver_par  in:  restart, epsilon, maxiter, verbose
                out: solver, numiter, init_res, iter_res, final_res, runtime,
                     info (MAGMA_SUCCESS / MAGMA_SLOW_CONVERGENCE /
                     MAGMA_DIVERGENCE) and, if verbose > 0, res_vec / timing
    queue       queue handed to the sparse kernels

    Returns MAGMA_ERR_HOST_ALLOC or MAGMA_ERR_DEVICE_ALLOC when a workspace
    allocation fails, MAGMA_SUCCESS otherwise (the convergence state is
    reported through solver_par->info).

    Every exit path releases all workspace, streams and events and restores
    the kernel stream that was active on entry.

    NOTE(review): H(i,j), HH(i,j), dH(i,j) and q(i) are presumably file-level
    accessor macros over the local variables of the same name (with ldh and
    dofs), which is why those locals must keep their names - confirm.
*/
extern "C" magma_int_t
magma_cgmres(
    magma_c_sparse_matrix A,
    magma_c_vector b,
    magma_c_vector *x,
    magma_c_solver_par *solver_par,
    magma_queue_t queue )
{
    // set queue for old dense routines
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    magma_int_t stat_cpu = 0, stat_dev = 0;
    // prepare solver feedback
    solver_par->solver = Magma_GMRES;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS;

    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE,
                                                c_mone = MAGMA_C_NEG_ONE;
    magma_int_t dofs = A.num_rows;
    magma_int_t i, j, k, m = 0;
    magma_int_t restart = min( dofs-1, solver_par->restart );
    magma_int_t ldh = restart+1;
    float nom, rNorm, RNorm, nom0, betanom, r0 = 0.;

    // CPU workspace
    // (pointers start out NULL so that the failure path below never frees
    //  an indeterminate pointer when an earlier allocation failed)
    //magma_setdevice(0);
    magmaFloatComplex *H = NULL, *HH = NULL, *y = NULL, *h1 = NULL;
    stat_cpu += magma_cmalloc_pinned( &H, (ldh+1)*ldh );
    stat_cpu += magma_cmalloc_pinned( &y, ldh );
    stat_cpu += magma_cmalloc_pinned( &HH, ldh*ldh );
    stat_cpu += magma_cmalloc_pinned( &h1, ldh );
    if( stat_cpu != 0){
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_ERR_HOST_ALLOC;
    }

    // GPU workspace
    magma_c_vector r, q, q_t;
    magma_c_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_c_vinit( &q, Magma_DEV, dofs*(ldh+1), c_zero, queue );
    q_t.memory_location = Magma_DEV;
    q_t.dval = NULL;
    q_t.num_rows = q_t.nnz = dofs; q_t.num_cols = 1;

    magmaFloatComplex *dy = NULL, *dH = NULL;
    stat_dev += magma_cmalloc( &dy, ldh );
    stat_dev += magma_cmalloc( &dH, (ldh+1)*ldh );
    if( stat_dev != 0){
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        // each device buffer is released exactly once (it used to be freed twice)
        magma_free( dH );
        magma_free( dy );
        // the work vectors already exist at this point and must go as well
        magma_c_vfree(&r, queue );
        magma_c_vfree(&q, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_ERR_DEVICE_ALLOC;
    }

    // GPU stream
    magma_queue_t stream[2];
    magma_event_t event[1];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    magma_event_create( &event[0] );
    //magmablasSetKernelStream(stream[0]);

    magma_cscal( dofs, c_zero, x->dval, 1 );              //  x = 0
    magma_ccopy( dofs, b.dval, 1, r.dval, 1 );             //  r = b
    nom0 = betanom = magma_scnrm2( dofs, r.dval, 1 );     //  nom0= || r||
    nom = nom0  * nom0;
    solver_par->init_res = nom0;
    H(1,0) = MAGMA_C_MAKE( nom0, 0. );
    magma_csetvector(1, &H(1,0), 1, &dH(1,0), 1);

    if ( (r0 = nom0 * solver_par->epsilon ) < ATOLERANCE ){
        r0 = solver_par->epsilon;
    }

    // If the initial residual is already below the tolerance nothing is
    // iterated. The code still falls through to the common cleanup below
    // (this case used to return directly and leaked all resources).
    if ( !( nom < r0 ) ) {

        //Chronometry
        real_Double_t tempo1, tempo2;
        tempo1 = magma_sync_wtime( queue );
        if ( solver_par->verbose > 0 ) {
            solver_par->res_vec[0] = nom0;
            solver_par->timing[0] = 0.0;
        }
        // start iteration
        for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter;
                                                    solver_par->numiter++ ) {

            for(k=1; k<=restart; k++) {

                magma_ccopy(dofs, r.dval, 1, q(k-1), 1);       //  q[0]    = 1.0/||r||
                magma_cscal(dofs, 1./H(k,k-1), q(k-1), 1);    //  (to be fused)

                q_t.dval = q(k-1);
                //magmablasSetKernelStream(stream[0]);
                magma_c_spmv( c_one, A, q_t, c_zero, r, queue ); //  r = A q[k]
    //            if (solver_par->ortho == Magma_MGS ) {
                // modified Gram-Schmidt

                for (i=1; i<=k; i++) {
                    H(i,k) =magma_cdotc(dofs, q(i-1), 1, r.dval, 1);
                        //  H(i,k) = q[i] . r
                    magma_caxpy(dofs,-H(i,k), q(i-1), 1, r.dval, 1);
                       //  r = r - H(i,k) q[i]
                }
                H(k+1,k) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); // H(k+1,k) = ||r||

                /*} else if (solver_par->ortho == Magma_FUSED_CGS ) {
                    // fusing cgemv with scnrm2 in classical Gram-Schmidt
                    magmablasSetKernelStream(stream[0]);
                    magma_ccopy(dofs, r.dval, 1, q(k), 1);
                        // dH(1:k+1,k) = q[0:k] . r
                    magmablas_cgemv(MagmaTrans, dofs, k+1, c_one, q(0),
                                    dofs, r.dval, 1, c_zero, &dH(1,k), 1);
                        // r = r - q[0:k-1] dH(1:k,k)
                    magmablas_cgemv(MagmaNoTrans, dofs, k, c_mone, q(0),
                                    dofs, &dH(1,k), 1, c_one, r.dval, 1);
                       // 1) dH(k+1,k) = sqrt( dH(k+1,k) - dH(1:k,k) )
                    magma_ccopyscale(  dofs, k, r.dval, q(k), &dH(1,k) );
                       // 2) q[k] = q[k] / dH(k+1,k)

                    magma_event_record( event[0], stream[0] );
                    magma_queue_wait_event( stream[1], event[0] );
                    magma_cgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]);
                                    // asynch copy dH(1:(k+1),k) to H(1:(k+1),k)
                } else {
                    // classical Gram-Schmidt (default)
                    // > explicitly calling magmabls
                    magmablasSetKernelStream(stream[0]);
                    magmablas_cgemv(MagmaTrans, dofs, k, c_one, q(0),
                                    dofs, r.dval, 1, c_zero, &dH(1,k), 1, queue );
                                    // dH(1:k,k) = q[0:k-1] . r
                    #ifndef SCNRM2SCALE
                    // start copying dH(1:k,k) to H(1:k,k)
                    magma_event_record( event[0], stream[0] );
                    magma_queue_wait_event( stream[1], event[0] );
                    magma_cgetvector_async(k, &dH(1,k), 1, &H(1,k), 1, stream[1]);
                    #endif
                                      // r = r - q[0:k-1] dH(1:k,k)
                    magmablas_cgemv(MagmaNoTrans, dofs, k, c_mone, q(0),
                                    dofs, &dH(1,k), 1, c_one, r.dval, 1);
                    #ifdef SCNRM2SCALE
                    magma_ccopy(dofs, r.dval, 1, q(k), 1);
                        //  q[k] = r / H(k,k-1)
                    magma_scnrm2scale(dofs, q(k), dofs, &dH(k+1,k) );
                        //  dH(k+1,k) = sqrt(r . r) and r = r / dH(k+1,k)

                    magma_event_record( event[0], stream[0] );
                                // start sending dH(1:k,k) to H(1:k,k)
                    magma_queue_wait_event( stream[1], event[0] );
                                // can we keep H(k+1,k) on GPU and combine?
                    magma_cgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]);
                    #else
                    H(k+1,k) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. );
                                //  H(k+1,k) = sqrt(r . r)
                    if ( k<solver_par->restart ) {
                            magmablasSetKernelStream(stream[0]);
                            magma_ccopy(dofs, r.dval, 1, q(k), 1);
                                //  q[k]    = 1.0/H[k][k-1] r
                            magma_cscal(dofs, 1./H(k+1,k), q(k), 1);
                                //  (to be fused)
                     }
                    #endif
                }*/

                /*     Minimization of  || b-Ax ||  in H_k       */
                for (i=1; i<=k; i++) {
                    HH(k,i) = magma_cblas_cdotc( i+1, &H(1,k), 1, &H(1,i), 1 );
                }
                h1[k] = H(1,k)*H(1,0);
                if (k != 1) {
                    for (i=1; i<k; i++) {
                        HH(k,i) = HH(k,i)/HH(i,i);//
                        for (m=i+1; m<=k; m++) {
                            HH(k,m) -= HH(k,i) * HH(m,i) * HH(i,i);
                        }
                        h1[k]   -= h1[i] * HH(k,i);
                    }
                }
                y[k] = h1[k]/HH(k,k);
                if (k != 1)
                    for (i=k-1; i>=1; i--) {
                        y[i] = h1[i]/HH(i,i);
                        for (j=i+1; j<=k; j++)
                            y[i] -= y[j] * HH(j,i);
                    }
                m = k;
                rNorm = fabs(MAGMA_C_REAL(H(k+1,k)));
            }/*     Minimization done       */

            // compute solution approximation
            magma_csetmatrix(m, 1, y+1, m, dy, m );
            magma_cgemv(MagmaNoTrans, dofs, m, c_one, q(0), dofs, dy, 1,
                                                        c_one, x->dval, 1);

            // compute residual
            magma_c_spmv( c_mone, A, *x, c_zero, r, queue );      //  r = - A * x
            magma_caxpy(dofs, c_one, b.dval, 1, r.dval, 1);  //  r = r + b
            H(1,0) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. );
                                                //  RNorm = H[1][0] = || r ||
            RNorm = MAGMA_C_REAL( H(1,0) );
            betanom = fabs(RNorm);

            if ( solver_par->verbose > 0 ) {
                tempo2 = magma_sync_wtime( queue );
                if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                    solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) betanom;
                    solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) tempo2-tempo1;
                }
            }

            if (  betanom  < r0 ) {
                break;
            }
        }

        tempo2 = magma_sync_wtime( queue );
        solver_par->runtime = (real_Double_t) tempo2-tempo1;
        float residual;
        magma_cresidual( A, b, *x, &residual, queue );
        solver_par->iter_res = betanom;
        solver_par->final_res = residual;

        if ( solver_par->numiter < solver_par->maxiter) {
            solver_par->info = MAGMA_SUCCESS;
        } else {
            // iteration budget exhausted: log the last state, then classify
            if ( solver_par->verbose > 0 ) {
                if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                    solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) betanom;
                    solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) tempo2-tempo1;
                }
            }
            if ( solver_par->init_res > solver_par->final_res ) {
                solver_par->info = MAGMA_SLOW_CONVERGENCE;
            } else {
                solver_par->info = MAGMA_DIVERGENCE;
            }
        }
    }

    // free pinned memory
    magma_free_pinned( H );
    magma_free_pinned( y );
    magma_free_pinned( HH );
    magma_free_pinned( h1 );
    // free GPU memory
    magma_free(dy);
    if (dH != NULL ) magma_free(dH);
    magma_c_vfree(&r, queue );
    magma_c_vfree(&q, queue );

    // free GPU streams and events
    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_event_destroy( event[0] );
    //magmablasSetKernelStream(NULL);

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_cgmres */
/*
    Preconditioned restarted GMRES for A x = b in double precision.

    A             system matrix
    b             right-hand side
    x             solution vector; it is zeroed first, i.e. the iteration
                  always starts from x = 0
    solver_par    in:  restart, epsilon, maxiter, verbose, ortho
                  out: solver, numiter, init_res, iter_res, final_res, runtime,
                       info (0 converged, -2 slow convergence, -1 divergence)
                       and, if verbose > 0, res_vec / timing
    precond_par   preconditioner applied (left, then right) to every Krylov
                  vector before the matrix-vector product

    solver_par->ortho selects modified Gram-Schmidt (Magma_MGS), fused
    classical Gram-Schmidt (Magma_FUSED_CGS) or plain classical Gram-Schmidt.

    Returns MAGMA_ERR_HOST_ALLOC or MAGMA_ERR_DEVICE_ALLOC when a workspace
    allocation fails, MAGMA_SUCCESS otherwise (the convergence state is
    reported through solver_par->info).

    Every exit path releases the workspace it acquired. The allocation-failure
    returns and the "already converged" return used to leak it.

    NOTE(review): H(i,j), HH(i,j), dH(i,j), q(i) and z(i) are presumably
    file-level accessor macros over the local variables of the same name (with
    ldh and dofs), which is why those locals must keep their names - confirm.
*/
magma_int_t
magma_dpgmres( magma_d_sparse_matrix A, magma_d_vector b, magma_d_vector *x,
               magma_d_solver_par *solver_par,
               magma_d_preconditioner *precond_par ){

    // prepare solver feedback
    solver_par->solver = Magma_PGMRES;
    solver_par->numiter = 0;
    solver_par->info = 0;

    // local variables
    double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE,
                                                c_mone = MAGMA_D_NEG_ONE;
    magma_int_t dofs = A.num_rows;
    magma_int_t i, j, k, m = 0;
    magma_int_t restart = min( dofs-1, solver_par->restart );
    magma_int_t ldh = restart+1;
    double nom, rNorm, RNorm, nom0, betanom, r0 = 0.;

    // CPU workspace
    magma_setdevice(0);
    magma_int_t stat_cpu = 0;
    double *H = NULL, *HH = NULL, *y = NULL, *h1 = NULL;
    stat_cpu += magma_dmalloc_pinned( &H, (ldh+1)*ldh );
    stat_cpu += magma_dmalloc_pinned( &y, ldh );
    stat_cpu += magma_dmalloc_pinned( &HH, ldh*ldh );
    stat_cpu += magma_dmalloc_pinned( &h1, ldh );
    if( stat_cpu != 0 ){
        // a failed pinned allocation used to go unnoticed until first use
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        return MAGMA_ERR_HOST_ALLOC;
    }

    // GPU workspace
    magma_d_vector r, q, q_t, z, z_t, t;
    magma_d_vinit( &t, Magma_DEV, dofs, c_zero );
    magma_d_vinit( &r, Magma_DEV, dofs, c_zero );
    magma_d_vinit( &q, Magma_DEV, dofs*(ldh+1), c_zero );
    magma_d_vinit( &z, Magma_DEV, dofs*(ldh+1), c_zero );
    magma_d_vinit( &z_t, Magma_DEV, dofs, c_zero );
    q_t.memory_location = Magma_DEV;
    q_t.val = NULL;
    q_t.num_rows = q_t.nnz = dofs;

    double *dy = NULL, *dH = NULL;
    if ( MAGMA_SUCCESS != magma_dmalloc( &dy, ldh ) ||
         MAGMA_SUCCESS != magma_dmalloc( &dH, (ldh+1)*ldh ) ) {
        // release everything acquired so far before reporting the failure
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        magma_free( dy );
        magma_free( dH );
        magma_d_vfree(&t);
        magma_d_vfree(&r);
        magma_d_vfree(&q);
        magma_d_vfree(&z);
        magma_d_vfree(&z_t);
        return MAGMA_ERR_DEVICE_ALLOC;
    }

    // GPU stream
    magma_queue_t stream[2];
    magma_event_t event[1];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    magma_event_create( &event[0] );
    magmablasSetKernelStream(stream[0]);

    magma_dscal( dofs, c_zero, x->val, 1 );              //  x = 0
    magma_dcopy( dofs, b.val, 1, r.val, 1 );             //  r = b
    nom0 = betanom = magma_dnrm2( dofs, r.val, 1 );     //  nom0= || r||
    nom = nom0  * nom0;
    solver_par->init_res = nom0;
    H(1,0) = MAGMA_D_MAKE( nom0, 0. );
    magma_dsetvector(1, &H(1,0), 1, &dH(1,0), 1);
    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE )
        r0 = ATOLERANCE;

    // If the initial residual is already below the tolerance nothing is
    // iterated. The code still falls through to the common cleanup below
    // (this case used to return directly and leaked all resources).
    if ( !( nom < r0 ) ) {

        //Chronometry
        real_Double_t tempo1, tempo2;
        magma_device_sync(); tempo1=magma_wtime();
        if( solver_par->verbose > 0 ){
            solver_par->res_vec[0] = nom0;
            solver_par->timing[0] = 0.0;
        }
        // start iteration
        for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter;
                                                    solver_par->numiter++ ){
            magma_dcopy(dofs, r.val, 1, q(0), 1);       //  q[0]    = 1.0/H(1,0) r
            magma_dscal(dofs, 1./H(1,0), q(0), 1);    //  (to be fused)

            for(k=1; k<=restart; k++) {
                q_t.val = q(k-1);
                magmablasSetKernelStream(stream[0]);
                // preconditioner
                //  z[k] = M^(-1) q(k)
                magma_d_applyprecond_left( A, q_t, &t, precond_par );
                magma_d_applyprecond_right( A, t, &z_t, precond_par );

                magma_dcopy(dofs, z_t.val, 1, z(k-1), 1);

                // r = A q[k]
                magma_d_spmv( c_one, A, z_t, c_zero, r );

                if (solver_par->ortho == Magma_MGS ) {
                    // modified Gram-Schmidt
                    magmablasSetKernelStream(stream[0]);
                    for (i=1; i<=k; i++) {
                        H(i,k) =magma_ddot(dofs, q(i-1), 1, r.val, 1);
                            //  H(i,k) = q[i] . r
                        magma_daxpy(dofs,-H(i,k), q(i-1), 1, r.val, 1);
                            //  r = r - H(i,k) q[i]
                    }
                    H(k+1,k) = MAGMA_D_MAKE( magma_dnrm2(dofs, r.val, 1), 0. );
                            // H(k+1,k) = sqrt(r . r)
                    if (k < restart) {
                        magma_dcopy(dofs, r.val, 1, q(k), 1);
                            //  q[k] = 1.0/H[k][k-1] r
                        magma_dscal(dofs, 1./H(k+1,k), q(k), 1);
                            //  (to be fused)
                    }
                } else if (solver_par->ortho == Magma_FUSED_CGS ) {
                    // fusing dgemv with dnrm2 in classical Gram-Schmidt
                    magmablasSetKernelStream(stream[0]);
                    magma_dcopy(dofs, r.val, 1, q(k), 1);
                        // dH(1:k+1,k) = q[0:k] . r
                    magmablas_dgemv(MagmaTrans, dofs, k+1, c_one, q(0),
                                    dofs, r.val, 1, c_zero, &dH(1,k), 1);
                        // r = r - q[0:k-1] dH(1:k,k)
                    magmablas_dgemv(MagmaNoTrans, dofs, k, c_mone, q(0),
                                    dofs, &dH(1,k), 1, c_one, r.val, 1);
                        // 1) dH(k+1,k) = sqrt( dH(k+1,k) - dH(1:k,k) )
                    magma_dcopyscale(  dofs, k, r.val, q(k), &dH(1,k) );
                        // 2) q[k] = q[k] / dH(k+1,k)

                    magma_event_record( event[0], stream[0] );
                    magma_queue_wait_event( stream[1], event[0] );
                    magma_dgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]);
                        // asynch copy dH(1:(k+1),k) to H(1:(k+1),k)
                } else {
                    // classical Gram-Schmidt (default)
                    // > explicitly calling magmabls
                    magmablasSetKernelStream(stream[0]);
                    magmablas_dgemv(MagmaTrans, dofs, k, c_one, q(0),
                                    dofs, r.val, 1, c_zero, &dH(1,k), 1);
                                    // dH(1:k,k) = q[0:k-1] . r
                    #ifndef DNRM2SCALE
                    // start copying dH(1:k,k) to H(1:k,k)
                    magma_event_record( event[0], stream[0] );
                    magma_queue_wait_event( stream[1], event[0] );
                    magma_dgetvector_async(k, &dH(1,k), 1, &H(1,k), 1, stream[1]);
                    #endif
                    // r = r - q[0:k-1] dH(1:k,k)
                    magmablas_dgemv(MagmaNoTrans, dofs, k, c_mone, q(0),
                                    dofs, &dH(1,k), 1, c_one, r.val, 1);
                    #ifdef DNRM2SCALE
                    magma_dcopy(dofs, r.val, 1, q(k), 1);
                        //  q[k] = r / H(k,k-1)
                    magma_dnrm2scale(dofs, q(k), dofs, &dH(k+1,k) );
                        //  dH(k+1,k) = sqrt(r . r) and r = r / dH(k+1,k)

                    magma_event_record( event[0], stream[0] );
                        // start sending dH(1:k,k) to H(1:k,k)
                    magma_queue_wait_event( stream[1], event[0] );
                        // can we keep H(k+1,k) on GPU and combine?
                    magma_dgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]);
                    #else
                    H(k+1,k) = MAGMA_D_MAKE( magma_dnrm2(dofs, r.val, 1), 0. );
                        //  H(k+1,k) = sqrt(r . r)
                    if( k<solver_par->restart ){
                        magmablasSetKernelStream(stream[0]);
                        magma_dcopy(dofs, r.val, 1, q(k), 1);
                            //  q[k]    = 1.0/H[k][k-1] r
                        magma_dscal(dofs, 1./H(k+1,k), q(k), 1);
                            //  (to be fused)
                    }
                    #endif
                }
            }
            // all asynchronous copies of H columns must have landed before
            // the host-side least-squares solve reads them
            magma_queue_sync( stream[1] );
            for( k=1; k<=restart; k++ ){
                /*     Minimization of  || b-Ax ||  in H_k       */
                for (i=1; i<=k; i++) {
                    #if defined(PRECISION_z) || defined(PRECISION_c)
                    cblas_ddot_sub( i+1, &H(1,k), 1, &H(1,i), 1, &HH(k,i) );
                    #else
                    HH(k,i) = cblas_ddot(i+1, &H(1,k), 1, &H(1,i), 1);
                    #endif
                }
                h1[k] = H(1,k)*H(1,0);
                if (k != 1)
                    for (i=1; i<k; i++) {
                        for (m=i+1; m<k; m++){
                            HH(k,m) -= HH(k,i) * HH(m,i);
                        }
                        HH(k,k) -= HH(k,i) * HH(k,i) / HH(i,i);
                        HH(k,i) = HH(k,i)/HH(i,i);
                        h1[k] -= h1[i] * HH(k,i);
                    }
                y[k] = h1[k]/HH(k,k);
                if (k != 1)
                    for (i=k-1; i>=1; i--) {
                        y[i] = h1[i]/HH(i,i);
                        for (j=i+1; j<=k; j++)
                            y[i] -= y[j] * HH(j,i);
                    }
                m = k;
                rNorm = fabs(MAGMA_D_REAL(H(k+1,k)));
            }

            // x = x + Z y  (Z holds the preconditioned Krylov vectors)
            magma_dsetmatrix_async(m, 1, y+1, m, dy, m, stream[0]);
            magmablasSetKernelStream(stream[0]);
            magma_dgemv(MagmaNoTrans, dofs, m, c_one, z(0), dofs, dy, 1,
                                                        c_one, x->val, 1);

            magma_d_spmv( c_mone, A, *x, c_zero, r );      //  r = - A * x
            magma_daxpy(dofs, c_one, b.val, 1, r.val, 1);  //  r = r + b
            H(1,0) = MAGMA_D_MAKE( magma_dnrm2(dofs, r.val, 1), 0. );
                                                //  RNorm = H[1][0] = || r ||
            RNorm = MAGMA_D_REAL( H(1,0) );
            betanom = fabs(RNorm);

            if( solver_par->verbose > 0 ){
                magma_device_sync(); tempo2=magma_wtime();
                if( (solver_par->numiter)%solver_par->verbose==0 ) {
                    solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) betanom;
                    solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) tempo2-tempo1;
                }
            }

            if (  betanom  < r0 ) {
                break;
            }
        }

        magma_device_sync(); tempo2=magma_wtime();
        solver_par->runtime = (real_Double_t) tempo2-tempo1;
        double residual;
        magma_dresidual( A, b, *x, &residual );
        solver_par->iter_res = betanom;
        solver_par->final_res = residual;

        if( solver_par->numiter < solver_par->maxiter){
            solver_par->info = 0;
        } else {
            // iteration budget exhausted: log the last state, then classify
            if( solver_par->verbose > 0 ){
                if( (solver_par->numiter)%solver_par->verbose==0 ) {
                    solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) betanom;
                    solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                            = (real_Double_t) tempo2-tempo1;
                }
            }
            if( solver_par->init_res > solver_par->final_res ){
                solver_par->info = -2;      // residual decreased, but too slowly
            } else {
                solver_par->info = -1;      // residual did not decrease
            }
        }
    }

    // free pinned memory
    magma_free_pinned( H );
    magma_free_pinned( y );
    magma_free_pinned( HH );
    magma_free_pinned( h1 );
    // free GPU memory
    magma_free(dy);
    if (dH != NULL ) magma_free(dH);
    magma_d_vfree(&t);
    magma_d_vfree(&r);
    magma_d_vfree(&q);
    magma_d_vfree(&z);
    magma_d_vfree(&z_t);

    // free GPU streams and events
    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_event_destroy( event[0] );
    magmablasSetKernelStream(NULL);

    return MAGMA_SUCCESS;
}   /* magma_dpgmres */
void Distance_Function_RHS (UserCtx *user, Vec Levelset_RHS, int wall_distance) { DA da = user->da, fda = user->fda; DALocalInfo info = user->info; PetscInt xs, xe, ys, ye, zs, ze; PetscInt mx, my, mz; PetscInt i, j, k; PetscReal dpdc, dpde, dpdz; Vec Csi = user->lCsi, Eta = user->lEta, Zet = user->lZet; Vec Aj = user->lAj; Cmpnts ***csi, ***eta, ***zet; PetscReal ***aj, ***level, ***level0, ***rhs, ***grad_level; PetscInt lxs, lys, lzs, lxe, lye, lze; PetscReal ***nvert; Vec L, lLevelset0; // grad level, level0 xs = info.xs; xe = xs + info.xm; ys = info.ys; ye = ys + info.ym; zs = info.zs; ze = zs + info.zm; lxs = xs; lxe = xe; lys = ys; lye = ye; lzs = zs; lze = ze; mx = info.mx; my = info.my; mz = info.mz; if (xs==0) lxs = xs+1; if (ys==0) lys = ys+1; if (zs==0) lzs = zs+1; if (xe==mx) lxe = xe-1; if (ye==my) lye = ye-1; if (ze==mz) lze = ze-1; VecSet(Levelset_RHS, 0); VecDuplicate(user->lP, &L); VecDuplicate(user->lP, &lLevelset0); DAGlobalToLocalBegin(user->da, LevelSet0, INSERT_VALUES, lLevelset0); DAGlobalToLocalEnd(user->da, LevelSet0, INSERT_VALUES, lLevelset0); DAVecGetArray(fda, Csi, &csi); DAVecGetArray(fda, Eta, &eta); DAVecGetArray(fda, Zet, &zet); DAVecGetArray(da, Aj, &aj); DAVecGetArray(da, user->lNvert, &nvert); DAVecGetArray(da, user->lLevelset, &level); DAVecGetArray(da, lLevelset0, &level0); DAVecGetArray(da, Levelset_RHS, &rhs); DAVecGetArray(da, L, &grad_level); for (k=lzs; k<lze; k++) for (j=lys; j<lye; j++) for (i=lxs; i<lxe; i++) { double dldc, dlde, dldz; double dl_dx, dl_dy, dl_dz; double csi0=csi[k][j][i].x,csi1=csi[k][j][i].y, csi2=csi[k][j][i].z; double eta0=eta[k][j][i].x,eta1=eta[k][j][i].y, eta2=eta[k][j][i].z; double zet0=zet[k][j][i].x,zet1=zet[k][j][i].y, zet2=zet[k][j][i].z; double ajc = aj[k][j][i]; double dx=pow(1./aj[k][j][i],1./3.); if(dthick_set) dx = dthick; double sgn = sign1(level0[k][j][i], dx); //if(wall_distance) sgn = sign(level0[k][j][i]); //Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sgn, wall_distance, 
level, nvert, &dldc, &dlde, &dldz); Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sign(level0[k][j][i]), wall_distance, level, nvert, &dldc, &dlde, &dldz); //100521 Compute_dscalar_dxyz (csi0, csi1, csi2, eta0, eta1, eta2, zet0, zet1, zet2, ajc, dldc, dlde, dldz, &dl_dx, &dl_dy, &dl_dz); grad_level[k][j][i] = sqrt( dl_dx*dl_dx + dl_dy*dl_dy + dl_dz*dl_dz ); if(nvert[k][j][i]>0.1) grad_level[k][j][i]=0; } DAVecRestoreArray(da, L, &grad_level); DALocalToLocalBegin(user->da, L, INSERT_VALUES, L); DALocalToLocalEnd(user->da, L, INSERT_VALUES, L); DAVecGetArray(da, L, &grad_level); // Neumann, periodic conditions if(xs==0 || xe==mx) { int from, to; for (k=lzs; k<lze; k++) for (j=lys; j<lye; j++) { if(xs==0) { i = 1, from = i, to = 0; if(i_periodic) from = mx-2; else if(ii_periodic) from = -2; grad_level[k][j][to] = grad_level[k][j][from]; } if(xe==mx) { i = mx-2, from = i, to = mx-1; if(i_periodic) from = 1; else if(ii_periodic) from = mx+1; grad_level[k][j][to] = grad_level[k][j][from]; } } } if(ys==0 || ye==my) { int from, to; for (k=lzs; k<lze; k++) for (i=lxs; i<lxe; i++) { if(ys==0) { j = 1, from = j, to = 0; if(j_periodic) from = my-2; else if(jj_periodic) from = -2; grad_level[k][to][i] = grad_level[k][from][i]; } if(ye==my) { j = my-2, from = j, to = my-1; if(j_periodic) from = 1; else if(jj_periodic) from = my+1; grad_level[k][to][i] = grad_level[k][from][i]; } } } if(zs==0 || ze==mz) { int from, to; for (j=lys; j<lye; j++) for (i=lxs; i<lxe; i++) { if(zs==0) { k = 1, from = k, to = 0; if(k_periodic) from = mz-2; else if(kk_periodic) from = -2; grad_level[to][j][i] = grad_level[from][j][i]; } if(ze==mz) { k = mz-2, from = k, to = mz-1; if(k_periodic) from = 1; else if(kk_periodic) from = mz+1; grad_level[to][j][i] = grad_level[from][j][i]; } } } DAVecRestoreArray(da, L, &grad_level); DALocalToLocalBegin(user->da, L, INSERT_VALUES, L); DALocalToLocalEnd(user->da, L, INSERT_VALUES, L); DAVecGetArray(da, L, &grad_level); for(k=zs; k<ze; k++) for(j=ys; j<ye; 
j++) for(i=xs; i<xe; i++) { if (i<= 0 || i>= mx-1 || j<=0 || j>=my-1 || k<=0 || k>=mz-1 || nvert[k][j][i]>1.1){ rhs[k][j][i]=0.; continue; } if(nvert[k][j][i]>0.1) { rhs[k][j][i]=0.; continue; } if(wall_distance) { if(nvert[k][j][i]>0.1) { rhs[k][j][i]=0.; continue; } if(i <= 1 && (user->bctype[0]==1 || user->bctype[0]==-1 || user->bctype[0]==-2)) { rhs[k][j][i]=0.; continue; } if(i >= mx-2 && (user->bctype[1]==1 || user->bctype[1]==-1 || user->bctype[1]==-2)) { rhs[k][j][i]=0.; continue; } if(j <=1 && (user->bctype[2]==1 || user->bctype[2]==-1 || user->bctype[2]==-2)) { rhs[k][j][i]=0.; continue; } if(j >=my-2 && (user->bctype[3]==1 || user->bctype[3]==-1 || user->bctype[3]==-2 || user->bctype[3]==12)) { rhs[k][j][i]=0.; continue; } if(k<=1 && (user->bctype[4]==1 || user->bctype[4]==-1 || user->bctype[4]==-2)) { rhs[k][j][i]=0.; continue; } if(k>=mz-2 && (user->bctype[5]==1 || user->bctype[5]==-1 || user->bctype[5]==-2)){ rhs[k][j][i]=0.; continue; } } else if( !wall_distance && user->bctype[4]==5 && user->bctype[5]==4) { //if ( fix_inlet && k==1 ) { rhs[k][j][i] = 0; continue; } //haha if ( fix_outlet && k==mz-2 ) { rhs[k][j][i] = 0; continue; } // important to stabilize outlet } double dx=pow(1./aj[k][j][i],1./3.); if(dthick_set) dx = dthick; double sgn = sign1(level0[k][j][i],dx); //if(wall_distance) sgn = sign(level0[k][j][i]); double denom[3][3][3], num[3][3][3], weight[3][3][3]; for(int p=-1; p<=1; p++) for(int q=-1; q<=1; q++) for(int r=-1; r<=1; r++) { int R=r+1, Q=q+1, P=p+1; int K=k+r, J=j+q, I=i+p; double phi = level[K][J][I], grad = grad_level[K][J][I], dx=pow(1./aj[K][J][I],1./3.); if(dthick_set) dx = dthick; double f = dH(phi,dx) * grad; double _sgn = sign1( level0[K][J][I], dx ); //if(wall_distance) _sgn = sign(level0[K][J][I]); num[R][Q][P] = dH(phi,dx) * _sgn * ( 1. 
- grad ); denom[R][Q][P] = dH(phi,dx) * f; } for(int p=-1; p<=1; p++) for(int q=-1; q<=1; q++) for(int r=-1; r<=1; r++) { int R=r+1, Q=q+1, P=p+1; int K=k+r, J=j+q, I=i+p; if( (!i_periodic && !ii_periodic && (I==0 || I==mx-1 ) ) || (!j_periodic && !jj_periodic && (J==0 || J==my-1 ) ) || (!k_periodic && !kk_periodic && (K==0 || K==mz-1) ) || nvert[K][J][I]>0.1) { num[R][Q][P] = num[1][1][1]; denom[R][Q][P] = denom[1][1][1]; } } get_weight (i, j, k, mx, my, mz, aj, nvert, 0.1, weight); double numerator = integrate_testfilter(num, weight); double denominator = integrate_testfilter(denom, weight); double correction; if( fabs(denominator)<1.e-10 ) correction=0; else { double grad = grad_level[k][j][i]; double phi = level[k][j][i]; double dx=pow(1./aj[k][j][i],1./3.); if(dthick_set) dx = dthick; double f = dH(phi,dx) * grad; correction = - numerator / denominator; correction *= dH(phi,dx) * grad; } double dlevel_dx, dlevel_dy, dlevel_dz; double dldc, dlde, dldz; double csi0 = csi[k][j][i].x, csi1 = csi[k][j][i].y, csi2 = csi[k][j][i].z; double eta0 = eta[k][j][i].x, eta1 = eta[k][j][i].y, eta2 = eta[k][j][i].z; double zet0 = zet[k][j][i].x, zet1 = zet[k][j][i].y, zet2 = zet[k][j][i].z; double ajc = aj[k][j][i]; //Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sgn, wall_distance, level, nvert, &dldc, &dlde, &dldz); Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sign(level0[k][j][i]), wall_distance, level, nvert, &dldc, &dlde, &dldz); //100521 Compute_dscalar_dxyz (csi0, csi1, csi2, eta0, eta1, eta2, zet0, zet1, zet2, ajc, dldc, dlde, dldz, &dlevel_dx, &dlevel_dy, &dlevel_dz); rhs[k][j][i] = sgn * ( 1. 
- sqrt( dlevel_dx*dlevel_dx + dlevel_dy*dlevel_dy + dlevel_dz*dlevel_dz ) ); if(nvert[k][j][i+1]+nvert[k][j][i-1]+nvert[k][j+1][i]+nvert[k][j-1][i]+nvert[k+1][j][i]+nvert[k-1][j][i]>0.1) { // correction = 0; } if(!wall_distance) rhs[k][j][i] += correction; // Sussman Fetami } DAVecRestoreArray(da, L, &grad_level); DAVecRestoreArray(fda, Csi, &csi); DAVecRestoreArray(fda, Eta, &eta); DAVecRestoreArray(fda, Zet, &zet); DAVecRestoreArray(da, Aj, &aj); DAVecRestoreArray(da, user->lNvert, &nvert); DAVecRestoreArray(da, user->lLevelset, &level); DAVecRestoreArray(da, lLevelset0, &level0); DAVecRestoreArray(da, Levelset_RHS, &rhs); VecDestroy(L); VecDestroy(lLevelset0); }
/**
 * Optimises the rotation that maximises the weighted volume overlap of the
 * mapped pharmacophore point pairs.
 *
 * Four start orientations (the four unit quaternions) are tried. From each
 * one, up to 100 iterations update the rotor with 0.9 times the product of the
 * inverted Hessian and the gradient, then re-normalise it. A run stops when
 * the volume is NaN or increases by less than 1e-5. The best run is returned.
 *
 * @param n  when true, the relative orientation of the normals is taken into
 *           account for AROM-AROM pairs (absolute value of the contribution)
 *           and for hydrogen-bond type pairs (signed contribution), provided
 *           both points carry a normal
 * @return   centers and rotation matrices of both pharmacophores together with
 *           the best rotor, its volume and its iteration count
 *
 * Changes relative to the earlier version, with the same results: the unused
 * gradient/Hessian "update" objects are no longer constructed, the scratch
 * vector Aq is allocated once per start orientation instead of once per point
 * pair, and the duplicated gradient/Hessian update code was merged.
 */
SolutionInfo
Alignment::align(bool n)
{
	// create initial solution
	SolutionInfo si;
	si.volume = -1000.0;
	si.iterations = 0;
	si.center1 = _refCenter;
	si.center2 = _dbCenter;
	si.rotation1 = _refRotMat;
	si.rotation2 = _dbRotMat;

	// scaling of the exclusion spheres
	double scale(1.0);
	if (_nbrExcl != 0)
	{
		scale /= _nbrExcl;
	}

	// try 4 different start orientations
	for (unsigned int _call(0); _call < 4; ++_call )
	{
		// create initial rotation quaternion
		SiMath::Vector rotor(4,0.0);
		rotor[_call] = 1.0;

		double volume(0.0), oldVolume(-999.99), v(0.0);
		SiMath::Matrix hessian(4,4,0.0);
		SiMath::Vector Aq(4,0.0); // scratch: A_k * rotor, overwritten for every pair

		unsigned int ii(0);
		for ( ; ii < 100; ++ii)
		{
			// compute gradient of volume
			_grad = 0.0;
			volume = 0.0;
			hessian = 0.0;
			for (unsigned int i(0); i < _refMap.size(); ++i)
			{
				// compute the volume overlap of the two pharmacophore points
				SiMath::Matrix * AkA = _AkA[i];
				for (unsigned int row(0); row < 4; ++row)
				{
					Aq[row] = (*AkA)[row][0] * rotor[0] + (*AkA)[row][1] * rotor[1] + (*AkA)[row][2] * rotor[2] + (*AkA)[row][3] * rotor[3];
				}

				double qAq = Aq[0] * rotor[0] + Aq[1] * rotor[1] + Aq[2] * rotor[2] + Aq[3] * rotor[3];

				v = GCI2 * pow(PI/(_refMap[i].alpha+_dbMap[i].alpha),1.5) * exp(-qAq);

				const bool bothNormals = n && (_refMap[i].hasNormal) && (_dbMap[i].hasNormal);
				const bool aromPair = (_refMap[i].func == AROM) && (_dbMap[i].func == AROM);
				const bool hbondPair =
					((_refMap[i].func == HACC) || (_refMap[i].func == HDON) || (_refMap[i].func == HYBH))
					&& ((_dbMap[i].func == HYBH) || (_dbMap[i].func == HACC) || (_dbMap[i].func == HDON));

				if (bothNormals && (aromPair || hbondPair))
				{
					// directional point pair: weight the overlap with the normal contribution
					double c = _normalContribution(_refMap[i].normal, _dbMap[i].normal, rotor);

					// for aromatic rings only the planar directions count, therefore
					// the absolute value of the cosine is taken; for hydrogen donors
					// and acceptors opposite directions keep their negative impact
					if (aromPair && (c < 0))
					{
						c *= -1.0;
						_dCdq *= -1.0;
						_d2Cdq2 *= -1.0;
					}

					for (unsigned int hi(0); hi < 4; hi++)
					{
						_grad[hi] += v * ( _dCdq[hi] - 2.0 * c * Aq[hi] );
						for (unsigned int hj(0); hj < 4; hj++)
						{
							hessian[hi][hj] += v * (_d2Cdq2[hi][hj] - 2.0 * _dCdq[hi]*Aq[hj] + 2.0 * c * (2.0*Aq[hi]*Aq[hj] - (*AkA)[hi][hj]));
						}
					}
					v *= c;
				}
				else
				{
					if (_refMap[i].func == EXCL)
					{
						// scale volume overlap of exclusion sphere with a negative scaling factor
						// => exclusion spheres have a negative impact
						v *= -scale;
					}

					// update gradient and hessian directions
					for (unsigned int hi(0); hi < 4; hi++)
					{
						_grad[hi] -= 2.0 * v * Aq[hi];
						for (unsigned int hj(0); hj < 4; hj++)
						{
							hessian[hi][hj] += 2.0 * v * (2.0*Aq[hi]*Aq[hj] - (*AkA)[hi][hj]);
						}
					}
				}

				volume += v;
			}

			// stop iterations if the increase in volume overlap is too small (gradient ascent)
			// or if the volume is not defined
			if (std::isnan(volume) || (volume - oldVolume < 1e-5))
			{
				break;
			}

			// reset old volume
			oldVolume = volume;

			inverseHessian(hessian);
			// update gradient based on inverse hessian
			_grad = rowProduct(hessian,_grad);
			// small scaling of the gradient
			_grad *= 0.9;

			// update rotor based on gradient information
			rotor += _grad;

			// normalise rotor such that it has unit norm
			normalise(rotor);
		}

		// save result in info structure
		if (oldVolume > si.volume)
		{
			si.rotor = rotor;
			si.volume = oldVolume;
			si.iterations = ii;
		}
	}

	return si;
}
/**
Works out the segments along which two nav polygons are linked across a pair
of edges. A walk link, for instance, carries the edge segment on which the two
polygons meet.

Whether a link exists at all is decided here as a by-product: a link of a
given kind exists exactly when a suitable segment for it can be found, so any
segment left unset in the result means "no link of that kind".

@param s1        One of the 2D endpoints of the source edge in the plane
@param s2        The other 2D endpoint of the source edge in the plane
@param d1        One of the 2D endpoints of the destination edge in the plane
@param d2        The other 2D endpoint of the destination edge in the plane
@param xOverlap  The horizontal overlap interval between the 2D edges in the plane
@return          The link segments found (walk, step up and step down, in both directions)
*/
NavMeshGenerator::LinkSegments NavMeshGenerator::calculate_link_segments(const Vector2d& s1, const Vector2d& s2, const Vector2d& d1, const Vector2d& d2, const Interval& xOverlap) const
{
    LinkSegments result;

    // Both edges are expressed as lines y = slope.x + intercept, so neither may be vertical.
    assert(fabs(s2.x - s1.x) > EPSILON);
    assert(fabs(d2.x - d1.x) > EPSILON);

    const double sourceSlope = (s2.y - s1.y) / (s2.x - s1.x);
    const double destSlope = (d2.y - d1.y) / (d2.x - d1.x);
    const double sourceIntercept = s1.y - sourceSlope * s1.x;
    const double destIntercept = d1.y - destSlope * d1.x;

    // (yD - yS)(x) = slopeDiff . x + interceptDiff
    const double slopeDiff = destSlope - sourceSlope;
    const double interceptDiff = destIntercept - sourceIntercept;

    if(!(fabs(slopeDiff) > EPSILON))
    {
        // Parallel edges: the height difference is the same everywhere, so the whole
        // overlap is either one step up/step down pair, or a single walk link.
        if(fabs(interceptDiff) <= m_maxHeightDifference)
        {
            const double xLow = xOverlap.low();
            const double xHigh = xOverlap.high();

            Vector2d sourceLow(xLow, sourceSlope*xLow+sourceIntercept);
            Vector2d sourceHigh(xHigh, sourceSlope*xHigh+sourceIntercept);
            Vector2d destLow(xLow, destSlope*xLow+destIntercept);
            Vector2d destHigh(xHigh, destSlope*xHigh+destIntercept);

            // The sign of the height difference selects the kind of link.
            if(interceptDiff > SMALL_EPSILON)
            {
                // Destination above source: step up going across, step down coming back.
                result.stepUpSourceToDestSegment.reset(new LineSegment2d(sourceLow,sourceHigh));
                result.stepDownDestToSourceSegment.reset(new LineSegment2d(destLow,destHigh));
            }
            else if(interceptDiff < -SMALL_EPSILON)
            {
                // Destination below source: step down going across, step up coming back.
                result.stepDownSourceToDestSegment.reset(new LineSegment2d(sourceLow,sourceHigh));
                result.stepUpDestToSourceSegment.reset(new LineSegment2d(destLow,destHigh));
            }
            else
            {
                // Source and destination are level (to within SMALL_EPSILON): plain walk link.
                result.walkSegment.reset(new LineSegment2d(sourceLow,sourceHigh));
            }
        }
    }
    else
    {
        // Non-parallel edges: the lines cross, giving a step down stretch on one side of
        // the crossing point and a step up stretch on the other. Three x positions matter:
        //   walkX     where yD - yS = 0
        //   stepUpX   where yD - yS = +m_maxHeightDifference (furthest point at which you can step up)
        //   stepDownX where yD - yS = -m_maxHeightDifference (furthest point at which you can step down)
        const double walkX = -interceptDiff / slopeDiff;
        const double stepUpX = (m_maxHeightDifference - interceptDiff) / slopeDiff;
        const double stepDownX = (-m_maxHeightDifference - interceptDiff) / slopeDiff;

        // Build each stretch between the crossing point and its limit, then clip it
        // to the x range the two edges actually share.
        Interval stepDownRange(std::min(walkX,stepDownX), std::max(walkX,stepDownX));
        Interval stepUpRange(std::min(walkX,stepUpX), std::max(walkX,stepUpX));
        stepDownRange = stepDownRange.intersect(xOverlap);
        stepUpRange = stepUpRange.intersect(xOverlap);

        if(!stepDownRange.empty())
        {
            const double xLow = stepDownRange.low();
            const double xHigh = stepDownRange.high();

            Vector2d sourceLow(xLow, sourceSlope*xLow+sourceIntercept);
            Vector2d sourceHigh(xHigh, sourceSlope*xHigh+sourceIntercept);
            result.stepDownSourceToDestSegment.reset(new LineSegment2d(sourceLow,sourceHigh));

            Vector2d destLow(xLow, destSlope*xLow+destIntercept);
            Vector2d destHigh(xHigh, destSlope*xHigh+destIntercept);
            result.stepUpDestToSourceSegment.reset(new LineSegment2d(destLow,destHigh));
        }

        if(!stepUpRange.empty())
        {
            const double xLow = stepUpRange.low();
            const double xHigh = stepUpRange.high();

            Vector2d sourceLow(xLow, sourceSlope*xLow+sourceIntercept);
            Vector2d sourceHigh(xHigh, sourceSlope*xHigh+sourceIntercept);
            result.stepUpSourceToDestSegment.reset(new LineSegment2d(sourceLow,sourceHigh));

            Vector2d destLow(xLow, destSlope*xLow+destIntercept);
            Vector2d destHigh(xHigh, destSlope*xHigh+destIntercept);
            result.stepDownDestToSourceSegment.reset(new LineSegment2d(destLow,destHigh));
        }
    }

    return result;
}