// Kokkos kernel: assembles the thickness-update residual for a single cell.
//
// At every quadrature point the x/y gradient of dH is interpolated from the
// nodal values with gradBF, scaled by rho*g and integrated against the
// weighted basis functions wBF. The resulting nodal sums are added to
// components 0 and 1 of InputResidual; when numVecDims==3 the third component
// is copied through unchanged.
//
// The nodal sums are accumulated directly in Residual(cell,...) instead of in
// the `res` scratch container: `res` is indexed by node only, so all cells
// processed concurrently by the parallel_for would otherwise write the same
// storage. The accumulation order (zero, sum over quadrature points, then add
// InputResidual) is unchanged.
// NOTE(review): relies on Residual and InputResidual being distinct fields -
// confirm against the evaluator's constructor.
//
// tag  : dispatch tag, not used in the body
// cell : index of the cell to process
KOKKOS_INLINE_FUNCTION
  void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
  operator() (const StokesFOImplicitThicknessUpdateResid_Tag& tag, const int& cell) const {

    const double rho_g=rho*g;

    // Use the output field of this cell as the accumulator, starting from zero.
    for (int node=0; node < numNodes; ++node){
      Residual(cell,node,0)=0.0;
      Residual(cell,node,1)=0.0;
    }

    for (int qp=0; qp < numQPs; ++qp) {
      // Gradient of dH at this quadrature point
      ScalarT dHdiffdx = 0;//Ugrad(cell,qp,2,0);
      ScalarT dHdiffdy = 0;//Ugrad(cell,qp,2,1);
      for (int node=0; node < numNodes; ++node) {
        dHdiffdx += dH(cell,node) * gradBF(cell,node, qp,0);
        dHdiffdy += dH(cell,node) * gradBF(cell,node, qp,1);
      }

      for (int node=0; node < numNodes; ++node) {
        Residual(cell,node,0) += rho_g*dHdiffdx*wBF(cell,node,qp);
        Residual(cell,node,1) += rho_g*dHdiffdy*wBF(cell,node,qp);
      }
    }

    // Add the incoming residual; the third component (if any) passes through.
    for (int node=0; node < numNodes; ++node) {
      Residual(cell,node,0) += InputResidual(cell,node,0);
      Residual(cell,node,1) += InputResidual(cell,node,1);
      if(numVecDims==3)
        Residual(cell,node,2) = InputResidual(cell,node,2);
    }

 }
Пример #2
0
	/// <summary>
	/// Computes the thresholded gradient (Snatos97).
	/// </summary>
	/// <returns>The focus measure value</returns>
	double BasicFM::computeGRAT()
	{

		if (checkInput()) {

			cv::Mat dH = mSrcImg(cv::Range::all(), cv::Range(1, mSrcImg.cols)) - mSrcImg(cv::Range::all(), cv::Range(0, mSrcImg.cols - 1));
			cv::Mat dV = mSrcImg(cv::Range(1, mSrcImg.rows), cv::Range::all()) - mSrcImg(cv::Range(0, mSrcImg.rows - 1), cv::Range::all());
			//dH = cv::abs(dH);
			//dV = cv::abs(dV);

			//cv::Mat FM = cv::max(dH, dV);
			cv::Mat FM = cv::max(dH(cv::Range(0, dH.rows - 1), cv::Range::all()), dV(cv::Range::all(), cv::Range(0, dV.cols - 1)));

			double thr = 0;
			cv::Mat mask = FM >= thr;
			mask.convertTo(mask, CV_32FC1, 255.0);

			FM = FM.mul(mask);

			cv::Scalar fm = cv::sum(FM) / cv::sum(mask);
            //normalize
			mVal = fm[0] / 255.0;
		}

		return mVal;
	}
// Evaluates the thickness-update residual for every cell of the workset.
//
// Serial path (ALBANY_KOKKOS_UNDER_DEVELOPMENT not defined): for each cell the
// x/y gradient of dH is interpolated at every quadrature point, scaled by
// rho*g, integrated against wBF and added to components 0 and 1 of
// InputResidual; a third component, when numVecDims==3, is copied unchanged.
// Kokkos path: the same work is dispatched per cell through operator().
void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{

#ifndef ALBANY_KOKKOS_UNDER_DEVELOPMENT
  typedef Intrepid2::FunctionSpaceTools FST; 

  // Scratch accumulator indexed by (node, component); re-initialized for each cell
  Intrepid2::FieldContainer_Kokkos<ScalarT, PHX::Layout, PHX::Device> nodalSum(numNodes,2);

  const double rhoG = rho*g;

  for (std::size_t c=0; c < workset.numCells; ++c) {
    nodalSum.initialize();

    for (std::size_t q=0; q < numQPs; ++q) {
      // Gradient of dH at quadrature point q
      ScalarT gradX = 0;
      ScalarT gradY = 0;
      for (std::size_t n=0; n < numNodes; ++n) {
        gradX += dH(c,n) * gradBF(c,n, q,0);
        gradY += dH(c,n) * gradBF(c,n, q,1);
      }

      // Integrate against the weighted basis functions
      for (std::size_t n=0; n < numNodes; ++n) {
        nodalSum(n,0) += rhoG*gradX*wBF(c,n,q);
        nodalSum(n,1) += rhoG*gradY*wBF(c,n,q);
      }
    }

    // Combine with the incoming residual
    for (std::size_t n=0; n < numNodes; ++n) {
      Residual(c,n,0) = InputResidual(c,n,0)+nodalSum(n,0);
      Residual(c,n,1) = InputResidual(c,n,1)+nodalSum(n,1);
      if(numVecDims==3)
        Residual(c,n,2) = InputResidual(c,n,2);
    }
  }

#else

  Kokkos::parallel_for(StokesFOImplicitThicknessUpdateResid_Policy(0,workset.numCells),*this);


#endif
}
// Recomputes the nodal coordinates of a layered mesh after a thickness change.
//
// For every node of every cell in the workset:
//   h   = H0 + dH            (updated thickness)
//   bed = topSurface - H0    (elevation of the bottom)
//   z   = bed + sigma(level) * max(h, minH)
// where sigma(level) is the cumulative layer ratio of the node's layer
// (0 at level 0, 1 at level numLayers). Only component 2 of coordVecOut gets
// the new value; the other components are copied from coordVecIn.
//
// workset : supplies the cell count, the workset index and the discretization
//           used to look up node ids and the layered mesh numbering.
void UpdateZCoordinateMovingTop<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  const Albany::LayeredMeshNumbering<LO>& layeredMeshNumbering = *workset.disc->getLayeredMeshNumbering();

  const int numLayers = layeredMeshNumbering.numLayers;
  const Teuchos::ArrayRCP<Teuchos::ArrayRCP<GO> >& wsElNodeID  = workset.disc->getWsElNodeID()[workset.wsIndex];
  const Teuchos::ArrayRCP<double>& layers_ratio = layeredMeshNumbering.layers_ratio;

  // Cumulative layer ratios; the end points are pinned to exactly 0 and 1.
  Teuchos::ArrayRCP<double> sigmaLevel(numLayers+1);
  sigmaLevel[0] = 0.; sigmaLevel[numLayers] = 1.;
  for(int i=1; i<numLayers; ++i)
    sigmaLevel[i] = sigmaLevel[i-1] + layers_ratio[i-1];

  for (std::size_t cell=0; cell < workset.numCells; ++cell ) {
    const Teuchos::ArrayRCP<GO>& elNodeID = wsElNodeID[cell];

    for (std::size_t node = 0; node < this->numNodes; ++node) {
      // Map the global node id to its local id, then to (base node, layer level).
      LO lnodeId = workset.disc->getOverlapNodeMapT()->getLocalElement(elNodeID[node]);
      LO base_id, ilevel;
      layeredMeshNumbering.getIndices(lnodeId, base_id,  ilevel);

      MeshScalarT h = H0(cell,node)+dH(cell,node);
      MeshScalarT bed = topSurface(cell,node)- H0(cell,node);

      for(std::size_t icomp=0; icomp< numDims; icomp++) {
        typename PHAL::Ref<MeshScalarT>::type val = coordVecOut(cell,node,icomp);
        // The thickness is clamped from below by minH before it is distributed over the layers.
        val = (icomp==2) ?
            (h>minH) ? MeshScalarT(bed + sigmaLevel[ ilevel]*h)
                    : MeshScalarT(bed + sigmaLevel[ ilevel]*minH)
           : coordVecIn(cell,node,icomp);
      }
    }
  }
}
Пример #5
0
	/// <summary>
	/// Computes Brenner's focus measure and determines the ratio of the median/mean.
	/// </summary>
	/// <returns>The focus measure value</returns>
	double BasicFM::computeROGR()
	{
		if (checkInput()) {

			cv::Mat dH = mSrcImg(cv::Range::all(), cv::Range(1, mSrcImg.cols)) - mSrcImg(cv::Range::all(), cv::Range(0, mSrcImg.cols - 1));
			cv::Mat dV = mSrcImg(cv::Range(1, mSrcImg.rows), cv::Range::all()) - mSrcImg(cv::Range(0, mSrcImg.rows - 1), cv::Range::all());
			dH = cv::abs(dH);
			dV = cv::abs(dV);

			cv::Mat FM = cv::max(dH(cv::Range(0, dH.rows - 1), cv::Range::all()), dV(cv::Range::all(), cv::Range(0, dV.cols - 1)));
			FM = FM.mul(FM);

			cv::Scalar m = cv::mean(FM);
			cv::Mat tmp;
			FM.convertTo(tmp, CV_32F);


            double r = 255.0*255.0;
			//mVal = r > 0 ? m[0] / r : m[0];
			mVal = m[0] / r;
		}

		return mVal;
	}
Пример #6
0
/**
    Restarted GMRES (single-complex precision) for the linear system A * x = b.

    The iteration starts from x = 0. Each outer iteration builds up to
    `restart` Krylov vectors with modified Gram-Schmidt, solves the small
    least-squares problem on the host and updates x on the device.

    @param[in]      A           system matrix
    @param[in]      b           right-hand side
    @param[in,out]  x           solution vector; overwritten
    @param[in,out]  solver_par  reads restart, maxiter, epsilon and verbose;
                                receives solver, numiter, info, init_res,
                                iter_res, final_res, runtime and (if verbose > 0)
                                res_vec / timing
    @param[in]      queue       queue handed to the sparse routines

    @return MAGMA_SUCCESS, or MAGMA_ERR_HOST_ALLOC / MAGMA_ERR_DEVICE_ALLOC if
            a workspace allocation failed. The convergence status is reported
            through solver_par->info.
*/
extern "C" magma_int_t
magma_cgmres(
    magma_c_sparse_matrix A, 
    magma_c_vector b, 
    magma_c_vector *x,  
    magma_c_solver_par *solver_par,
    magma_queue_t queue )
{
    // set queue for old dense routines
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );
    
    magma_int_t stat_cpu = 0, stat_dev = 0;
    // prepare solver feedback
    solver_par->solver = Magma_GMRES;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS;

    // local variables
    magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE, 
                                                c_mone = MAGMA_C_NEG_ONE;
    magma_int_t dofs = A.num_rows;
    magma_int_t i, j, k, m = 0;
    magma_int_t restart = min( dofs-1, solver_par->restart );
    magma_int_t ldh = restart+1;
    float nom, rNorm, RNorm, nom0, betanom, r0 = 0.;

    // CPU workspace
    // Pointers start as NULL so that, after a partial allocation failure,
    // the cleanup below never frees an indeterminate pointer.
    //magma_setdevice(0);
    magmaFloatComplex *H = NULL, *HH = NULL, *y = NULL, *h1 = NULL;
    stat_cpu += magma_cmalloc_pinned( &H, (ldh+1)*ldh );
    stat_cpu += magma_cmalloc_pinned( &y, ldh );
    stat_cpu += magma_cmalloc_pinned( &HH, ldh*ldh );
    stat_cpu += magma_cmalloc_pinned( &h1, ldh );
    if( stat_cpu != 0){
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_ERR_HOST_ALLOC;
    }

    // GPU workspace
    magma_c_vector r, q, q_t;
    magma_c_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_c_vinit( &q, Magma_DEV, dofs*(ldh+1), c_zero, queue );
    // q_t is a non-owning view onto one column of q; it is never freed
    q_t.memory_location = Magma_DEV; 
    q_t.dval = NULL; 
    q_t.num_rows = q_t.nnz = dofs; q_t.num_cols = 1;

    magmaFloatComplex *dy = NULL, *dH = NULL;
    stat_dev += magma_cmalloc( &dy, ldh );
    stat_dev += magma_cmalloc( &dH, (ldh+1)*ldh );
    if( stat_dev != 0){
        // release everything acquired so far, each buffer exactly once
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        magma_free( dH );
        magma_free( dy );
        magma_c_vfree(&r, queue );
        magma_c_vfree(&q, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_ERR_DEVICE_ALLOC;
    }

    // GPU stream
    // (not used by the active modified Gram-Schmidt path; kept for the
    //  disabled classical Gram-Schmidt variants noted below)
    magma_queue_t stream[2];
    magma_event_t event[1];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    magma_event_create( &event[0] );
    //magmablasSetKernelStream(stream[0]);

    magma_cscal( dofs, c_zero, x->dval, 1 );              //  x = 0
    magma_ccopy( dofs, b.dval, 1, r.dval, 1 );             //  r = b
    nom0 = betanom = magma_scnrm2( dofs, r.dval, 1 );     //  nom0= || r||
    nom = nom0  * nom0;
    solver_par->init_res = nom0;
    H(1,0) = MAGMA_C_MAKE( nom0, 0. ); 
    magma_csetvector(1, &H(1,0), 1, &dH(1,0), 1);

    if ( (r0 = nom0 * solver_par->epsilon ) < ATOLERANCE ){ 
        r0 = solver_par->epsilon;
    }
    if ( nom < r0 ) {
        // nothing to iterate on: leave through the common cleanup so that
        // workspace, streams and the event are released
        goto cleanup;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    // start iteration
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ) {

        for(k=1; k<=restart; k++) {

            // q[k-1] = r / H(k,k-1): normalise the current residual direction
            magma_ccopy(dofs, r.dval, 1, q(k-1), 1);       //  q[0]    = 1.0/||r||
            magma_cscal(dofs, 1./H(k,k-1), q(k-1), 1);    //  (to be fused)

            q_t.dval = q(k-1);
            //magmablasSetKernelStream(stream[0]);
            magma_c_spmv( c_one, A, q_t, c_zero, r, queue ); //  r = A q[k] 

            // modified Gram-Schmidt
            // (the fused and classical Gram-Schmidt variants selected via
            //  solver_par->ortho are disabled in this routine)
            for (i=1; i<=k; i++) {
                H(i,k) =magma_cdotc(dofs, q(i-1), 1, r.dval, 1);            
                    //  H(i,k) = q[i] . r
                magma_caxpy(dofs,-H(i,k), q(i-1), 1, r.dval, 1);            
                   //  r = r - H(i,k) q[i]
            }
            H(k+1,k) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); // H(k+1,k) = ||r|| 

            /*     Minimization of  || b-Ax ||  in H_k       */ 
            for (i=1; i<=k; i++) {
                HH(k,i) = magma_cblas_cdotc( i+1, &H(1,k), 1, &H(1,i), 1 );
            }
            h1[k] = H(1,k)*H(1,0); 
            if (k != 1) {
                for (i=1; i<k; i++) {
                    HH(k,i) = HH(k,i)/HH(i,i);//
                    for (m=i+1; m<=k; m++) {
                        HH(k,m) -= HH(k,i) * HH(m,i) * HH(i,i);
                    }
                    h1[k] -= h1[i] * HH(k,i);   
                }    
            }
            y[k] = h1[k]/HH(k,k); 
            if (k != 1)  
                for (i=k-1; i>=1; i--) {
                    y[i] = h1[i]/HH(i,i);
                    for (j=i+1; j<=k; j++)
                        y[i] -= y[j] * HH(j,i);
                }                    
            m = k;
            rNorm = fabs(MAGMA_C_REAL(H(k+1,k)));
        }/*     Minimization done       */ 
        // compute solution approximation
        magma_csetmatrix(m, 1, y+1, m, dy, m );
        magma_cgemv(MagmaNoTrans, dofs, m, c_one, q(0), dofs, dy, 1, 
                                                    c_one, x->dval, 1); 

        // compute residual
        magma_c_spmv( c_mone, A, *x, c_zero, r, queue );      //  r = - A * x
        magma_caxpy(dofs, c_one, b.dval, 1, r.dval, 1);  //  r = r + b
        H(1,0) = MAGMA_C_MAKE( magma_scnrm2(dofs, r.dval, 1), 0. ); 
                                            //  RNorm = H[1][0] = || r ||
        RNorm = MAGMA_C_REAL( H(1,0) );
        betanom = fabs(RNorm);  

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if (  betanom  < r0 ) {
            break;
        } 
    }

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    float residual;
    magma_cresidual( A, b, *x, &residual, queue );
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter) {
        solver_par->info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_SLOW_CONVERGENCE;
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = MAGMA_DIVERGENCE;
    }

cleanup:
    // free pinned memory
    magma_free_pinned( H );
    magma_free_pinned( y );
    magma_free_pinned( HH );
    magma_free_pinned( h1 );
    // free GPU memory
    magma_free(dy); 
    if (dH != NULL ) magma_free(dH); 
    magma_c_vfree(&r, queue );
    magma_c_vfree(&q, queue );

    // free GPU streams and events
    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_event_destroy( event[0] );
    //magmablasSetKernelStream(NULL);

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_cgmres */
Пример #7
0
/**
    Preconditioned restarted GMRES (double precision) for A * x = b.

    The iteration starts from x = 0. For every Krylov vector q[k-1] the left
    and then the right preconditioner are applied; the result is stored in
    z[k-1] and multiplied by A. The new direction is orthogonalised against
    the previous ones with the scheme selected by solver_par->ortho
    (Magma_MGS, Magma_FUSED_CGS, otherwise classical Gram-Schmidt). After
    `restart` steps the small least-squares problem is solved on the host and
    x is updated with the stored z vectors.

    @param[in]      A            system matrix
    @param[in]      b            right-hand side
    @param[in,out]  x            solution vector; overwritten
    @param[in,out]  solver_par   reads restart, maxiter, epsilon, verbose and
                                 ortho; receives solver, numiter, info,
                                 init_res, iter_res, final_res, runtime and
                                 (if verbose > 0) res_vec / timing
    @param[in]      precond_par  preconditioner handed to the apply routines

    @return MAGMA_SUCCESS, or MAGMA_ERR_DEVICE_ALLOC if a device workspace
            allocation failed. The convergence status is reported through
            solver_par->info (0, -1 or -2).
*/
magma_int_t
magma_dpgmres( magma_d_sparse_matrix A, magma_d_vector b, magma_d_vector *x,  
               magma_d_solver_par *solver_par, 
               magma_d_preconditioner *precond_par ){

    // prepare solver feedback
    solver_par->solver = Magma_PGMRES;
    solver_par->numiter = 0;
    solver_par->info = 0;

    // local variables
    double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE, 
                                                c_mone = MAGMA_D_NEG_ONE;
    magma_int_t dofs = A.num_rows;
    magma_int_t i, j, k, m = 0;
    magma_int_t restart = min( dofs-1, solver_par->restart );
    magma_int_t ldh = restart+1;
    double nom, rNorm, RNorm, nom0, betanom, r0 = 0.;

    // CPU workspace
    magma_setdevice(0);
    double *H, *HH, *y, *h1;
    magma_dmalloc_pinned( &H, (ldh+1)*ldh );
    magma_dmalloc_pinned( &y, ldh );
    magma_dmalloc_pinned( &HH, ldh*ldh );
    magma_dmalloc_pinned( &h1, ldh );

    // GPU workspace
    magma_d_vector r, q, q_t, z, z_t, t;
    magma_d_vinit( &t, Magma_DEV, dofs, c_zero );
    magma_d_vinit( &r, Magma_DEV, dofs, c_zero );
    magma_d_vinit( &q, Magma_DEV, dofs*(ldh+1), c_zero );
    magma_d_vinit( &z, Magma_DEV, dofs*(ldh+1), c_zero );
    magma_d_vinit( &z_t, Magma_DEV, dofs, c_zero );
    // q_t is a non-owning view onto one column of q; it is never freed
    q_t.memory_location = Magma_DEV; 
    q_t.val = NULL; 
    q_t.num_rows = q_t.nnz = dofs;

    double *dy = NULL, *dH = NULL;
    if (MAGMA_SUCCESS != magma_dmalloc( &dy, ldh ) ||
        MAGMA_SUCCESS != magma_dmalloc( &dH, (ldh+1)*ldh )) {
        // release everything acquired so far instead of leaking it;
        // a buffer whose allocation failed or was not attempted is still NULL
        magma_free(dy);
        magma_free(dH);
        magma_d_vfree(&t);
        magma_d_vfree(&r);
        magma_d_vfree(&q);
        magma_d_vfree(&z);
        magma_d_vfree(&z_t);
        magma_free_pinned( H );
        magma_free_pinned( y );
        magma_free_pinned( HH );
        magma_free_pinned( h1 );
        return MAGMA_ERR_DEVICE_ALLOC;
    }

    // GPU stream
    magma_queue_t stream[2];
    magma_event_t event[1];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    magma_event_create( &event[0] );
    magmablasSetKernelStream(stream[0]);

    magma_dscal( dofs, c_zero, x->val, 1 );              //  x = 0
    magma_dcopy( dofs, b.val, 1, r.val, 1 );             //  r = b
    nom0 = betanom = magma_dnrm2( dofs, r.val, 1 );     //  nom0= || r||
    nom = nom0  * nom0;
    solver_par->init_res = nom0;
    H(1,0) = MAGMA_D_MAKE( nom0, 0. ); 
    magma_dsetvector(1, &H(1,0), 1, &dH(1,0), 1);
    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) 
        r0 = ATOLERANCE;
    if ( nom < r0 ) {
        // nothing to iterate on: leave through the common cleanup so that the
        // workspace and streams are released and the kernel stream is reset
        goto cleanup;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    magma_device_sync(); tempo1=magma_wtime();
    if( solver_par->verbose > 0 ){
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    // start iteration
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ){
        magma_dcopy(dofs, r.val, 1, q(0), 1);       //  q[0] = 1.0/H(1,0) r
        magma_dscal(dofs, 1./H(1,0), q(0), 1);      //  (to be fused)

        for(k=1; k<=restart; k++) {
            q_t.val = q(k-1);
            magmablasSetKernelStream(stream[0]);
            // preconditioner
            //  z[k] = M^(-1) q(k)
            magma_d_applyprecond_left( A, q_t, &t, precond_par );      
            magma_d_applyprecond_right( A, t, &z_t, precond_par );     
  
            magma_dcopy(dofs, z_t.val, 1, z(k-1), 1);                  

            // r = A q[k] 
            magma_d_spmv( c_one, A, z_t, c_zero, r );


            if (solver_par->ortho == Magma_MGS ) {
                // modified Gram-Schmidt
                magmablasSetKernelStream(stream[0]);
                for (i=1; i<=k; i++) {
                    H(i,k) =magma_ddot(dofs, q(i-1), 1, r.val, 1);            
                        //  H(i,k) = q[i] . r
                    magma_daxpy(dofs,-H(i,k), q(i-1), 1, r.val, 1);            
                       //  r = r - H(i,k) q[i]
                }
                H(k+1,k) = MAGMA_D_MAKE( magma_dnrm2(dofs, r.val, 1), 0. );
                      //  H(k+1,k) = sqrt(r . r) 
                if (k < restart) {
                        magma_dcopy(dofs, r.val, 1, q(k), 1);                  
                      //  q[k] = 1.0/H[k][k-1] r
                        magma_dscal(dofs, 1./H(k+1,k), q(k), 1);               
                      //  (to be fused)   
                 }
            } else if (solver_par->ortho == Magma_FUSED_CGS ) {
                // fusing dgemv with dnrm2 in classical Gram-Schmidt
                magmablasSetKernelStream(stream[0]);
                magma_dcopy(dofs, r.val, 1, q(k), 1);  
                    // dH(1:k+1,k) = q[0:k] . r
                magmablas_dgemv(MagmaTrans, dofs, k+1, c_one, q(0), 
                                dofs, r.val, 1, c_zero, &dH(1,k), 1);
                    // r = r - q[0:k-1] dH(1:k,k)
                magmablas_dgemv(MagmaNoTrans, dofs, k, c_mone, q(0), 
                                dofs, &dH(1,k), 1, c_one, r.val, 1);
                   // 1) dH(k+1,k) = sqrt( dH(k+1,k) - dH(1:k,k) )
                magma_dcopyscale(  dofs, k, r.val, q(k), &dH(1,k) );  
                   // 2) q[k] = q[k] / dH(k+1,k) 

                magma_event_record( event[0], stream[0] );
                magma_queue_wait_event( stream[1], event[0] );
                magma_dgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]); 
                    // asynch copy dH(1:(k+1),k) to H(1:(k+1),k)
            } else {
                // classical Gram-Schmidt (default)
                // > explicitly calling magmabls
                magmablasSetKernelStream(stream[0]);                                                  
                magmablas_dgemv(MagmaTrans, dofs, k, c_one, q(0), 
                                dofs, r.val, 1, c_zero, &dH(1,k), 1); 
                                // dH(1:k,k) = q[0:k-1] . r
                #ifndef DNRM2SCALE 
                // start copying dH(1:k,k) to H(1:k,k)
                magma_event_record( event[0], stream[0] );
                magma_queue_wait_event( stream[1], event[0] );
                magma_dgetvector_async(k, &dH(1,k), 1, &H(1,k), 
                                                    1, stream[1]);
                #endif
                                  // r = r - q[0:k-1] dH(1:k,k)
                magmablas_dgemv(MagmaNoTrans, dofs, k, c_mone, q(0), 
                                    dofs, &dH(1,k), 1, c_one, r.val, 1);
                #ifdef DNRM2SCALE
                magma_dcopy(dofs, r.val, 1, q(k), 1);                 
                    //  q[k] = r / H(k,k-1) 
                magma_dnrm2scale(dofs, q(k), dofs, &dH(k+1,k) );     
                    //  dH(k+1,k) = sqrt(r . r) and r = r / dH(k+1,k)

                magma_event_record( event[0], stream[0] );            
                            // start sending dH(1:k,k) to H(1:k,k)
                magma_queue_wait_event( stream[1], event[0] );        
                            // can we keep H(k+1,k) on GPU and combine?
                magma_dgetvector_async(k+1, &dH(1,k), 1, &H(1,k), 1, stream[1]);
                #else
                H(k+1,k) = MAGMA_D_MAKE( magma_dnrm2(dofs, r.val, 1), 0. );   
                            //  H(k+1,k) = sqrt(r . r) 
                if( k<solver_par->restart ){
                        magmablasSetKernelStream(stream[0]);
                        magma_dcopy(dofs, r.val, 1, q(k), 1);                  
                            //  q[k]    = 1.0/H[k][k-1] r
                        magma_dscal(dofs, 1./H(k+1,k), q(k), 1);              
                            //  (to be fused)   
                 }
                #endif
            }
        }
        // wait for the asynchronous copies of dH into H before using H on the host
        magma_queue_sync( stream[1] );
        for( k=1; k<=restart; k++ ){
            /*     Minimization of  || b-Ax ||  in H_k       */ 
            for (i=1; i<=k; i++) {
                #if defined(PRECISION_z) || defined(PRECISION_c)
                cblas_ddot_sub( i+1, &H(1,k), 1, &H(1,i), 1, &HH(k,i) );
                #else
                HH(k,i) = cblas_ddot(i+1, &H(1,k), 1, &H(1,i), 1);
                #endif
            }
            h1[k] = H(1,k)*H(1,0); 
            if (k != 1)
                for (i=1; i<k; i++) {
                    for (m=i+1; m<k; m++){
                        HH(k,m) -= HH(k,i) * HH(m,i);
                    }
                    HH(k,k) -= HH(k,i) * HH(k,i) / HH(i,i);
                    HH(k,i) = HH(k,i)/HH(i,i);
                    h1[k] -= h1[i] * HH(k,i);   
                }    
            y[k] = h1[k]/HH(k,k); 
            if (k != 1)  
                for (i=k-1; i>=1; i--) {
                    y[i] = h1[i]/HH(i,i);
                    for (j=i+1; j<=k; j++)
                        y[i] -= y[j] * HH(j,i);
                }                    
            m = k;
            rNorm = fabs(MAGMA_D_REAL(H(k+1,k)));
        }

        // x = x + Z y : update the solution with the preconditioned directions
        magma_dsetmatrix_async(m, 1, y+1, m, dy, m, stream[0]);
        magmablasSetKernelStream(stream[0]);
        magma_dgemv(MagmaNoTrans, dofs, m, c_one, z(0), dofs, dy, 1, 
                                                    c_one, x->val, 1); 
        magma_d_spmv( c_mone, A, *x, c_zero, r );      //  r = - A * x
        magma_daxpy(dofs, c_one, b.val, 1, r.val, 1);  //  r = r + b
        H(1,0) = MAGMA_D_MAKE( magma_dnrm2(dofs, r.val, 1), 0. ); 
                                            //  RNorm = H[1][0] = || r ||
        RNorm = MAGMA_D_REAL( H(1,0) );
        betanom = fabs(RNorm);  

        if( solver_par->verbose > 0 ){
            magma_device_sync(); tempo2=magma_wtime();
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if (  betanom  < r0 ) {
            break;
        } 
    }

    magma_device_sync(); tempo2=magma_wtime();
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    magma_dresidual( A, b, *x, &residual );
    solver_par->iter_res = betanom;
    solver_par->final_res = residual;

    if( solver_par->numiter < solver_par->maxiter){
        solver_par->info = 0;
    }else if( solver_par->init_res > solver_par->final_res ){
        if( solver_par->verbose > 0 ){
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = -2;
    }
    else{
        if( solver_par->verbose > 0 ){
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        solver_par->info = -1;
    }

cleanup:
    // free pinned memory
    magma_free_pinned( H );
    magma_free_pinned( y );
    magma_free_pinned( HH );
    magma_free_pinned( h1 );
    // free GPU memory
    magma_free(dy); 
    if (dH != NULL ) magma_free(dH); 
    magma_d_vfree(&t);
    magma_d_vfree(&r);
    magma_d_vfree(&q);
    magma_d_vfree(&z);
    magma_d_vfree(&z_t);

    // free GPU streams and events
    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_event_destroy( event[0] );
    magmablasSetKernelStream(NULL);

    return MAGMA_SUCCESS;
}   /* magma_dpgmres */
Пример #8
0
// Evaluates the right-hand side of the level-set distance-function equation on the
// locally owned part of the grid and writes it into Levelset_RHS.
//
// For every owned node that is not on the outermost grid layer and has nvert <= 0.1
// the result is
//     rhs = sgn * ( 1 - |grad(level)| )  [+ correction when wall_distance == 0]
// where sgn = sign1(level0, dx), the gradient is obtained through
// Compute_dlevel_center_levelset + Compute_dscalar_dxyz, and "correction" is built
// from 3x3x3 stencil sums of dH()-weighted quantities (marked "Sussman Fetami" by the
// original author; presumably the Sussman & Fatemi constraint - confirm).
// All other owned nodes get rhs = 0.
//
// Parameters:
//   user          - solver context; this routine reads its DAs (da, fda), grid info,
//                   metric vectors (lCsi, lEta, lZet, lAj), lNvert, lLevelset, lP
//                   (used only as a template for VecDuplicate) and bctype[0..5].
//   Levelset_RHS  - output vector; zeroed first, then filled node by node.
//   wall_distance - when non-zero, nodes within two layers of a boundary whose bctype
//                   is 1, -1 or -2 (also 12 for bctype[3]) are forced to zero and the
//                   correction term is not added; the flag is also forwarded to
//                   Compute_dlevel_center_levelset.
//
// Names used here but not declared in this function (defined elsewhere in the
// project): LevelSet0, dthick_set, dthick, i_periodic/ii_periodic,
// j_periodic/jj_periodic, k_periodic/kk_periodic.
void Distance_Function_RHS (UserCtx *user, Vec Levelset_RHS, int wall_distance)
{
	DA		da = user->da, fda = user->fda;
	DALocalInfo	info = user->info;
	PetscInt	xs,	xe,	ys,	ye,	zs,	ze;
	PetscInt	mx,	my,	mz;
	PetscInt	i, j,	k;
	// NOTE(review): dpdc, dpde, dpdz are not referenced anywhere in this function.
	PetscReal	dpdc,	dpde,	dpdz;
	
	Vec		Csi	=	user->lCsi,	Eta	=	user->lEta,	Zet	=	user->lZet;
	Vec		Aj	=	user->lAj;

	Cmpnts	***csi,	***eta,	***zet;
	PetscReal	***aj, ***level, ***level0,	***rhs,	***grad_level;

	PetscInt	lxs, lys,	lzs, lxe,	lye, lze;
	PetscReal	***nvert;
	
	Vec	L, lLevelset0;	// grad	level, level0
	
	// Owned index range [xs,xe) x [ys,ye) x [zs,ze) of this process.
	xs = info.xs;	xe = xs	+	info.xm;
	ys = info.ys;	ye = ys	+	info.ym;
	zs = info.zs;	ze = zs	+	info.zm;
	
	lxs	=	xs;	lxe	=	xe;
	lys	=	ys;	lye	=	ye;
	lzs	=	zs;	lze	=	ze;
	
	mx = info.mx;	my = info.my;	mz = info.mz;
	
	// Shrink the loop range by one layer wherever the owned range touches the
	// global grid boundary, so [lxs,lxe) etc. exclude indices 0 and m*-1.
	if (xs==0) lxs = xs+1;
	if (ys==0) lys = ys+1;
	if (zs==0) lzs = zs+1;

	if (xe==mx)	lxe	=	xe-1;
	if (ye==my)	lye	=	ye-1;
	if (ze==mz)	lze	=	ze-1;
	
	VecSet(Levelset_RHS, 0);
	
	// Temporary vectors with the layout of user->lP: L holds |grad(level)|,
	// lLevelset0 receives a copy of LevelSet0 (see the scatter just below).
	VecDuplicate(user->lP, &L);
	VecDuplicate(user->lP, &lLevelset0);
	
	DAGlobalToLocalBegin(user->da, LevelSet0, INSERT_VALUES, lLevelset0);
	DAGlobalToLocalEnd(user->da, LevelSet0,	INSERT_VALUES, lLevelset0);
	
	DAVecGetArray(fda, Csi,	&csi);
	DAVecGetArray(fda, Eta,	&eta);
	DAVecGetArray(fda, Zet,	&zet);
	DAVecGetArray(da,	 Aj,	&aj);
	DAVecGetArray(da,	user->lNvert,	&nvert);
	DAVecGetArray(da,	user->lLevelset, &level);
	DAVecGetArray(da,	lLevelset0,	&level0);
	DAVecGetArray(da,	Levelset_RHS,	&rhs);
	
	// Pass 1: store |grad(level)| in L for every node of the shrunken range;
	// nodes with nvert > 0.1 get 0.
	DAVecGetArray(da,  L,  &grad_level);
	for	(k=lzs;	k<lze; k++)
	for	(j=lys;	j<lye; j++)
	for	(i=lxs;	i<lxe; i++) {
		double dldc, dlde, dldz;
		double dl_dx, dl_dy, dl_dz;
		
		double csi0=csi[k][j][i].x,csi1=csi[k][j][i].y, csi2=csi[k][j][i].z;
		double eta0=eta[k][j][i].x,eta1=eta[k][j][i].y, eta2=eta[k][j][i].z;
		double zet0=zet[k][j][i].x,zet1=zet[k][j][i].y, zet2=zet[k][j][i].z;
		double ajc = aj[k][j][i];
		
		// Length scale: cube root of 1/aj, overridden by dthick when dthick_set is set.
		double dx=pow(1./aj[k][j][i],1./3.);
		if(dthick_set) dx = dthick;

		// NOTE(review): sgn is computed but not used in this loop; the active call
		// below passes sign(level0) instead (the sgn variant is commented out).
		double sgn = sign1(level0[k][j][i], dx);
		//if(wall_distance)	sgn	=	sign(level0[k][j][i]);
		
		//Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sgn, wall_distance, level, nvert, &dldc, &dlde, &dldz);
		Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sign(level0[k][j][i]), wall_distance, level, nvert, &dldc, &dlde, &dldz); //100521
		
		// Convert the (dldc, dlde, dldz) derivatives into (dl_dx, dl_dy, dl_dz) using the metrics.
		Compute_dscalar_dxyz (csi0, csi1, csi2, eta0, eta1, eta2, zet0, zet1, zet2, ajc, dldc, dlde, dldz, &dl_dx, &dl_dy, &dl_dz);
		
		grad_level[k][j][i] = sqrt( dl_dx*dl_dx	+ dl_dy*dl_dy +	dl_dz*dl_dz );
		if(nvert[k][j][i]>0.1) grad_level[k][j][i]=0;
	}
	DAVecRestoreArray(da,  L,  &grad_level);

	// Local-to-local exchange on L before the boundary copies below read it.
	DALocalToLocalBegin(user->da, L, INSERT_VALUES,	L);
	DALocalToLocalEnd(user->da, L, INSERT_VALUES, L);
	
	DAVecGetArray(da,  L,  &grad_level);
	
	// Fill the outermost grid layer of L on each face this process touches.
	// Default: copy from the adjacent interior node (index 1 or m*-2).
	// With *_periodic set: copy from the interior node next to the opposite face.
	// With **_periodic set: copy from index -2 / m*+1, i.e. outside [0, m*-1]
	// (presumably ghost entries filled by the exchange above - confirm).
	// Neumann,	periodic conditions
	if(xs==0 ||	xe==mx)	{
		int	from,	to;
		for	(k=lzs;	k<lze; k++)
		for	(j=lys;	j<lye; j++)	{
			if(xs==0)	{
				i	=	1, from	=	i, to	=	0;
				
				if(i_periodic) from	=	mx-2;
				else if(ii_periodic) from	=	-2;
				
				grad_level[k][j][to] = grad_level[k][j][from];
			}
			
			if(xe==mx) {
				i	=	mx-2,	from = i,	to = mx-1;
				
				if(i_periodic) from	=	1;
				else if(ii_periodic) from	=	mx+1;
				
				grad_level[k][j][to] = grad_level[k][j][from];
			}
		}
	}
	
	if(ys==0 ||	ye==my)	{
		int	from,	to;
				
		for	(k=lzs;	k<lze; k++)
		for	(i=lxs;	i<lxe; i++)	{
			if(ys==0)	{
				j	=	1, from	=	j, to	=	0;
				
				if(j_periodic) from	=	my-2;
				else if(jj_periodic) from	=	-2;
				
				grad_level[k][to][i] = grad_level[k][from][i];
			}
			
			if(ye==my) {
				j	=	my-2,	from = j,	to = my-1;
				
				if(j_periodic) from	=	1;
				else if(jj_periodic) from	=	my+1;
				
				grad_level[k][to][i] = grad_level[k][from][i];
			}
		}
	}
	
	if(zs==0 ||	ze==mz)	{
		int	from,	to;
		
		for	(j=lys;	j<lye; j++)
		for	(i=lxs;	i<lxe; i++)	{
			if(zs==0)	{
				k	=	1, from	=	k, to	=	0;
				
				if(k_periodic) from	=	mz-2;
				else if(kk_periodic) from	=	-2;
				
				grad_level[to][j][i] = grad_level[from][j][i];
			}
			
			if(ze==mz) {
				k	=	mz-2,	from = k,	to = mz-1;
				
				if(k_periodic) from	=	1;
				else if(kk_periodic) from	=	mz+1;
				
				grad_level[to][j][i] = grad_level[from][j][i];
			}
		}
	}
	
	// Second exchange on L after the boundary layer has been filled in; the
	// 3x3x3 stencil in pass 2 reads grad_level at neighbouring nodes.
	DAVecRestoreArray(da,  L,  &grad_level);
	DALocalToLocalBegin(user->da, L, INSERT_VALUES,	L);
	DALocalToLocalEnd(user->da, L, INSERT_VALUES, L);
	DAVecGetArray(da,  L,  &grad_level);

	// Pass 2: assemble rhs over all owned nodes.
	for(k=zs; k<ze; k++)
	for(j=ys; j<ye; j++)
	for(i=xs; i<xe; i++)	{
		// Outermost grid layer and nodes with nvert > 1.1: rhs = 0.
		if (i<= 0 || i>= mx-1 || j<=0 || j>=my-1 || k<=0 || k>=mz-1 || nvert[k][j][i]>1.1){
			rhs[k][j][i]=0.;
			continue;
		}
		
		// Any node with nvert > 0.1: rhs = 0 (this also covers the > 1.1 test above).
		if(nvert[k][j][i]>0.1) {
		  rhs[k][j][i]=0.;
		  continue;
		}
		
		if(wall_distance) {
			// NOTE(review): this nvert test can never fire here; the same test
			// already caused a "continue" just above.
			if(nvert[k][j][i]>0.1) { rhs[k][j][i]=0.; continue; }
			// Nodes within two layers of a face whose bctype is 1, -1 or -2
			// (additionally 12 for bctype[3]) are held at rhs = 0.
			if(i <= 1 && (user->bctype[0]==1 || user->bctype[0]==-1 || user->bctype[0]==-2)) { rhs[k][j][i]=0.; continue; }
			if(i >=	mx-2 &&	(user->bctype[1]==1 || user->bctype[1]==-1 || user->bctype[1]==-2)) { rhs[k][j][i]=0.; continue; }
			if(j <=1 && (user->bctype[2]==1 || user->bctype[2]==-1 || user->bctype[2]==-2)) { rhs[k][j][i]=0.; continue; }
			if(j >=my-2 && (user->bctype[3]==1 || user->bctype[3]==-1 || user->bctype[3]==-2 || user->bctype[3]==12)) { rhs[k][j][i]=0.; continue; }
			if(k<=1	&& (user->bctype[4]==1 || user->bctype[4]==-1 || user->bctype[4]==-2)) { rhs[k][j][i]=0.; continue; }
			if(k>=mz-2 && (user->bctype[5]==1 || user->bctype[5]==-1 || user->bctype[5]==-2)){ rhs[k][j][i]=0.; continue; }
		}		
		// NOTE(review): this branch currently has no effect; its body is commented out.
		else if( !wall_distance	&& user->bctype[4]==5 && user->bctype[5]==4) {
		  //if ( fix_inlet && k==1 ) { rhs[k][j][i] = 0; continue; }
		  //haha if ( fix_outlet && k==mz-2 ) { rhs[k][j][i] = 0; continue; } // important to stabilize outlet
		}
		
		// Length scale and sign factor at the centre node; sgn multiplies the final rhs below.
		double dx=pow(1./aj[k][j][i],1./3.);
		if(dthick_set) dx = dthick;
		double sgn = sign1(level0[k][j][i],dx);
		//if(wall_distance)	sgn	=	sign(level0[k][j][i]);
		
		// 3x3x3 stencil buffers, indexed [k-offset+1][j-offset+1][i-offset+1].
		double denom[3][3][3], num[3][3][3], weight[3][3][3];
		
		// Fill num and denom at every stencil node:
		//   num   = dH(phi,dx) * sign1(level0,dx) * (1 - |grad|)
		//   denom = dH(phi,dx) * dH(phi,dx) * |grad|
		// The inner dx shadows the outer one and is evaluated at the stencil node.
		// (dH is an opaque helper here; presumably a smoothed delta function - confirm.)
		for(int	p=-1;	p<=1;	p++)
		for(int	q=-1;	q<=1;	q++)
		for(int	r=-1;	r<=1;	r++) {
			int	R=r+1, Q=q+1,	P=p+1;
			int	K=k+r, J=j+q,	I=i+p;
			double phi = level[K][J][I], grad	=	grad_level[K][J][I], dx=pow(1./aj[K][J][I],1./3.);
			if(dthick_set) dx	=	dthick;

			double f = dH(phi,dx)	*	grad;
			
			double _sgn	=	sign1( level0[K][J][I],	dx );
			//if(wall_distance)	_sgn = sign(level0[K][J][I]);
			
			num[R][Q][P] = dH(phi,dx) * _sgn * ( 1.	- grad );
			denom[R][Q][P] = dH(phi,dx) * f;
		}
		
		// Stencil nodes on a non-periodic outermost layer, or with nvert > 0.1,
		// take the centre node's num/denom instead of their own.
		for(int	p=-1;	p<=1;	p++)
		for(int	q=-1;	q<=1;	q++)
		for(int	r=-1;	r<=1;	r++) {
		  int	R=r+1, Q=q+1,	P=p+1;
		  int	K=k+r, J=j+q,	I=i+p;
		  if( (!i_periodic && !ii_periodic && (I==0 || I==mx-1 ) ) ||
			(!j_periodic && !jj_periodic && (J==0 || J==my-1 ) ) || 
			(!k_periodic && !kk_periodic && (K==0 || K==mz-1) ) ||	
			nvert[K][J][I]>0.1) {
		    num[R][Q][P] = num[1][1][1];
		    denom[R][Q][P] = denom[1][1][1];
		  }
		}
		
		// Stencil weights come from get_weight (defined elsewhere), called with threshold 0.1.
		get_weight (i, j, k, mx, my, mz, aj, nvert, 0.1, weight);
		
		double numerator = integrate_testfilter(num, weight);
		double denominator = integrate_testfilter(denom, weight);
		
		double correction;
		
		// correction = -(numerator/denominator) * dH(phi,dx) * |grad| at the centre
		// node, or 0 when the denominator is (near) zero to avoid dividing by it.
		if(	fabs(denominator)<1.e-10 ) correction=0;
		else {
			double grad	=	grad_level[k][j][i];
			double phi = level[k][j][i];
			double dx=pow(1./aj[k][j][i],1./3.);
			if(dthick_set) dx	=	dthick;

			// NOTE(review): f is computed but not used; the same product is
			// recomputed in the last statement of this block.
			double f = dH(phi,dx)	*	grad;
			correction = - numerator / denominator;
			correction *=	dH(phi,dx) * grad;
		}
		
		// Recompute the gradient of level at the centre node (same calls as pass 1)
		// and form the base right-hand side sgn * (1 - |grad(level)|).
		double dlevel_dx, dlevel_dy, dlevel_dz;
		double dldc, dlde, dldz;
		double csi0 = csi[k][j][i].x, csi1 = csi[k][j][i].y, csi2 = csi[k][j][i].z;
		double eta0 = eta[k][j][i].x, eta1 = eta[k][j][i].y, eta2 = eta[k][j][i].z;
		double zet0 = zet[k][j][i].x, zet1 = zet[k][j][i].y, zet2 = zet[k][j][i].z;
		double ajc = aj[k][j][i];
		
		//Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sgn, wall_distance, level, nvert, &dldc, &dlde, &dldz);
		Compute_dlevel_center_levelset (i, j, k, mx, my, mz, sign(level0[k][j][i]), wall_distance, level, nvert, &dldc, &dlde, &dldz); //100521

		Compute_dscalar_dxyz (csi0, csi1, csi2,	eta0, eta1, eta2, zet0,	zet1, zet2, ajc, dldc, dlde, dldz, &dlevel_dx, &dlevel_dy, &dlevel_dz);
		rhs[k][j][i] = sgn * ( 1. - sqrt( dlevel_dx*dlevel_dx +	dlevel_dy*dlevel_dy + dlevel_dz*dlevel_dz ) );
			
		// NOTE(review): this test has no effect; its body is commented out.
		if(nvert[k][j][i+1]+nvert[k][j][i-1]+nvert[k][j+1][i]+nvert[k][j-1][i]+nvert[k+1][j][i]+nvert[k-1][j][i]>0.1) {
		  // correction = 0;
		}

		// The correction term is only applied when wall_distance is zero.
		if(!wall_distance) rhs[k][j][i] += correction;	// Sussman Fetami
	}

	// Give back every array obtained above, then release the two temporaries.
	DAVecRestoreArray(da,	 L,	 &grad_level);
	DAVecRestoreArray(fda, Csi,	&csi);
	DAVecRestoreArray(fda, Eta,	&eta);
	DAVecRestoreArray(fda, Zet,	&zet);
	DAVecRestoreArray(da,	 Aj,	&aj);
	DAVecRestoreArray(da,	user->lNvert,	&nvert);
	DAVecRestoreArray(da,	user->lLevelset, &level);
	DAVecRestoreArray(da,	lLevelset0,	&level0);
	DAVecRestoreArray(da,	Levelset_RHS,	&rhs);
	
	VecDestroy(L);
	VecDestroy(lLevelset0);
}
Пример #9
0
// Searches for the rotor (4-component rotation vector) that maximises the summed
// overlap volume of the paired points in _refMap / _dbMap.
//
// Four optimisation runs are made, each starting from a different unit rotor
// (a single 1.0 in component 0, 1, 2 or 3). Every run performs at most 100
// steps in which the volume, its gradient (_grad) and its Hessian are
// accumulated over all point pairs, after which the rotor is advanced by
// 0.9 * rowProduct(hessian, _grad) (hessian having been passed through
// inverseHessian first) and renormalised.
//
// @param n  when true, point pairs that both carry a normal contribute through
//           _normalContribution (AROM/AROM pairs use the absolute value of the
//           returned factor; HACC/HDON/HYBH pairs use it with its sign).
// @return   a SolutionInfo holding the centres/rotations copied from this
//           object plus the rotor, volume and iteration count of the best run;
//           volume stays at -1000.0 and iterations at 0 if no run beat that.
SolutionInfo
Alignment::align(bool n)
{
	// result record, pre-filled with a sentinel volume that any real run should exceed
	SolutionInfo best;
	best.volume = -1000.0;
	best.iterations = 0;
	best.center1 = _refCenter;
	best.center2 = _dbCenter;
	best.rotation1 = _refRotMat;
	best.rotation2 = _dbRotMat;

	// exclusion spheres are weighted by 1/_nbrExcl (or 1 when there are none)
	const double exclScale = (_nbrExcl != 0) ? 1.0 / _nbrExcl : 1.0;

	// one optimisation run per start orientation
	for (unsigned int start(0); start < 4; ++start)
	{
		// start rotor: unit vector along component 'start'
		SiMath::Vector rotor(4,0.0);
		rotor[start] = 1.0;

		double oldVolume(-999.99);
		SiMath::Vector dG(4,0.0);                      // kept from the original; not used below
		SiMath::Matrix hessian(4,4,0.0), dH(4,4,0.0);  // dH kept from the original; not used below

		unsigned int iter(0);
		while (iter < 100)
		{
			// reset the accumulators for this step
			_grad = 0.0;
			hessian = 0.0;
			double volume(0.0);

			for (unsigned int p(0); p < _refMap.size(); ++p)
			{
				SiMath::Matrix * A = _AkA[p];

				// Aq = A * rotor, row by row
				SiMath::Vector Aq(4,0.0);
				for (unsigned int row(0); row < 4; ++row)
				{
					Aq[row] = (*A)[row][0] * rotor[0] + (*A)[row][1] * rotor[1] + (*A)[row][2] * rotor[2] + (*A)[row][3] * rotor[3];
				}

				// quadratic form rotor^T * A * rotor
				const double qAq = Aq[0] * rotor[0] + Aq[1] * rotor[1] + Aq[2] * rotor[2] +Aq[3] * rotor[3];

				// overlap volume of this pair of points
				double overlap = GCI2 * pow(PI/(_refMap[p].alpha+_dbMap[p].alpha),1.5) * exp(-qAq);

				// classify the pair
				const bool bothNormals = n && (_refMap[p].hasNormal) && (_dbMap[p].hasNormal);
				const bool aromPair = bothNormals
					&& (_refMap[p].func == AROM) && (_dbMap[p].func == AROM);
				const bool refDirectional = (_refMap[p].func == HACC) || (_refMap[p].func == HDON) || (_refMap[p].func == HYBH);
				const bool dbDirectional = (_dbMap[p].func == HYBH) || (_dbMap[p].func == HACC) || (_dbMap[p].func == HDON);
				const bool hbondPair = !aromPair && bothNormals && refDirectional && dbDirectional;

				if (aromPair || hbondPair)
				{
					// directional pair: scale by the normal contribution c
					double c = _normalContribution(_refMap[p].normal, _dbMap[p].normal, rotor);

					// aromatic rings: only the plane matters, so use |c| and flip
					// the stored derivatives of c accordingly
					if (aromPair && (c < 0))
					{
						c *= -1.0;
						_dCdq *= -1.0;
						_d2Cdq2 *= -1.0;
					}

					for (unsigned int hi(0); hi < 4; hi++)
					{
						_grad[hi] += overlap * ( _dCdq[hi] - 2.0 * c * Aq[hi] );
						for (unsigned int hj(0); hj < 4; hj++)
						{
							hessian[hi][hj] += overlap * (_d2Cdq2[hi][hj] - 2.0 * _dCdq[hi]*Aq[hj] + 2.0 * c * (2.0*Aq[hi]*Aq[hj] - (*A)[hi][hj]));
						}
					}

					overlap *= c;
				}
				else
				{
					// exclusion spheres count negatively, scaled by exclScale
					if (_refMap[p].func == EXCL)
					{
						overlap *= -exclScale;
					}

					for (unsigned int hi(0); hi < 4; hi++)
					{
						_grad[hi] -= 2.0 * overlap * Aq[hi];
						for (unsigned int hj(0); hj < 4; hj++)
						{
							hessian[hi][hj] += 2.0 * overlap * (2.0*Aq[hi]*Aq[hj] - (*A)[hi][hj]);
						}
					}
				}

				volume += overlap;
			}

			// leave the run when the volume is undefined or no longer improves enough
			const bool undefined = std::isnan(volume);
			if (undefined || (volume - oldVolume < 1e-5))
			{
				break;
			}

			// accept this volume as the reference for the next step
			oldVolume = volume;

			// step direction from the processed hessian and the gradient, damped by 0.9
			inverseHessian(hessian);
			_grad = rowProduct(hessian,_grad);
			_grad *= 0.9;

			// advance and renormalise the rotor
			rotor += _grad;
			normalise(rotor);

			++iter;
		}

		// keep this run if it is the best so far
		if (oldVolume > best.volume)
		{
			best.rotor = rotor;
			best.volume = oldVolume;
			best.iterations = iter;
		}
	}

	return best;
}
Пример #10
0
/**
Works out which link segments (walk, step up, step down) join two nav polygon edges,
given the edges as 2D lines in a common plane and the x interval they share. Whether
any link exists at all falls out of this calculation: a link segment is only set when
a suitable stretch of edge can be found.

Both edges must be non-vertical in the plane (|x2 - x1| > EPSILON); this is asserted.

@param s1			One of the 2D endpoints of the source edge in the plane
@param s2			The other 2D endpoint of the source edge in the plane
@param d1			One of the 2D endpoints of the destination edge in the plane
@param d2			The other 2D endpoint of the destination edge in the plane
@param xOverlap		The horizontal overlap interval between the 2D edges in the plane
@return				The link segments that were found; segments for which no link exists are not set here
*/
NavMeshGenerator::LinkSegments
NavMeshGenerator::calculate_link_segments(const Vector2d& s1, const Vector2d& s2, const Vector2d& d1, const Vector2d& d2,
										  const Interval& xOverlap) const
{
	LinkSegments result;

	// Both edges are written as lines y = m.x + c, which requires them to be non-vertical.
	assert(fabs(s2.x - s1.x) > EPSILON);
	assert(fabs(d2.x - d1.x) > EPSILON);

	// Source line: yS = mS.x + cS
	const double mS = (s2.y - s1.y) / (s2.x - s1.x);
	const double cS = s1.y - mS * s1.x;

	// Destination line: yD = mD.x + cD
	const double mD = (d2.y - d1.y) / (d2.x - d1.x);
	const double cD = d1.y - mD * d1.x;

	// yD - yS = deltaM.x + deltaC
	const double deltaM = mD - mS;
	const double deltaC = cD - cS;

	const bool differentGradients = fabs(deltaM) > EPSILON;
	if(!differentGradients)
	{
		// Parallel edges: the height difference is deltaC everywhere, so there is either
		// one kind of link across the whole overlap, or (if the gap is too big) none.
		if(!(fabs(deltaC) <= m_maxHeightDifference))
		{
			return result;
		}

		const double lo = xOverlap.low();
		const double hi = xOverlap.high();
		Vector2d srcLo(lo, mS*lo+cS);
		Vector2d srcHi(hi, mS*hi+cS);
		Vector2d destLo(lo, mD*lo+cD);
		Vector2d destHi(hi, mD*hi+cD);

		if(deltaC > SMALL_EPSILON)
		{
			// Destination above source: step up going source -> dest, step down coming back.
			result.stepUpSourceToDestSegment.reset(new LineSegment2d(srcLo,srcHi));
			result.stepDownDestToSourceSegment.reset(new LineSegment2d(destLo,destHi));
		}
		else if(deltaC < -SMALL_EPSILON)
		{
			// Destination below source: step down going source -> dest, step up coming back.
			result.stepDownSourceToDestSegment.reset(new LineSegment2d(srcLo,srcHi));
			result.stepUpDestToSourceSegment.reset(new LineSegment2d(destLo,destHi));
		}
		else
		{
			// |deltaC| <= SMALL_EPSILON: same level, so it's a plain walk link.
			result.walkSegment.reset(new LineSegment2d(srcLo,srcHi));
		}

		return result;
	}

	// Non-parallel edges: the lines cross, giving a step-down stretch on one side of the
	// crossing point and a step-up stretch on the other.

	// Crossing point: deltaM.x + deltaC = 0
	const double walkX = -deltaC / deltaM;

	// Furthest x at which a step up is still possible: deltaM.x + deltaC = +m_maxHeightDifference
	const double stepUpX = (m_maxHeightDifference - deltaC) / deltaM;

	// Furthest x at which a step down is still possible: deltaM.x + deltaC = -m_maxHeightDifference
	const double stepDownX = (-m_maxHeightDifference - deltaC) / deltaM;

	// Build each stretch as an ordered interval and clip it to the shared x range.
	Interval downRange(std::min(walkX,stepDownX), std::max(walkX,stepDownX));
	Interval upRange(std::min(walkX,stepUpX), std::max(walkX,stepUpX));
	downRange = downRange.intersect(xOverlap);
	upRange = upRange.intersect(xOverlap);

	if(!downRange.empty())
	{
		// Source -> dest is a step down here; the reverse direction is a step up.
		Vector2d srcLo(downRange.low(), mS*downRange.low()+cS);
		Vector2d srcHi(downRange.high(), mS*downRange.high()+cS);
		result.stepDownSourceToDestSegment.reset(new LineSegment2d(srcLo,srcHi));

		Vector2d destLo(downRange.low(), mD*downRange.low()+cD);
		Vector2d destHi(downRange.high(), mD*downRange.high()+cD);
		result.stepUpDestToSourceSegment.reset(new LineSegment2d(destLo,destHi));
	}

	if(!upRange.empty())
	{
		// Source -> dest is a step up here; the reverse direction is a step down.
		Vector2d srcLo(upRange.low(), mS*upRange.low()+cS);
		Vector2d srcHi(upRange.high(), mS*upRange.high()+cS);
		result.stepUpSourceToDestSegment.reset(new LineSegment2d(srcLo,srcHi));

		Vector2d destLo(upRange.low(), mD*upRange.low()+cD);
		Vector2d destHi(upRange.high(), mD*upRange.high()+cD);
		result.stepDownDestToSourceSegment.reset(new LineSegment2d(destLo,destHi));
	}

	return result;
}