static PetscErrorCode KSPSolve_AGMRES(KSP ksp) { PetscErrorCode ierr; PetscInt its; KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscBool guess_zero = ksp->guess_zero; PetscReal res_old, res; PetscInt test; PetscFunctionBegin; ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->reason = KSP_CONVERGED_ITERATING; if (!agmres->HasShifts) { /* Compute Shifts for the Newton basis */ ierr = KSPComputeShifts_DGMRES(ksp);CHKERRQ(ierr); } /* NOTE: At this step, the initial guess is not equal to zero since one cycle of the classical GMRES is performed to compute the shifts */ ierr = (*ksp->converged)(ksp,0,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); while (!ksp->reason) { ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TMP,VEC_TMP_MATOP,VEC_V(0),ksp->vec_rhs);CHKERRQ(ierr); if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(0), VEC_TMP);CHKERRQ(ierr); ierr = VecCopy(VEC_TMP, VEC_V(0));CHKERRQ(ierr); agmres->matvecs += 1; } ierr = VecNormalize(VEC_V(0),&(ksp->rnorm));CHKERRQ(ierr); KSPCheckNorm(ksp,ksp->rnorm); res_old = ksp->rnorm; /* Record the residual norm to test if deflation is needed */ ksp->ops->buildsolution = KSPBuildSolution_AGMRES; ierr = KSPAGMRESCycle(&its,ksp);CHKERRQ(ierr); if (ksp->its >= ksp->max_it) { if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; break; } /* compute the eigenvectors to augment the subspace : use an adaptive strategy */ res = ksp->rnorm; if (!ksp->reason && agmres->neig > 0) { test = agmres->max_k * PetscLogReal(ksp->rtol/res) / PetscLogReal(res/res_old); /* estimate the remaining number of steps */ if ((test > agmres->smv*(ksp->max_it-ksp->its)) || agmres->force) { if (!agmres->force && ((test > agmres->bgv*(ksp->max_it-ksp->its)) && ((agmres->r + 1) < agmres->max_neig))) { agmres->neig += 1; /* Augment the number of eigenvalues to deflate if the convergence is too slow */ } ierr = KSPDGMRESComputeDeflationData_DGMRES(ksp,&agmres->neig);CHKERRQ(ierr); } } ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ } ksp->guess_zero = guess_zero; /* restore if user has provided nonzero initial guess */ PetscFunctionReturn(0); }
static void update(NrnThread* _nt) { int i, i1, i2; i1 = 0; i2 = _nt->end; #if CACHEVEC if (use_cachevec) { /* do not need to worry about linmod or extracellular*/ if (secondorder) { for (i=i1; i < i2; ++i) { VEC_V(i) += 2.*VEC_RHS(i); } }else{ for (i=i1; i < i2; ++i) { VEC_V(i) += VEC_RHS(i); } } }else #endif { /* use original non-vectorized update */ if (secondorder) { #if _CRAY #pragma _CRI ivdep #endif for (i=i1; i < i2; ++i) { NODEV(_nt->_v_node[i]) += 2.*NODERHS(_nt->_v_node[i]); } }else{ #if _CRAY #pragma _CRI ivdep #endif for (i=i1; i < i2; ++i) { NODEV(_nt->_v_node[i]) += NODERHS(_nt->_v_node[i]); } if (use_sparse13) { nrndae_update(); } } } /* end of non-vectorized update */ #if EXTRACELLULAR nrn_update_2d(_nt); #endif #if I_MEMBRANE if (_nt->tml) { assert(_nt->tml->index == CAP); nrn_capacity_current(_nt, _nt->tml->ml); } #endif }
static void nrn_init(_NrnThread* _nt, _Memb_list* _ml, int _type){ Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } v = _v; cai = _ion_cai; cao = _ion_cao; cai = _ion_cai; ki = _ion_ki; ko = _ion_ko; nai = _ion_nai; nao = _ion_nao; initmodel(); _ion_cai = cai; nrn_wrote_conc(_ca_sym, (&(_ion_cai)) - 1, _style_ca); }}
static void nrn_state(_NrnThread* _nt, _Memb_list* _ml, int _type) { double _break, _save; double* _p; Datum* _ppvar; Datum* _thread; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; _thread = _ml->_thread; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; _nd = _ml->_nodelist[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } _break = t + .5*dt; _save = t; v=_v; { ek = _ion_ek; { { for (; t < _break; t += dt) { states(_p, _ppvar, _thread, _nt); }} t = _save; } }} }
static void nrn_state(_NrnThread* _nt, _Memb_list* _ml, int _type) { double* _p; Datum* _ppvar; Datum* _thread; Node *_nd; double _v = 0.0; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; _thread = _ml->_thread; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; _nd = _ml->_nodelist[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } v=_v; { cai = _ion_cai; { _deriv1_advance = 1; derivimplicit_thread(2, _slist1, _dlist1, _p, states, _ppvar, _thread, _nt); _deriv1_advance = 0; } }} }
static void nrn_init(_NrnThread* _nt, _Memb_list* _ml, int _type){ double* _p; Datum* _ppvar; Datum* _thread; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; _thread = _ml->_thread; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } v = _v; iCa = _ion_iCa; Cai = _ion_Cai; Cai = _ion_Cai; initmodel(_p, _ppvar, _thread, _nt); _ion_Cai = Cai; nrn_wrote_conc(_Ca_sym, (&(_ion_Cai)) - 1, _style_Ca); }}
static void nrn_state(_NrnThread* _nt, _Memb_list* _ml, int _type){ double _break, _save; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; _nd = _ml->_nodelist[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } _break = t + .5*dt; _save = t; v=_v; { ek = _ion_ek; { { for (; t < _break; t += dt) { error = states(); if(error){fprintf(stderr,"at line 64 in file KA_i1.mod:\n SOLVE states METHOD cnexp\n"); nrn_complain(_p); abort_run(error);} }} t = _save; } }} }
static void nrn_cur(_NrnThread* _nt, _Memb_list* _ml, int _type) { double* _p; Datum* _ppvar; Datum* _thread; Node *_nd; int* _ni; double _rhs, _v; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; _thread = _ml->_thread; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } _g = _nrn_current(_p, _ppvar, _thread, _nt, _v + .001); { _rhs = _nrn_current(_p, _ppvar, _thread, _nt, _v); } _g = (_g - _rhs)/.001; #if CACHEVEC if (use_cachevec) { VEC_RHS(_ni[_iml]) -= _rhs; }else #endif { NODERHS(_nd) -= _rhs; } }}
static void nrn_init(_NrnThread* _nt, _Memb_list* _ml, int _type){ double* _p; Datum* _ppvar; Datum* _thread; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; _thread = _ml->_thread; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; #if 0 _check_rates(_p, _ppvar, _thread, _nt); #endif #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } v = _v; ena = _ion_ena; initmodel(_p, _ppvar, _thread, _nt); }}
void nrn_finitialize(int setv, double v) { int i; NrnThread* _nt; t = 0.; dt2thread(-1.); nrn_thread_table_check(); clear_event_queue(); nrn_spike_exchange_init(); #if VECTORIZE nrn_play_init(); /* Vector.play */ ///Play events should be executed before initializing events for (i=0; i < nrn_nthread; ++i) { nrn_deliver_events(nrn_threads + i); /* The play events at t=0 */ } if (setv) { for (_nt = nrn_threads; _nt < nrn_threads + nrn_nthread; ++_nt) { for (i=0; i < _nt->end; ++i) { VEC_V(i) = v; } } } for (i=0; i < nrn_nthread; ++i) { nrn_ba(nrn_threads + i, BEFORE_INITIAL); } /* the INITIAL blocks are ordered so that mechanisms that write concentrations are after ions and before mechanisms that read concentrations. */ /* the memblist list in NrnThread is already so ordered */ for (i=0; i < nrn_nthread; ++i) { NrnThread* nt = nrn_threads + i; NrnThreadMembList* tml; for (tml = nt->tml; tml; tml = tml->next) { mod_f_t s = memb_func[tml->index].initialize; if (s) { (*s)(nt, tml->ml, tml->index); } } } #endif init_net_events(); for (i = 0; i < nrn_nthread; ++i) { nrn_ba(nrn_threads + i, AFTER_INITIAL); } for (i=0; i < nrn_nthread; ++i) { nrn_deliver_events(nrn_threads + i); /* The INITIAL sent events at t=0 */ } for (i=0; i < nrn_nthread; ++i) { setup_tree_matrix_minimal(nrn_threads + i); } for (i=0; i < nrn_nthread; ++i) { nrn_deliver_events(nrn_threads + i); /* The record events at t=0 */ } #if NRNMPI nrn_spike_exchange(nrn_threads); #endif }
PetscErrorCode KSPSetUp_AGMRES(KSP ksp) { PetscErrorCode ierr; PetscInt hes; PetscInt nloc; KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscInt neig = agmres->neig; PetscInt max_k = agmres->max_k; PetscInt N = MAXKSPSIZE; PetscInt lwork = PetscMax(8 * N + 16, 4 * neig * (N - neig)); PetscFunctionBegin; if (ksp->pc_side == PC_SYMMETRIC) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"no symmetric preconditioning for KSPAGMRES"); max_k = agmres->max_k; N = MAXKSPSIZE; /* Preallocate space during the call to KSPSetup_GMRES for the Krylov basis */ agmres->q_preallocate = PETSC_TRUE; /* No allocation on the fly */ /* Preallocate space to compute later the eigenvalues in GMRES */ ksp->calc_sings = PETSC_TRUE; agmres->max_k = N; /* Set the augmented size to be allocated in KSPSetup_GMRES */ ierr = KSPSetUp_DGMRES(ksp);CHKERRQ(ierr); agmres->max_k = max_k; hes = (N + 1) * (N + 1); /* Data for the Newton basis GMRES */ ierr = PetscMalloc4(max_k,PetscScalar,&agmres->Rshift,max_k,PetscScalar,&agmres->Ishift,hes,PetscScalar,&agmres->Rloc,((N+1)*4),PetscScalar,&agmres->wbufptr);CHKERRQ(ierr); ierr = PetscMalloc7((N+1),PetscScalar,&agmres->Scale,(N+1),PetscScalar,&agmres->sgn,(N+1),PetscScalar,&agmres->tloc,(N+1),PetscScalar,&agmres->temp,(N+1),PetscScalar,&agmres->tau,lwork,PetscScalar,&agmres->work,(N+1),PetscScalar,&agmres->nrs);CHKERRQ(ierr); ierr = PetscMemzero(agmres->Rshift, max_k*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(agmres->Ishift, max_k*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(agmres->Scale, (N+1)*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(agmres->Rloc, (N+1)*(N+1)*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(agmres->sgn, (N+1)*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(agmres->tloc, (N+1)*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(agmres->temp, (N+1)*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(agmres->wbufptr, (N+1)*4*sizeof(PetscScalar));CHKERRQ(ierr); /* Allocate space for the vectors in the orthogonalized basis*/ ierr = VecGetLocalSize(agmres->vecs[0], &nloc);CHKERRQ(ierr); ierr = PetscMalloc(nloc*(N+1)*sizeof(PetscScalar), &agmres->Qloc);CHKERRQ(ierr); /* Init the ring of processors for the roddec orthogonalization */ ierr = KSPAGMRESRoddecInitNeighboor(ksp);CHKERRQ(ierr); if (agmres->neig < 1) PetscFunctionReturn(0); /* Allocate space for the deflation */ ierr = PetscMalloc(N*sizeof(PetscScalar), &agmres->select);CHKERRQ(ierr); ierr = VecDuplicateVecs(VEC_V(0), N, &agmres->TmpU);CHKERRQ(ierr); ierr = PetscMalloc2(N*N, PetscScalar, &agmres->MatEigL, N*N, PetscScalar, &agmres->MatEigR);CHKERRQ(ierr); /* ierr = PetscMalloc6(N*N, PetscScalar, &agmres->Q, N*N, PetscScalar, &agmres->Z, N, PetscScalar, &agmres->wr, N, PetscScalar, &agmres->wi, N, PetscScalar, &agmres->beta, N, PetscScalar, &agmres->modul);CHKERRQ(ierr); */ ierr = PetscMalloc3(N*N, PetscScalar, &agmres->Q, N*N, PetscScalar, &agmres->Z, N, PetscScalar, &agmres->beta);CHKERRQ(ierr); ierr = PetscMalloc2((N+1),PetscInt,&agmres->perm,(2*neig*N),PetscInt,&agmres->iwork);CHKERRQ(ierr); PetscFunctionReturn(0); }
static void nrn_cur(_NrnThread* _nt, _Memb_list* _ml, int _type){ Node *_nd; int* _ni; double _rhs, _v; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } cai = _ion_cai; cao = _ion_cao; cai = _ion_cai; ki = _ion_ki; ko = _ion_ko; nai = _ion_nai; nao = _ion_nao; _g = _nrn_current(_v + .001); { double _dina; double _dik; double _dica; _dica = ica; _dik = ik; _dina = ina; _rhs = _nrn_current(_v); _ion_dicadv += (_dica - ica)/.001 ; _ion_dikdv += (_dik - ik)/.001 ; _ion_dinadv += (_dina - ina)/.001 ; } _g = (_g - _rhs)/.001; _ion_ica += ica ; _ion_cai = cai; _ion_ik += ik ; _ion_ina += ina ; #if CACHEVEC if (use_cachevec) { VEC_RHS(_ni[_iml]) -= _rhs; }else #endif { NODERHS(_nd) -= _rhs; } }}
static void nrn_state(_NrnThread* _nt, _Memb_list* _ml, int _type){ double _break, _save; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; _nd = _ml->_nodelist[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } _break = t + .5*dt; _save = t; v=_v; { ica = _ion_ica; cai = _ion_cai; cai = _ion_cai; { { for (; t < _break; t += dt) { error = _deriv1_advance = 1; derivimplicit(_ninits, 1, _slist1, _dlist1, _p, &t, dt, state, &_temp1); _deriv1_advance = 0; if(error){fprintf(stderr,"at line 72 in file cad_0403.mod:\n SOLVE state METHOD derivimplicit\n"); nrn_complain(_p); abort_run(error);} }} t = _save; } { /*VERBATIM*/ /* printf("CAD.MOD cai:%.10lf\t",cai);*/ } _ion_cai = cai; }} }
static void nrn_state(_NrnThread* _nt, _Memb_list* _ml, int _type) { double _break, _save; double* _p; Datum* _ppvar; Datum* _thread; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; _thread = _ml->_thread; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; _nd = _ml->_nodelist[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } _break = t + .5*dt; _save = t; v=_v; { ik = _ion_ik; ko = _ion_ko; ina = _ion_ina; nai = _ion_nai; { { for (; t < _break; t += dt) { _deriv1_advance = 1; derivimplicit_thread(2, _slist1, _dlist1, _p, state, _ppvar, _thread, _nt); _deriv1_advance = 0; }} t = _save; } { } _ion_ko = ko; _ion_nai = nai; }} }
static void nrn_cur(_NrnThread* _nt, _Memb_list* _ml, int _type){ Node *_nd; int* _ni; double _rhs, _v; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } }}
static void nrn_state(_NrnThread* _nt, _Memb_list* _ml, int _type){ double _break, _save; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; _nd = _ml->_nodelist[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } _break = t + .5*dt; _save = t; v=_v; { cai = _ion_cai; cao = _ion_cao; cai = _ion_cai; ki = _ion_ki; ko = _ion_ko; nai = _ion_nai; nao = _ion_nao; { { for (; t < _break; t += dt) { error = states(); if(error){fprintf(stderr,"at line 131 in file pGPeA_fukuda.mod:\n \n"); nrn_complain(_p); abort_run(error);} }} t = _save; } _ion_cai = cai; }} }
static void nrn_state(_NrnThread* _nt, _Memb_list* _ml, int _type){ double _break, _save; Node *_nd; double _v; int* _ni; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; _nd = _ml->_nodelist[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } _break = t + .5*dt; _save = t; v=_v; { ica = _ion_ica; cai = _ion_cai; cai = _ion_cai; { { for (; t < _break; t += dt) { error = euler(_ninits, 1, _slist1, _dlist1, _p, &t, dt, state, &_temp1); if(error){fprintf(stderr,"at line 87 in file capump.mod:\n SOLVE state METHOD euler\n"); nrn_complain(_p); abort_run(error);} }} t = _save; state(); } { } _ion_cai = cai; }} }
static void nrn_cur(_NrnThread* _nt, _Memb_list* _ml, int _type){ Node *_nd; int* _ni; double _rhs, _v; int _iml, _cntml; #if CACHEVEC _ni = _ml->_nodeindices; #endif _cntml = _ml->_nodecount; for (_iml = 0; _iml < _cntml; ++_iml) { _p = _ml->_data[_iml]; _ppvar = _ml->_pdata[_iml]; #if CACHEVEC if (use_cachevec) { _v = VEC_V(_ni[_iml]); }else #endif { _nd = _ml->_nodelist[_iml]; _v = NODEV(_nd); } Cai = _ion_Cai; Cao = _ion_Cao; _g = _nrn_current(_v + .001); { double _diCa; _diCa = iCa; _rhs = _nrn_current(_v); _ion_diCadv += (_diCa - iCa)/.001 ; } _g = (_g - _rhs)/.001; _ion_iCa += iCa ; #if CACHEVEC if (use_cachevec) { VEC_RHS(_ni[_iml]) -= _rhs; }else #endif { NODERHS(_nd) -= _rhs; } }}
PetscErrorCode KSPAGMRESRodvec(KSP ksp, PetscInt nvec, PetscScalar *In, Vec Out) { KSP_AGMRES *agmres = (KSP_AGMRES*) ksp->data; MPI_Comm comm; PetscScalar *Qloc = agmres->Qloc; PetscScalar *sgn = agmres->sgn; PetscScalar *tloc = agmres->tloc; PetscMPIInt rank = agmres->rank; PetscMPIInt First = agmres->First, Last = agmres->Last; PetscMPIInt Iright = agmres->Iright, Ileft = agmres->Ileft; PetscScalar *y, *zloc; PetscErrorCode ierr; PetscInt nloc,tag,d, len, i, j; PetscInt dpt,pas; PetscReal c, s, rho, zp, zq, yd, tt; MPI_Status status; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr); tag = 0x666; pas = 1; ierr = VecGetLocalSize(VEC_V(0), &nloc);CHKERRQ(ierr); ierr = PetscMalloc1(nvec, &y);CHKERRQ(ierr); ierr = PetscMemcpy(y, In, nvec*sizeof(PetscScalar));CHKERRQ(ierr); ierr = VecGetArray(Out, &zloc);CHKERRQ(ierr); if (rank == Last) { for (i = 0; i < nvec; i++) y[i] = sgn[i] * y[i]; } for (i = 0; i < nloc; i++) zloc[i] = 0.0; if (agmres->size == 1) PetscStackCallBLAS("BLAScopy",BLAScopy_(&nvec, y, &pas, &(zloc[0]), &pas)); else { for (d = nvec - 1; d >= 0; d--) { if (rank == First) { ierr = MPI_Recv(&(zloc[d]), 1, MPIU_SCALAR, Iright, tag, comm, &status);CHKERRQ(ierr); } else { for (j = nvec - 1; j >= d + 1; j--) { i = j - d; ierr = KSPAGMRESRoddecGivens(&c, &s, &(Qloc[j * nloc + i]), 0); zp = zloc[i-1]; zq = zloc[i]; zloc[i-1] = c * zp + s * zq; zloc[i] = -s * zp + c * zq; } ierr = KSPAGMRESRoddecGivens(&c, &s, &(Qloc[d * nloc]), 0); if (rank == Last) { zp = y[d]; zq = zloc[0]; y[d] = c * zp + s * zq; zloc[0] = -s * zp + c * zq; ierr = MPI_Send(&(y[d]), 1, MPIU_SCALAR, Ileft, tag, comm);CHKERRQ(ierr); } else { ierr = MPI_Recv(&yd, 1, MPIU_SCALAR, Iright, tag, comm, &status);CHKERRQ(ierr); zp = yd; zq = zloc[0]; yd = c * zp + s * zq; zloc[0] = -s * zp + c * zq; ierr = MPI_Send(&yd, 1, MPIU_SCALAR, Ileft, tag, comm);CHKERRQ(ierr); } } } } for (j = nvec - 1; j >= 0; j--) { dpt = j * nloc + j; if (tloc[j] != 0.0) { len = nloc - j; rho = Qloc[dpt]; Qloc[dpt] = 1.0; tt = tloc[j] * (BLASdot_(&len, &(Qloc[dpt]), &pas, &(zloc[j]), &pas)); PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&len, &tt, &(Qloc[dpt]), &pas, &(zloc[j]), &pas)); Qloc[dpt] = rho; } } ierr = VecRestoreArray(Out, &zloc);CHKERRQ(ierr); ierr = PetscFree(y);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPAGMRESBuildBasis(KSP ksp) { PetscErrorCode ierr; KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscReal *Rshift = agmres->Rshift; PetscReal *Ishift = agmres->Ishift; PetscReal *Scale = agmres->Scale; PetscInt max_k = agmres->max_k; PetscInt KspSize = KSPSIZE; /* if max_k == KspSizen then the basis should not be augmented */ PetscInt j = 1; PetscFunctionBegin; ierr = PetscLogEventBegin(KSP_AGMRESBuildBasis, ksp, 0,0,0);CHKERRQ(ierr); Scale[0] = 1.0; while (j <= max_k) { if (Ishift[j-1] == 0) { if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { /* Apply the precond-matrix operators */ ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* Then apply deflation as a preconditioner */ ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr); } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } else { ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } ierr = VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr); /* This step can be postponed until all vectors are built */ ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; } else { if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { /* Apply the precond-matrix operators */ ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* Then apply deflation as a preconditioner */ ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr); } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } else { ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } ierr = VecAXPY(VEC_V(j), -Rshift[j-1], VEC_V(j-1));CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr); ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) { /* Apply the precond-matrix operators */ ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* Then apply deflation as a preconditioner */ ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j));CHKERRQ(ierr); } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j-1), VEC_TMP);CHKERRQ(ierr); ierr = KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } else { ierr = KSP_PCApplyBAorAB(ksp, VEC_V(j-1), VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); } ierr = VecAXPY(VEC_V(j), -Rshift[j-2], VEC_V(j-1));CHKERRQ(ierr); ierr = VecAXPY(VEC_V(j), Scale[j-2]*Ishift[j-2]*Ishift[j-2], VEC_V(j-2));CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; } } /* Augment the subspace with the eigenvectors*/ while (j <= KspSize) { ierr = KSP_PCApplyBAorAB(ksp, agmres->U[j - max_k - 1], VEC_V(j), VEC_TMP_MATOP);CHKERRQ(ierr); #if defined(KSP_AGMRES_NONORM) Scale[j] = 1.0; #else ierr = VecScale(VEC_V(j), Scale[j-1]);CHKERRQ(ierr); ierr = VecNorm(VEC_V(j), NORM_2, &(Scale[j]));CHKERRQ(ierr); Scale[j] = 1.0/Scale[j]; #endif agmres->matvecs += 1; j++; } ierr = PetscLogEventEnd(KSP_AGMRESBuildBasis, ksp, 0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp,PetscInt it) { KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscErrorCode ierr; PetscInt max_k = agmres->max_k; /* Size of the non-augmented Krylov basis */ PetscInt i, j; PetscInt r = agmres->r; /* current number of augmented eigenvectors */ PetscBLASInt KspSize; PetscBLASInt lC; PetscBLASInt N; PetscBLASInt ldH = N + 1; PetscBLASInt lwork; PetscBLASInt info, nrhs = 1; PetscFunctionBegin; ierr = PetscBLASIntCast(KSPSIZE,&KspSize);CHKERRQ(ierr); ierr = PetscBLASIntCast(4 * (KspSize+1),&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(KspSize+1,&lC);CHKERRQ(ierr); ierr = PetscBLASIntCast(MAXKSPSIZE + 1,&N);CHKERRQ(ierr); ierr = PetscBLASIntCast(N + 1,&ldH);CHKERRQ(ierr); /* Save a copy of the Hessenberg matrix */ for (j = 0; j < N-1; j++) { for (i = 0; i < N; i++) { *HS(i,j) = *H(i,j); } } /* QR factorize the Hessenberg matrix */ #if defined(PETSC_MISSING_LAPACK_GEQRF) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGEQRF INFO=%d", info); #endif /* Update the right hand side of the least square problem */ ierr = PetscMemzero(agmres->nrs, N*sizeof(PetscScalar));CHKERRQ(ierr); agmres->nrs[0] = ksp->rnorm; #if defined(PETSC_MISSING_LAPACK_ORMQR) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKormqr",LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XORMQR INFO=%d",info); #endif ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]); /* solve the least-square problem */ #if defined(PETSC_MISSING_LAPACK_TRTRS) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRTRS - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XTRTRS INFO=%d",info); #endif /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */ ierr = VecZeroEntries(VEC_TMP);CHKERRQ(ierr); ierr = VecMAXPY(VEC_TMP, max_k, agmres->nrs, &VEC_V(0));CHKERRQ(ierr); if (!agmres->DeflPrecond) { ierr = VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U);CHKERRQ(ierr); } if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); ierr = VecCopy(VEC_TMP_MATOP, VEC_TMP);CHKERRQ(ierr); } ierr = KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* add the solution to the previous one */ ierr = VecAXPY(ksp->vec_sol, 1.0, VEC_TMP);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode KSPAGMRESRoddec(KSP ksp, PetscInt nvec) { KSP_AGMRES *agmres = (KSP_AGMRES*) ksp->data; MPI_Comm comm; PetscScalar *Qloc = agmres->Qloc; PetscScalar *sgn = agmres->sgn; PetscScalar *tloc = agmres->tloc; PetscErrorCode ierr; PetscReal *wbufptr = agmres->wbufptr; PetscMPIInt rank = agmres->rank; PetscMPIInt First = agmres->First; PetscMPIInt Last = agmres->Last; PetscBLASInt nloc,pas,len; PetscInt d, i, j, k; PetscInt pos,tag; PetscReal c, s, rho, Ajj, val, tt, old; PetscScalar *col; MPI_Status status; PetscBLASInt N = MAXKSPSIZE + 1; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr); tag = 0x666; ierr = PetscLogEventBegin(KSP_AGMRESRoddec,ksp,0,0,0);CHKERRQ(ierr); ierr = PetscMemzero(agmres->Rloc, N*N*sizeof(PetscScalar));CHKERRQ(ierr); /* check input arguments */ if (nvec < 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "The number of input vectors shoud be positive"); ierr = VecGetLocalSize(VEC_V(0), &nloc);CHKERRQ(ierr); if (nvec > nloc) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_WRONG, "In QR factorization, the number of local rows should be greater or equal to the number of columns"); pas = 1; k = 0; /* Copy the vectors of the basis */ for (j = 0; j < nvec; j++) { ierr = VecGetArray(VEC_V(j), &col);CHKERRQ(ierr); PetscStackCallBLAS("BLAScopy",BLAScopy_(&nloc, col, &pas, &Qloc[j*nloc], &pas)); ierr = VecRestoreArray(VEC_V(j), &col);CHKERRQ(ierr); } /* Each process performs a local QR on its own block */ for (j = 0; j < nvec; j++) { len = nloc - j; Ajj = Qloc[j*nloc+j]; rho = -PetscSign(Ajj) * BLASnrm2_(&len, &(Qloc[j*nloc+j]), &pas); if (rho == 0.0) tloc[j] = 0.0; else { tloc[j] = (Ajj - rho) / rho; len = len - 1; val = 1.0 / (Ajj - rho); PetscStackCallBLAS("BLASscal",BLASscal_(&len, &val, &(Qloc[j*nloc+j+1]), &pas)); Qloc[j*nloc+j] = 1.0; len = len + 1; for (k = j + 1; k < nvec; k++) { PetscStackCallBLAS("BLASdot",tt = tloc[j] * BLASdot_(&len, &(Qloc[j*nloc+j]), &pas, &(Qloc[k*nloc+j]), &pas)); PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&len, &tt, &(Qloc[j*nloc+j]), &pas, &(Qloc[k*nloc+j]), &pas)); } Qloc[j*nloc+j] = rho; } } /*annihilate undesirable Rloc, diagonal by diagonal*/ for (d = 0; d < nvec; d++) { len = nvec - d; if (rank == First) { PetscStackCallBLAS("BLAScopy",BLAScopy_(&len, &(Qloc[d*nloc+d]), &nloc, &(wbufptr[d]), &pas)); ierr = MPI_Send(&(wbufptr[d]), len, MPIU_SCALAR, rank + 1, tag, comm);CHKERRQ(ierr); } else { ierr = MPI_Recv(&(wbufptr[d]), len, MPIU_SCALAR, rank - 1, tag, comm, &status);CHKERRQ(ierr); /*Elimination of Rloc(1,d)*/ c = wbufptr[d]; s = Qloc[d*nloc]; ierr = KSPAGMRESRoddecGivens(&c, &s, &rho, 1); /*Apply Givens Rotation*/ for (k = d; k < nvec; k++) { old = wbufptr[k]; wbufptr[k] = c * old - s * Qloc[k*nloc]; Qloc[k*nloc] = s * old + c * Qloc[k*nloc]; } Qloc[d*nloc] = rho; if (rank != Last) { ierr = MPI_Send(& (wbufptr[d]), len, MPIU_SCALAR, rank + 1, tag, comm);CHKERRQ(ierr); } /* zero-out the d-th diagonal of Rloc ...*/ for (j = d + 1; j < nvec; j++) { /* elimination of Rloc[i][j]*/ i = j - d; c = Qloc[j*nloc+i-1]; s = Qloc[j*nloc+i]; ierr = KSPAGMRESRoddecGivens(&c, &s, &rho, 1);CHKERRQ(ierr); for (k = j; k < nvec; k++) { old = Qloc[k*nloc+i-1]; Qloc[k*nloc+i-1] = c * old - s * Qloc[k*nloc+i]; Qloc[k*nloc+i] = s * old + c * Qloc[k*nloc+i]; } Qloc[j*nloc+i] = rho; } if (rank == Last) { PetscStackCallBLAS("BLAScopy",BLAScopy_(&len, &(wbufptr[d]), &pas, RLOC(d,d), &N)); for (k = d + 1; k < nvec; k++) *RLOC(k,d) = 0.0; } } } if (rank == Last) { for (d = 0; d < nvec; d++) { pos = nvec - d; sgn[d] = PetscSign(*RLOC(d,d)); PetscStackCallBLAS("BLASscal",BLASscal_(&pos, &(sgn[d]), RLOC(d,d), &N)); } } /*BroadCast Rloc to all other processes * NWD : should not be needed */ ierr = MPI_Bcast(agmres->Rloc,N*N,MPIU_SCALAR,Last,comm);CHKERRQ(ierr); ierr = PetscLogEventEnd(KSP_AGMRESRoddec,ksp,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }