void FluxFluxBoundary<D> :: T_CalcElementMatrix (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatMatrix<SCAL> elmat, LocalHeap & lh) const { const CompoundFiniteElement & cfel // product space = dynamic_cast<const CompoundFiniteElement&> (base_fel); // This FE is already multiplied by normal: const HDivNormalFiniteElement<D-1> & fel_q = // q.n space dynamic_cast<const HDivNormalFiniteElement<D-1>&> (cfel[GetInd1()]); const HDivNormalFiniteElement<D-1> & fel_r = // r.n space dynamic_cast<const HDivNormalFiniteElement<D-1>&> (cfel[GetInd2()]); elmat = SCAL(0.0); IntRange rq = cfel.GetRange(GetInd1()); IntRange rr = cfel.GetRange(GetInd2()); int ndofq = rq.Size(); int ndofr = rr.Size(); FlatMatrix<SCAL> submat(ndofr, ndofq, lh); submat = SCAL(0.0); FlatVector<> qshape(fel_q.GetNDof(), lh); FlatVector<> rshape(fel_r.GetNDof(), lh); const IntegrationRule ir(fel_q.ElementType(), fel_q.Order() + fel_r.Order()); for (int i = 0 ; i < ir.GetNIP(); i++) { MappedIntegrationPoint<D-1,D> mip(ir[i], eltrans); SCAL cc = coeff_c -> T_Evaluate<SCAL>(mip); fel_r.CalcShape (ir[i], rshape); fel_q.CalcShape (ir[i], qshape); // mapped q.n-shape is simply reference q.n-shape / measure qshape *= 1.0/mip.GetMeasure(); rshape *= 1.0/mip.GetMeasure(); // [ndofr x 1] [1 x ndofq] submat += (cc*mip.GetWeight()) * rshape * Trans(qshape); } elmat.Rows(rr).Cols(rq) += submat; if (GetInd1() != GetInd2()) elmat.Rows(rq).Cols(rr) += Conj(Trans(submat)); }
void TraceTraceBoundary<D> :: T_CalcElementMatrix (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatMatrix<SCAL> elmat, LocalHeap & lh) const { const CompoundFiniteElement & cfel // product space = dynamic_cast<const CompoundFiniteElement&> (base_fel); // get surface elements const ScalarFiniteElement<D-1> & fel_u = // u space dynamic_cast<const ScalarFiniteElement<D-1>&> (cfel[GetInd1()]); const ScalarFiniteElement<D-1> & fel_e = // u space dynamic_cast<const ScalarFiniteElement<D-1>&> (cfel[GetInd2()]); elmat = SCAL(0.0); IntRange ru = cfel.GetRange(GetInd1()); IntRange re = cfel.GetRange(GetInd2()); int ndofu = ru.Size(); int ndofe = re.Size(); FlatMatrix<SCAL> submat(ndofe, ndofu, lh); submat = SCAL(0.0); FlatVector<> ushape(fel_u.GetNDof(), lh); FlatVector<> eshape(fel_e.GetNDof(), lh); const IntegrationRule ir(fel_u.ElementType(), fel_u.Order() + fel_e.Order()); for (int i = 0 ; i < ir.GetNIP(); i++) { MappedIntegrationPoint<D-1,D> mip(ir[i], eltrans); SCAL cc = coeff_c -> T_Evaluate<SCAL>(mip); fel_u.CalcShape (ir[i], ushape); fel_e.CalcShape (ir[i], eshape); // [ndofe x 1] [1 x ndofu] submat += (cc*mip.GetWeight()) * eshape * Trans(ushape); } elmat.Rows(re).Cols(ru) += submat; if (GetInd1() != GetInd2()) elmat.Rows(ru).Cols(re) += Conj(Trans(submat)); }
void EyeEye<D>::T_CalcElementMatrix (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatMatrix<SCAL> elmat, LocalHeap & lh) const { const CompoundFiniteElement & cfel // product space = dynamic_cast<const CompoundFiniteElement&> (base_fel); const ScalarFiniteElement<D> & fel_u = // u space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd1()]); const ScalarFiniteElement<D> & fel_e = // e space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd2()]); elmat = SCAL(0.0); IntRange ru = cfel.GetRange(GetInd1()); IntRange re = cfel.GetRange(GetInd2()); int ndofe = re.Size(); int ndofu = ru.Size(); Vector<> ushape(ndofu); Vector<> eshape(ndofe); ELEMENT_TYPE eltype = fel_u.ElementType(); const IntegrationRule & ir = SelectIntegrationRule(eltype, fel_u.Order()+fel_e.Order()); FlatMatrix<SCAL> submat(ndofe,ndofu,lh); submat = SCAL(0.0); for(int k=0; k<ir.GetNIP(); k++) { MappedIntegrationPoint<D,D> mip (ir[k],eltrans); fel_u.CalcShape( ir[k], ushape ); fel_e.CalcShape( ir[k], eshape ); SCAL fac = (coeff_a -> T_Evaluate<SCAL>(mip))* mip.GetWeight() ; // [ndofe x D] * [D x ndofu] submat += fac * eshape * Trans(ushape) ; } elmat.Rows(re).Cols(ru) += submat; if (GetInd1() != GetInd2()) elmat.Rows(ru).Cols(re) += Conj(Trans(submat)); }
void MLGP_SCAL(unsigned N, FLOAT a, FLOAT* X, unsigned incX) { #ifdef DOUBLE #define SCAL(...) dscal_(__VA_ARGS__) #else #define SCAL(...) sscal_(__VA_ARGS__) #endif return SCAL(&N, &a, X, &incX); }
void GradGrad<D>::T_CalcElementMatrix (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatMatrix<SCAL> elmat, LocalHeap & lh) const { const CompoundFiniteElement & cfel // product space = dynamic_cast<const CompoundFiniteElement&> (base_fel); const ScalarFiniteElement<D> & fel_u = // u space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd1()]); const ScalarFiniteElement<D> & fel_e = // e space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd2()]); elmat = SCAL(0.0); // u dofs [ru.First() : ru.Next()-1], e dofs [re.First() : re.Next()-1] IntRange ru = cfel.GetRange(GetInd1()); IntRange re = cfel.GetRange(GetInd2()); int ndofe = re.Size(); int ndofu = ru.Size(); FlatMatrixFixWidth<D> dum(ndofu,lh); // to store grad(u-basis) FlatMatrixFixWidth<D> dem(ndofe,lh); // to store grad(e-basis) ELEMENT_TYPE eltype // get the type of element: = fel_u.ElementType(); // ET_TRIG in 2d, ET_TET in 3d. const IntegrationRule & // Note: p = fel_u.Order()-1 ir = SelectIntegrationRule(eltype, fel_u.Order()+fel_e.Order()-2); FlatMatrix<SCAL> submat(ndofe,ndofu,lh); submat = 0.0; for(int k=0; k<ir.GetNIP(); k++) { MappedIntegrationPoint<D,D> mip (ir[k],eltrans); // set grad(u-basis) and grad(e-basis) at mapped pts in dum and dem. fel_u.CalcMappedDShape( mip, dum ); fel_e.CalcMappedDShape( mip, dem ); // evaluate coefficient SCAL fac = coeff_a -> T_Evaluate<SCAL>(mip); fac *= mip.GetWeight() ; // [ndofe x D] * [D x ndofu] submat += fac * dem * Trans(dum) ; } elmat.Rows(re).Cols(ru) += submat; if (GetInd1() != GetInd2()) elmat.Rows(ru).Cols(re) += Conj(Trans(submat)); }
//error function(mininize); get -1 * gradient // -d(E)/d(zi) = (if i is target) ? 1-zi : -zi void Process::set_softmax_gradient(const REAL* s_target,const REAL* s_output,REAL* s_gradient,int bsize,int c) { const REAL *optr=s_output; REAL *gptr=s_gradient; const REAL *tptr=s_target; int n = bsize*c; REAL f1=-1.0; memcpy(s_gradient,s_output,n*sizeof(REAL)); SCAL(&n,&f1,s_gradient,&inc1); for (int b=0; b<bsize; b++) { if (*tptr<0.0) ErrorN("negative index %f at %d",*tptr,b); int tidx=(uint) *tptr++; gptr[tidx] += 1.0; gptr+=c; optr+=c; } }
void Do(LocalHeap & lh) { // We proceed in three steps: // 1. Compute the difference between Q and q // 2. Compute the H(div) Schur complement // 3. Apply Schur complement to the difference // grid function with (interpolated) exact flux, grad(u) BaseVector& vecQ = Q->GetVector(); // numerical flux q BaseVector& vecq = q->GetVector(); // p.w. constant gridfunction to store element-wise error BaseVector& errvec = err->GetVector(); errvec.FV<double>() = 0.0; double sqer =0.0; // this will contain the total error square for(int k=0; k<ma->GetNE(); k++) { ElementId ei (VOL, k); double elndof = ext->GetFE(k,lh).GetNDof(); Vector<SCAL> diff(elndof); // dof nrs: global, global inner, local inner, local Schur Array<int> Gn, Ginn, Linn, Lsn; // compute the difference between Q and q ext->GetDofNrs(k,Gn); // Global# of all dofs on element k diff = SCAL(0.0); for(int j=0; j<elndof; j++) diff[j] = vecQ.FV<SCAL>()[Gn[j]] - vecq.FV<SCAL>()[Gn[j]]; // H(div) Gram matrix (given in two parts in pde file) Matrix<double> elmat(elndof), elmat2(elndof); elmat = 0.0; elmat2 = 0.0; hdivip->GetIntegrator(0)-> CalcElementMatrix(ext->GetFE(ei,lh),ma->GetTrafo(ei,lh),elmat,lh); hdivip->GetIntegrator(1)-> CalcElementMatrix(ext->GetFE(ei,lh),ma->GetTrafo(ei,lh),elmat2,lh); elmat += elmat2; // compute the H(div) Schur complement ext->GetInnerDofNrs(k,Ginn); // Global# of inner dofs on element k for(int j=0; j<elndof; j++) if (Ginn.Contains( Gn[j] )) Linn.Append(j); // Local# of inner dofs on element k else Lsn.Append(j); // Local# of Schur dofs on element k int ielndof = Linn.Size(); Matrix<double> elmati(ielndof),elmatiinv(ielndof); elmati = elmat.Rows(Linn).Cols(Linn); CalcInverse(elmati,elmatiinv); // apply Schur complement to the difference int selndof = elndof - ielndof; Vector<SCAL> diffs(selndof); Matrix<double> S(selndof), Asi(selndof,ielndof); diffs = diff(Lsn); // S = A_ss - A_si * inv(A_ii) * A_is Asi = elmat.Rows(Lsn).Cols(Linn); S = elmat.Rows(Lsn).Cols(Lsn); S -= Asi * elmatiinv * Trans(Asi); // error = (S * diffs, diffs) errvec.FVDouble()[k] = fabs(InnerProduct(diffs, S * diffs)); sqer += errvec.FVDouble()[k]; } cout<<"Discrete H^(-1/2) norm of error in q = "<<sqrt(sqer)<<endl; // write file (don't know what the last argument of AddVariable // does, but 6 seems to be the value everywhere! It seems to intializes // an object of class IM (important message). GetPDE()->AddVariable (string("fluxerr.")+GetName()+".value", sqrt(sqer), 6); }
void minimize_dual(DOUBLE *Xopt, DOUBLE *Xorig, INT length, DOUBLE *SSt, DOUBLE *SXt, DOUBLE *SXtXSt, DOUBLE trXXt, \ DOUBLE c, INT N, INT K) { DOUBLE INTERV = 0.1; DOUBLE EXT = 3.0; INT MAX = 20; DOUBLE RATIO = (DOUBLE) 10; DOUBLE SIG = 0.1; DOUBLE RHO = SIG / (DOUBLE) 2; INT MN = K * 1; CHAR lamch_opt = 'U'; DOUBLE realmin = LAMCH(&lamch_opt); DOUBLE red = 1; INT i = 0; INT ls_failed = 0; DOUBLE f0; DOUBLE *df0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *dftemp = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *df3 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *s = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE d0; INT derivFlag = 1; DOUBLE *X = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); datacpy(X, Xorig, MN); INT maxNK = IMAX(N, K); DOUBLE *SStLambda = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); DOUBLE *tempMatrix = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); dual_obj_grad(&f0, df0, X, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix); INT incx = 1; INT incy = 1; datacpy(s, df0, MN); DOUBLE alpha = -1; SCAL(&MN, &alpha, s, &incx); d0 = - DOT(&MN, s, &incx, s, &incy); DOUBLE x1; DOUBLE x2; DOUBLE x3; DOUBLE x4; DOUBLE *X0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *X3 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE F0; DOUBLE *dF0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); INT Mmin; DOUBLE f1; DOUBLE f2; DOUBLE f3; DOUBLE f4; DOUBLE d1; DOUBLE d2; DOUBLE d3; DOUBLE d4; INT success; DOUBLE A; DOUBLE B; DOUBLE sqrtquantity; DOUBLE tempnorm; DOUBLE tempinprod1; DOUBLE tempinprod2; DOUBLE tempscalefactor; x3 = red / (1 - d0); while (i++ < length) { datacpy(X0, X, MN); datacpy(dF0, df0, MN); F0 = f0; Mmin = MAX; while (1) { x2 = 0; f2 = f0; d2 = d0; f3 = f0; datacpy(df3, df0, MN); success = 0; while ((!success) && (Mmin > 0)) { Mmin = Mmin - 1; datacpy(X3, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X3, &incy); dual_obj_grad(&f3, df3, X3, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix); if (ISNAN(f3) || ISINF(f3)) { /* any(isnan(df3)+isinf(df3)) */ x3 = (x2 + x3) * 0.5; } else { success = 1; } } if (f3 < F0) { datacpy(X0, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X0, &incy); datacpy(dF0, df3, MN); F0 = f3; } d3 = DOT(&MN, df3, &incx, s, &incy); if ((d3 > SIG * d0) || (f3 > f0 + x3 * RHO * d0) || (Mmin == 0)) { break; } x1 = x2; f1 = f2; d1 = d2; x2 = x3; f2 = f3; d2 = d3; A = 6 * (f1 - f2) + 3 * (d2 + d1) * (x2 - x1); B = 3 * (f2 - f1) - (2 * d1 + d2) * (x2 - x1); sqrtquantity = B * B - A * d1 * (x2 - x1); if (sqrtquantity < 0) { x3 = x2 * EXT; } else { x3 = x1 - d1 * SQR(x2 - x1) / (B + SQRT(sqrtquantity)); if (ISNAN(x3) || ISINF(x3) || (x3 < 0)) { x3 = x2 * EXT; } else if (x3 > x2 * EXT) { x3 = x2 * EXT; } else if (x3 < x2 + INTERV * (x2 - x1)) { x3 = x2 + INTERV * (x2 - x1); } } } while (((ABS(d3) > - SIG * d0) || (f3 > f0 + x3 * RHO * d0)) && (Mmin > 0)) { if ((d3 > 0) || (f3 > f0 + x3 * RHO * d0)) { x4 = x3; f4 = f3; d4 = d3; } else { x2 = x3; f2 = f3; d2 = d3; } if (f4 > f0) { x3 = x2 - (0.5 * d2 * SQR(x4 - x2)) / (f4 - f2 - d2 * (x4 - x2)); } else { A = 6 * (f2 - f4) / (x4 - x2) + 3 * (d4 + d2); B = 3 * (f4 - f2) - (2 * d2 + d4) * (x4 - x2); x3 = x2 + (SQRT(B * B - A * d2 * SQR(x4 - x2)) - B) / A; } if (ISNAN(x3) || ISINF(x3)) { x3 = (x2 + x4) * 0.5; } x3 = IMAX(IMIN(x3, x4 - INTERV * (x4 - x2)), x2 + INTERV * (x4 - x2)); datacpy(X3, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X3, &incy); dual_obj_grad(&f3, df3, X3, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix); if (f3 < F0) { datacpy(X0, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X0, &incy); datacpy(dF0, df3, MN); F0 = f3; } Mmin = Mmin - 1; d3 = DOT(&MN, df3, &incx, s, &incy); } if ((ABS(d3) < - SIG * d0) && (f3 < f0 + x3 * RHO * d0)) { alpha = x3; AXPY(&MN, &alpha, s, &incx, X, &incy); f0 = f3; datacpy(dftemp, df3, MN); alpha = -1; AXPY(&MN, &alpha, df0, &incx, dftemp, &incy); tempinprod1 = DOT(&MN, dftemp, &incx, df3, &incy); tempnorm = NRM2(&MN, df0, &incx); tempinprod2 = SQR(tempnorm); tempscalefactor = tempinprod1 / tempinprod2; alpha = tempscalefactor; SCAL(&MN, &alpha, s, &incx); alpha = -1; AXPY(&MN, &alpha, df3, &incx, s, &incy); datacpy(df0, df3, MN); d3 = d0; d0 = DOT(&MN, df0, &incx, s, &incy); if (d0 > 0) { datacpy(s, df0, MN); alpha = -1; SCAL(&MN, &alpha, s, &incx); tempnorm = NRM2(&MN, s, &incx); d0 = - SQR(tempnorm); } x3 = x3 * IMIN(RATIO, d3 / (d0 - realmin)); ls_failed = 0; } else { datacpy(X, X0, MN); datacpy(df0, dF0, MN); f0 = F0; if ((ls_failed == 1) || (i > length)) { break; } datacpy(s, df0, MN); alpha = -1; SCAL(&MN, &alpha, s, &incx); tempnorm = NRM2(&MN, s, &incx); d0 = - SQR(tempnorm); x3 = 1 / (1 - d0); ls_failed = 1; } } datacpy(Xopt, X, MN); FREE(SStLambda); FREE(tempMatrix); FREE(df0); FREE(dftemp); FREE(df3); FREE(s); FREE(X); FREE(X0); FREE(X3); FREE(dF0); }
void Arnoldi<SCAL>::Calc (int numval, Array<Complex> & lam, int numev, Array<shared_ptr<BaseVector>> & hevecs, const BaseMatrix * pre) const { static Timer t("arnoldi"); static Timer t2("arnoldi - orthogonalize"); static Timer t3("arnoldi - compute large vectors"); RegionTimer reg(t); auto hv = a.CreateVector(); auto hv2 = a.CreateVector(); auto hva = a.CreateVector(); auto hvm = a.CreateVector(); int n = hv.FV<SCAL>().Size(); int m = min2 (numval, n); Matrix<SCAL> matH(m); Array<shared_ptr<BaseVector>> abv(m); for (int i = 0; i < m; i++) abv[i] = a.CreateVector(); auto mat_shift = a.CreateMatrix(); mat_shift->AsVector() = a.AsVector() - shift*b.AsVector(); shared_ptr<BaseMatrix> inv; if (!pre) inv = mat_shift->InverseMatrix (freedofs); else { auto itso = make_shared<GMRESSolver<double>> (*mat_shift, *pre); itso->SetPrintRates(1); itso->SetMaxSteps(2000); inv = itso; } hv.SetRandom(); hv.SetParallelStatus (CUMULATED); FlatVector<SCAL> fv = hv.FV<SCAL>(); if (freedofs) for (int i = 0; i < hv.Size(); i++) if (! (*freedofs)[i] ) fv(i) = 0; t2.Start(); // matV = SCAL(0.0); why ? matH = SCAL(0.0); *hv2 = *hv; SCAL len = sqrt (S_InnerProduct<SCAL> (*hv, *hv2)); // parallel *hv /= len; for (int i = 0; i < m; i++) { cout << IM(1) << "\ri = " << i << "/" << m << flush; /* for (int j = 0; j < n; j++) matV(i,j) = hv.FV<SCAL>()(j); */ *abv[i] = *hv; *hva = b * *hv; *hvm = *inv * *hva; for (int j = 0; j <= i; j++) { /* SCAL sum = 0.0; for (int k = 0; k < n; k++) sum += hvm.FV<SCAL>()(k) * matV(j,k); matH(j,i) = sum; for (int k = 0; k < n; k++) hvm.FV<SCAL>()(k) -= sum * matV(j,k); */ /* SCAL sum = 0.0; FlatVector<SCAL> abvj = abv[j] -> FV<SCAL>(); FlatVector<SCAL> fv_hvm = hvm.FV<SCAL>(); for (int k = 0; k < n; k++) sum += fv_hvm(k) * abvj(k); matH(j,i) = sum; for (int k = 0; k < n; k++) fv_hvm(k) -= sum * abvj(k); */ matH(j,i) = S_InnerProduct<SCAL> (*hvm, *abv[j]); *hvm -= matH(j,i) * *abv[j]; } *hv = *hvm; *hv2 = *hv; SCAL len = sqrt (S_InnerProduct<SCAL> (*hv, *hv2)); if (i<m-1) matH(i+1,i) = len; *hv /= len; } t2.Stop(); t2.AddFlops (double(n)*m*m); cout << "n = " << n << ", m = " << m << " n*m*m = " << n*m*m << endl; cout << IM(1) << "\ri = " << m << "/" << m << endl; Vector<Complex> lami(m); Matrix<Complex> evecs(m); Matrix<Complex> matHt(m); matHt = Trans (matH); evecs = Complex (0.0); lami = Complex (0.0); cout << "Solve Hessenberg evp with Lapack ... " << flush; LapackHessenbergEP (matH.Height(), &matHt(0,0), &lami(0), &evecs(0,0)); cout << "done" << endl; for (int i = 0; i < m; i++) lami(i) = 1.0 / lami(i) + shift; lam.SetSize (m); for (int i = 0; i < m; i++) lam[i] = lami(i); t3.Start(); if (numev>0) { int nout = min2 (numev, m); hevecs.SetSize(nout); for (int i = 0; i< nout; i++) { hevecs[i] = a.CreateVector(); *hevecs[i] = 0; for (int j = 0; j < m; j++) *hevecs[i] += evecs(i,j) * *abv[j]; // hevecs[i]->FVComplex() = Trans(matV)*evecs.Row(i); } } t3.Stop(); }
/* * ... RHO[0] = RHO_PREV * ... RHO[1] = RHO * ... RHO[2] = ALPHA * ... RHO[3] = BETA * ... RHO[4] = |X| */ void OPK_TRCG(const opk_index_t n, real_t p[], const real_t q[], real_t r[], real_t x[], const real_t z[], const real_t delta, real_t rho[5], opk_cg_state_t *state) { const real_t zero = OPK_REALCONST(0.0); const real_t one = OPK_REALCONST(1.0); real_t a, b, c, d, e, xn, sum, tmp; real_t pq, alpha, beta; long i; if (delta <= zero) { *state = OPK_CG_ERROR; return; } switch (*state) { case OPK_CG_START: /* Start with no initial guess: fill x with zeros, there is no needs to compute q = A.x0 since it is 0. */ ZERO(n, x); rho[0] = rho[1] = rho[2] = rho[3] = rho[4] = zero; *state = OPK_CG_NEWX; return; case OPK_CG_RESTART: /* Start or restart with initial x given: copy initial x into p and request caller to compte: q = A.p */ rho[0] = rho[1] = rho[2] = rho[3] = zero; xn = NRM2(n, x); if (xn >= delta) { if (xn > delta) { SCAL(n, delta/xn, x); } rho[4] = delta; /* |X| */ *state = OPK_CG_TRUNCATED; } else { rho[4] = xn; /* |X| */ COPY(n, x, p); *state = OPK_CG_AP; } return; case OPK_CG_NEWX: if (z == NULL) { /* No preconditioning. Take z = r and jump to next case to compute conjugate gradient direction. */ z = r; } else { /* Preconditioned version of the algorithm. Request caller to compute preconditioned residuals. */ *state = OPK_CG_PRECOND; return; } case OPK_CG_PRECOND: /* Caller has been requested to compute preconditioned residuals. Use conjugate gradients recurrence to compute next search direption p. */ rho[1] = DOT(n, r, z); if (rho[1] <= zero) { /* If r'.z too small, then algorithm has converged or preconditioner is not positive definite. */ *state = (rho[1] < zero ? OPK_CG_NON_CONVEX : OPK_CG_FINISH); return; } if (rho[0] > zero) { beta = rho[1]/rho[0]; for (i = 0; i < n; ++i) { p[i] = z[i] + beta*p[i]; } } else { beta = zero; COPY(n, z, p); } rho[3] = beta; /* Request caller to compute: q = A.p */ *state = OPK_CG_AP; return; case OPK_CG_AP: if (rho[1] > zero) { /* Caller has been requested to compute q = A.p. Compute optimal step size and update the variables and the residuals. */ pq = DOT(n, p, q); if (pq > zero) { alpha = rho[1]/pq; /* optimal step size */ rho[2] = alpha; /* memorize optimal step size */ if (alpha == zero) { /* If alpha too small, then algorithm has converged. */ *state = OPK_CG_FINISH; return; } sum = zero; for (i = 0; i < n; ++i) { tmp = x[i] + alpha*p[i]; sum += tmp*tmp; } xn = SQRT(sum); if (xn <= delta) { /* Optimal step not too long, take it. */ AXPY(n, alpha, p, x); AXPY(n, -alpha, q, r); rho[0] = rho[1]; rho[4] = xn; *state = (xn < delta ? OPK_CG_NEWX : OPK_CG_TRUNCATED); return; } } /* Operator A is not positive definite (P'.A.P < 0) or optimal step leads us ouside the trust region. In these cases, we take a truncated step X + ALPHA*P along P so that |X + ALPHA*P| = DELTA with ALPHA >= 0. This amounts to find the positive root of: A*ALPHA^2 + 2*B*ALPHA + C with: A = |P|^2, B = X'.P, and C = |X|^2 - DELTA^2. Note that the reduced discriminant is D = B*B - A*C which expands to D = (DELTA^2 - |X|^2*sin(THETA)^2)*|P|^2 with THETA the angle between X and P, as DELTA > |X|, then D > 0 must hold. */ a = DOT(n, p, p); if (a <= zero) { /* This can only occurs if P = 0 (and thus A = 0). It is probably due to rounding errors and the algorithm should be restarted. */ *state = OPK_CG_FINISH; return; } b = DOT(n, x, p); c = (rho[4] + delta)*(rho[4] - delta); /* RHO[4] = |X| */ if (c >= zero) { /* This can only occurs if the caller has modified DELTA or RHO[4], which stores the norm of X, and thus is considered as an error. */ *state = OPK_CG_ERROR; return; } /* Normalize the coefficients to avoid overflows. */ d = FABS(a); if ((e = FABS(b)) > d) d = e; if ((e = FABS(c)) > d) d = e; d = one/d; a *= d; b *= d; c *= d; /* Compute the reduced discriminant. */ d = b*b - a*c; if (d > zero) { /* The polynomial has two real roots of opposite signs (because C/A < 0 by construction), ALPHA is the positive one. Compute this root avoiding numerical errors (by adding numbers of same sign). */ e = SQRT(d); if (b >= zero) { alpha = -c/(e + b); } else { alpha = (e - b)/a; } } else { /* D > 0 must hold. There must be something wrong... */ *state = OPK_CG_ERROR; return; } if (alpha > zero) { AXPY(n, alpha, p, x); AXPY(n, -alpha, q, r); } rho[0] = rho[1]; rho[2] = alpha; /* memorize optimal step size */ rho[4] = delta; *state = OPK_CG_TRUNCATED; return; } else { /* Caller has been requested to compute q = A.x0 Update the residuals. */ for (i = 0; i < n; ++i) { r[i] -= q[i]; } rho[2] = zero; /* ALPHA = 0 (no step has been taken yet) */ rho[3] = zero; /* BETA = 0 (ditto) */ } /* Variables X and residuals R available for inspection. */ *state = OPK_CG_NEWX; return; case OPK_CG_FINISH: case OPK_CG_NON_CONVEX: case OPK_CG_TRUNCATED: /* The caller can restart the algorithm with final 'x', 'state' set to OPK_CG_RESTART and 'r' set to 'b' and, maybe, a larger 'delta'. */ return; default: /* There must be something wrong... */ *state = OPK_CG_ERROR; return; } }
void FluxTrace<D>::T_CalcElementMatrix (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatMatrix<SCAL> elmat, LocalHeap & lh) const { const CompoundFiniteElement & cfel // product space = dynamic_cast<const CompoundFiniteElement&> (base_fel); const HDivFiniteElement<D> & fel_q = // q space dynamic_cast<const HDivFiniteElement<D>& > (cfel[GetInd1()]); const ScalarFiniteElement<D> & fel_e = // e space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd2()]); elmat = SCAL(0.0); IntRange rq = cfel.GetRange(GetInd1()); IntRange re = cfel.GetRange(GetInd2()); int ndofq = rq.Size(); int ndofe = re.Size(); FlatMatrix<SCAL> submat(ndofe,ndofq,lh); FlatMatrixFixWidth<D> shapeq(ndofq,lh); // q-basis (vec) values FlatVector<> shapee(ndofe,lh); // e-basis basis ELEMENT_TYPE eltype // get the type of element: = fel_q.ElementType(); // ET_TRIG in 2d, ET_TET in 3d. // transform facet integration points to volume integration points Facet2ElementTrafo transform(eltype); int nfa = ElementTopology::GetNFacets(eltype); /* nfa = number of facets of an element */ submat = 0.0; for(int k = 0; k<nfa; k++) { // type of geometry of k-th facet ELEMENT_TYPE eltype_facet = ElementTopology::GetFacetType(eltype, k); const IntegrationRule & facet_ir = SelectIntegrationRule (eltype_facet, fel_q.Order()+fel_e.Order()); // reference element normal vector FlatVec<D> normal_ref = ElementTopology::GetNormals(eltype) [k]; for (int l = 0; l < facet_ir.GetNIP(); l++) { // map 1D facet points to volume integration points IntegrationPoint volume_ip = transform(k, facet_ir[l]); MappedIntegrationPoint<D,D> mip (volume_ip, eltrans); // compute normal on physcial element Mat<D> inv_jac = mip.GetJacobianInverse(); double det = mip.GetJacobiDet(); Vec<D> normal = fabs(det) * Trans(inv_jac) * normal_ref; double len = L2Norm(normal); normal /= len; double weight = facet_ir[l].Weight()*len; // mapped H(div) basis fn values and DG fn (no need to map) values fel_q.CalcMappedShape(mip,shapeq); fel_e.CalcShape(volume_ip,shapee); // evaluate coefficient SCAL dd = coeff_d -> T_Evaluate<SCAL>(mip); // [ndofe x 1] [ndofq x D] * [D x 1] submat += (dd*weight) * shapee * Trans( shapeq * normal ) ; } } elmat.Rows(re).Cols(rq) += submat; elmat.Rows(rq).Cols(re) += Conj(Trans(submat)); }
void nuclear_hard_thresholding(DOUBLE *X, DOUBLE *norm, INT rank, INT M, INT N, DOUBLE *sv, \ DOUBLE *svecsmall, DOUBLE *sveclarge, DOUBLE *work, INT lwork) { INT MINMN = IMIN(M, N); INT MAXMN = IMAX(M, N); INT svFlag = 0; if (sv == NULL) { sv = (DOUBLE *) malloc(MINMN * 1 * sizeof(DOUBLE)); svFlag = 1; } INT svecsmallFlag = 0; if (svecsmall == NULL) { svecsmall = (DOUBLE *) malloc(MINMN * MINMN * sizeof(DOUBLE)); svecsmallFlag = 1; } INT sveclargeFlag = 0; if (sveclarge == NULL) { sveclarge = (DOUBLE *) malloc(MAXMN * MINMN * sizeof(DOUBLE)); sveclargeFlag = 1; } CHAR jobu = 'S'; CHAR jobvt = 'S'; DOUBLE *u; DOUBLE *vt; if (MAXMN == M) { u = sveclarge; vt = svecsmall; } else { u = svecsmall; vt = sveclarge; } INT GESVDM = M; INT GESVDN = N; INT GESVDLDA = M; INT GESVDLDU = M; INT GESVDLDVT = MINMN; INT info; if (lwork == -1) { GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, work, &lwork, &info); if (svFlag == 1) { free(sv); } if (svecsmallFlag == 1) { free(svecsmall); } if (sveclargeFlag == 1) { free(sveclarge); } return; } INT workFlag = 0; if (lwork == 0) { DOUBLE workTemp; lwork = -1; GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, &workTemp, &lwork, &info); if (info != 0) { PRINTF("Error, INFO = %d. ", info); ERROR("LAPACK error."); } lwork = (INT) workTemp; work = (DOUBLE *) malloc(lwork * 1 * sizeof(DOUBLE)); workFlag = 1; } GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, X, &GESVDLDA, sv, u, &GESVDLDU, vt, &GESVDLDVT, work, &lwork, &info); if (info != 0) { PRINTF("Error, INFO = %d. ", info); ERROR("LAPACK error."); } INT iterMN; DOUBLE normtemp = 0; for (iterMN = 0; iterMN < rank; ++iterMN) { normtemp += sv[iterMN]; } if (norm != NULL) { *norm = normtemp; } for (iterMN = rank; iterMN < MINMN; ++iterMN) { sv[iterMN] = 0; } /* * TODO: Only multiply for singular vectors corresponding to non-zero singular values. */ if (MAXMN == M) { INT SCALN = M; INT incx = 1; for (iterMN = 0; iterMN < MINMN; ++iterMN) { SCAL(&SCALN, &sv[iterMN], &u[iterMN * M], &incx); } CHAR transa = 'N'; CHAR transb = 'N'; INT GEMMM = M; INT GEMMN = N; INT GEMMK = MINMN; DOUBLE alpha = 1; INT GEMMLDA = M; INT GEMMLDB = MINMN; DOUBLE beta = 0; INT GEMMLDC = M; GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, u, &GEMMLDA, vt, &GEMMLDB, &beta, X, &GEMMLDC); } else { INT SCALN = M; INT incx = 1; for (iterMN = 0; iterMN < MINMN; ++iterMN) { SCAL(&SCALN, &sv[iterMN], &u[iterMN * M], &incx); } CHAR transa = 'N'; CHAR transb = 'N'; INT GEMMM = M; INT GEMMN = N; INT GEMMK = MINMN; DOUBLE alpha = 1; INT GEMMLDA = M; INT GEMMLDB = MINMN; DOUBLE beta = 0; INT GEMMLDC = M; GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, u, &GEMMLDA, vt, &GEMMLDB, &beta, X, &GEMMLDC); } if (svFlag == 1) { free(sv); } if (svecsmallFlag == 1) { free(svecsmall); } if (sveclargeFlag == 1) { free(sveclarge); } if (workFlag == 1) { free(work); } }
void TraceTrace<D>::T_CalcElementMatrix (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatMatrix<SCAL> elmat, LocalHeap & lh) const { const CompoundFiniteElement & cfel // product space = dynamic_cast<const CompoundFiniteElement&> (base_fel); const ScalarFiniteElement<D> & fel_u = // u space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd1()]); const ScalarFiniteElement<D> & fel_e = // e space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd2()]); elmat = SCAL(0.0); IntRange ru = cfel.GetRange(GetInd1()); IntRange re = cfel.GetRange(GetInd2()); int ndofe = re.Size(); int ndofu = ru.Size(); FlatVector<> shapee(ndofe,lh); FlatVector<> shapeu(ndofu,lh); FlatMatrix<SCAL> submat(ndofe,ndofu, lh); submat = SCAL(0.0); ELEMENT_TYPE eltype = fel_u.ElementType(); Facet2ElementTrafo transform(eltype); int nfa = ElementTopology :: GetNFacets(eltype); for(int k = 0; k<nfa; k++) { // type of geometry of k-th facet ELEMENT_TYPE eltype_facet = ElementTopology::GetFacetType(eltype, k); const IntegrationRule & facet_ir = SelectIntegrationRule (eltype_facet, fel_u.Order()+fel_e.Order()); // reference element normal vector FlatVec<D> normal_ref = ElementTopology::GetNormals(eltype) [k]; for (int l = 0; l < facet_ir.GetNIP(); l++) { // map 1D facet points to volume integration points IntegrationPoint volume_ip = transform(k, facet_ir[l]); MappedIntegrationPoint<D,D> mip (volume_ip, eltrans); // compute normal on physcial element Mat<D> inv_jac = mip.GetJacobianInverse(); double det = mip.GetJacobiDet(); Vec<D> normal = fabs(det) * Trans(inv_jac) * normal_ref; double len = L2Norm(normal); normal /= len; double weight = facet_ir[l].Weight()*len; fel_e.CalcShape(volume_ip,shapee); fel_u.CalcShape(volume_ip,shapeu); SCAL cc = coeff_c -> T_Evaluate<SCAL>(mip); // [ndofe x 1] [1 x ndofu] submat += (cc*weight) * shapee * Trans(shapeu); } } elmat.Rows(re).Cols(ru) += submat; if (GetInd1() != GetInd2()) elmat.Rows(ru).Cols(re) += Conj(Trans(submat)); }
void nuclear_psd_hard_thresholding(DOUBLE *X, DOUBLE *norm, INT rank, INT M, DOUBLE *eigv, \ DOUBLE *eigvec, DOUBLE *work, INT lwork) { CHAR jobz = 'V'; CHAR uplo = 'U'; INT SYEVN = M; INT SYEVLDA = M; INT info; if (lwork == - 1) { SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, work, &lwork, &info); return; } INT eigvFlag = 0; if (eigv == NULL) { eigv = (DOUBLE *) MALLOC(M * 1 * sizeof(DOUBLE)); eigvFlag = 1; } INT eigvecFlag = 0; if (eigvec == NULL) { eigvec = (DOUBLE *) MALLOC(M * M * sizeof(DOUBLE)); eigvecFlag = 1; } datacpy(eigvec, X, M * M); INT workFlag = 0; if (lwork == 0) { DOUBLE workTemp; lwork = -1; SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, &workTemp, &lwork, &info); if (info != 0) { PRINTF("Error, INFO = %d. ", info); ERROR("LAPACK error."); } lwork = (INT) workTemp; work = (DOUBLE *) MALLOC(lwork * 1 * sizeof(DOUBLE)); workFlag = 1; } // TODO: Perhaps replace with SYEVR? SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, work, &lwork, &info); if (info != 0) { PRINTF("Error, INFO = %d. ", info); ERROR("LAPACK error."); } INT iterM; DOUBLE normtemp = 0; DOUBLE alpha; INT SCALN = M; INT incx = 1; for (iterM = 0; iterM < M; ++iterM) { if ((eigv[iterM] < 0) || (iterM < M - rank)){ eigv[iterM] = 0; } else { normtemp += eigv[iterM]; alpha = SQRT(eigv[iterM]); SCAL(&SCALN, &alpha, &eigvec[iterM * M], &incx); } } if (norm != NULL) { *norm = normtemp; } uplo = 'U'; CHAR trans = 'N'; INT SYRKN = M; INT SYRKK = rank; alpha = 1; INT SYRKLDA = M; DOUBLE beta = 0; INT SYRKLDC = M; SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, &eigvec[(M - rank) * M], &SYRKLDA, &beta, X, &SYRKLDC); /* NOTE: alternative 1, somewhat slower than version above. INT iterM; DOUBLE normtemp = 0; memset((void *) X, 0, M * M * sizeof(DOUBLE)); uplo = 'U'; INT SYRN = M; DOUBLE alpha; INT SYRLDA = M; INT incx = 1; for (iterM = 0; iterM < M; ++iterM) { eigv[iterM] = eigv[iterM] - tau; if (eigv[iterM] < 0) { eigv[iterM] = 0; } else { normtemp += eigv[iterM]; alpha = eigv[iterM]; SYR(&uplo, &SYRN, &alpha, &eigvec[iterM * M], &incx, X, &SYRLDA); } } *norm = normtemp; */ INT iterN; for (iterM = 0; iterM < M; ++iterM) { for (iterN = iterM + 1; iterN < M; ++iterN) { X[iterM * M + iterN] = X[iterN * M + iterM]; } } if (eigvFlag == 1) { FREE(eigv); } if (eigvecFlag == 1) { FREE(eigvec); } if (workFlag == 1) { FREE(work); } }
//////////////////////////////////////////////////////////////////////////////// /// @brief Application main function. //////////////////////////////////////////////////////////////////////////////// void main(void) { // Initializations SET_MAIN_CLOCK_SOURCE(CRYSTAL); SET_MAIN_CLOCK_SPEED(MHZ_26); CLKCON = (CLKCON & 0xC7); init_peripherals(); P0 &= ~0x40; // Pulse the Codec Reset line (high to low, low to high) P0 |= 0x40; init_codec(); // Initilize the Codec INT_SETFLAG(INUM_DMA, INT_CLR); // clear the DMA interrupt flag I2SCFG0 |= 0x01; // Enable the I2S interface DMA_SET_ADDR_DESC0(&DmaDesc0); // Set up DMA configuration table for channel 0 DMA_SET_ADDR_DESC1234(&DmaDesc1_4[0]); // Set up DMA configuration table for channels 1 - 4 dmaMemtoMem(AF_BUF_SIZE); // Set up DMA Channel 0 for memmory to memory data transfers initRf(); // Set radio base frequency and reserve DMA channels 1 and 2 for RX/TX buffers dmaAudio(); // Set up DMA channels 3 and 4 for the Audio In/Out buffers DMAIRQ = 0; DMA_ARM_CHANNEL(4); // Arm DMA channel 4 macTimer3Init(); INT_ENABLE(INUM_T1, INT_ON); // Enable Timer 1 interrupts INT_ENABLE(INUM_DMA, INT_ON); // Enable DMA interrupts INT_GLOBAL_ENABLE(INT_ON); // Enable Global interrupts MAStxData.macPayloadLen = TX_PAYLOAD_LEN; MAStxData.macField = MAC_ADDR; while (1) { // main program loop setChannel(channel[band][ActiveChIdx]); // SetChannel will set the MARCSTATE to IDLE ActiveChIdx = (ActiveChIdx + 1) & 0x03; SCAL(); // Start PLL calibration at new channel if ((P1 & 0x08) != aux_option_status) { // if the 'SEL AUX IN' option bit has changed state if ((P1 & 0x08) == 0) { // SEL AUX IN has changed state to true I2Cwrite(MIC1LP_LEFTADC, 0xFC); // Disconnect MIC1LP/M from the Left ADC, Leave Left DAC enabled I2Cwrite(MIC2L_MIC2R_LEFTADC, 0x2F); // Connect AUX In (MIC2L) to Left ADC I2Cwrite(LEFT_ADC_PGA_GAIN, 0x00); // Set PGA gain to 0 dB aux_option_status &= ~0x08; } else { // SEL AUX IN has changed state to false I2Cwrite(MIC2L_MIC2R_LEFTADC, 0xFF); // Disconnect AUX In (MIC2L) from Left ADC I2Cwrite(MIC1LP_LEFTADC, 0x84); // Connect the internal microphone to the Left ADC using differential inputs (gain = 0 dB); Power Up the Left ADC I2Cwrite(LEFT_ADC_PGA_GAIN, 0x3C); // Enable PGA and set gain to 30 dB aux_option_status |= 0x08; } } if ((P1 & 0x04) != agc_option_status) { // if the 'ENA AGC' option bit has changed state if ((P1 & 0x04) == 0) { // ENA AGC has changed state to true I2Cwrite(LEFT_AGC_CNTRL_A, 0x90); // Left AGC Control Register A - Enable, set target level to -8 dB I2Cwrite(LEFT_AGC_CNTRL_B, 0xC8); // Left AGC Control Register B - Set maximum gain to to 50 dB I2Cwrite(LEFT_AGC_CNTRL_C, 0x00); // Left AGC Control Register C - Disable Silence Detection agc_option_status &= ~0x04; } else { // SEL AUX IN has changed state to false I2Cwrite(LEFT_AGC_CNTRL_A, 0x10); // Left AGC Control Register A - Disable agc_option_status |= 0x04; } } // Check the band selection bits band = 2; // if the switch is not in position 1 or 2, in must be in position 3 if ((P1 & 0x10) == 0) // check if switch is in position 1 band = 0; else if ((P0 & 0x04) == 0) // check if switch is in position 2 band = 1; // Now wait for the "audio frame ready" signal while (audioFrameReady == FALSE); // Wait until an audioframe is ready to be transmitted audioFrameReady = FALSE; // Reset the flag // Move data from the CODEC (audioOut) buffer to the TX buffer using DMA Channel 0 SET_WORD(DmaDesc0.SRCADDRH, DmaDesc0.SRCADDRL, audioOut[activeOut]); SET_WORD(DmaDesc0.DESTADDRH, DmaDesc0.DESTADDRL, MAStxData.payload); DmaDesc0.SRCINC = SRCINC_1; // Increment Source address DMAARM |= DMA_CHANNEL_0; DMAREQ |= DMA_CHANNEL_0; // Enable memory-to-memory transfer using DMA channel 0 while ((DMAARM & DMA_CHANNEL_0) > 0); // Wait for transfer to complete while (MARCSTATE != 0x01); // Wait for calibration to complete P2 |= 0x08; // Debug - Set P2_3 (TP2) rfSendPacket(MASTER_TX_TIMEOUT_WO_CALIB); P2 &= ~0x08; // Debug - Reset P2_3 (TP2) } // end of 'while (1)' loop }
void RobinVolume<D> :: T_CalcElementMatrix (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatMatrix<SCAL> elmat, LocalHeap & lh) const { ELEMENT_TYPE eltype = base_fel.ElementType(); const CompoundFiniteElement & cfel // product space = dynamic_cast<const CompoundFiniteElement&> (base_fel); // note how we do NOT refer to D-1 elements here: const ScalarFiniteElement<D> & fel_u = // u space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd1()]); const ScalarFiniteElement<D> & fel_e = // e space dynamic_cast<const ScalarFiniteElement<D>&> (cfel[GetInd2()]); elmat = SCAL(0); IntRange ru = cfel.GetRange(GetInd1()); IntRange re = cfel.GetRange(GetInd2()); int ndofe = re.Size(); int ndofu = ru.Size(); FlatVector<> ushape(fel_u.GetNDof(), lh); FlatVector<> eshape(fel_e.GetNDof(), lh); FlatMatrix<SCAL> submat(ndofe,ndofu,lh); submat = SCAL(0); int nfacet = ElementTopology::GetNFacets(eltype); Facet2ElementTrafo transform(eltype); FlatVector< Vec<D> > normals = ElementTopology::GetNormals<D>(eltype); const MeshAccess & ma = *(const MeshAccess*)eltrans.GetMesh(); Array<int> fnums, sels; ma.GetElFacets (eltrans.GetElementNr(), fnums); for (int k = 0; k < nfacet; k++) { ma.GetFacetSurfaceElements (fnums[k], sels); // if interior element, then do nothing: if (sels.Size() == 0) continue; // else: Vec<D> normal_ref = normals[k]; ELEMENT_TYPE etfacet=ElementTopology::GetFacetType(eltype, k); IntegrationRule ir_facet(etfacet, fel_e.Order()+fel_u.Order()); // map the facet integration points to volume reference elt ipts IntegrationRule & ir_facet_vol = transform(k, ir_facet, lh); // ... and further to the physical element MappedIntegrationRule<D,D> mir(ir_facet_vol, eltrans, lh); for (int i = 0 ; i < ir_facet_vol.GetNIP(); i++) { SCAL val = coeff_c->T_Evaluate<SCAL> (mir[i]); // this is contrived to get the surface measure in "len" Mat<D> inv_jac = mir[i].GetJacobianInverse(); double det = mir[i].GetMeasure(); Vec<D> normal = det * Trans (inv_jac) * normal_ref; double len = L2Norm (normal); val *= len * ir_facet[i].Weight(); fel_u.CalcShape (ir_facet_vol[i], ushape); fel_e.CalcShape (ir_facet_vol[i], eshape); submat += val * eshape * Trans(ushape); } } elmat.Rows(re).Cols(ru) += submat; if (GetInd1() != GetInd2()) elmat.Rows(ru).Cols(re) += Conj(Trans(submat)); }
inline SCAL T_Evaluate (const BaseMappedIntegrationPoint & ip) const { return SCAL (Evaluate (ip)); // used by PML : AutoDiff<complex> }
void NeumannVolume<D> :: T_CalcElementVector (const FiniteElement & base_fel, const ElementTransformation & eltrans, FlatVector<SCAL> elvec, LocalHeap & lh) const { const CompoundFiniteElement & cfel = dynamic_cast<const CompoundFiniteElement&> (base_fel); const ScalarFiniteElement<D> & fel = dynamic_cast<const ScalarFiniteElement<D>&> (cfel[indx]); FlatVector<> ushape(fel.GetNDof(), lh); elvec = SCAL(0); IntRange re = cfel.GetRange(indx); int ndofe = re.Size(); FlatVector<SCAL> subvec(ndofe,lh); subvec = SCAL(0); const IntegrationRule ir(fel.ElementType(), 2*fel.Order()); ELEMENT_TYPE eltype = base_fel.ElementType(); int nfacet = ElementTopology::GetNFacets(eltype); Facet2ElementTrafo transform(eltype); FlatVector< Vec<D> > normals = ElementTopology::GetNormals<D>(eltype); const MeshAccess & ma = *(const MeshAccess*)eltrans.GetMesh(); Array<int> fnums, sels; ma.GetElFacets (eltrans.GetElementNr(), fnums); for (int k = 0; k < nfacet; k++) { ma.GetFacetSurfaceElements (fnums[k], sels); // if interior element, then do nothing: if (sels.Size() == 0) continue; // else: Vec<D> normal_ref = normals[k]; ELEMENT_TYPE etfacet = ElementTopology::GetFacetType (eltype, k); IntegrationRule ir_facet(etfacet, 2*fel.Order()); // map the facet integration points to volume reference elt ipts IntegrationRule & ir_facet_vol = transform(k, ir_facet, lh); // ... and further to the physical element MappedIntegrationRule<D,D> mir(ir_facet_vol, eltrans, lh); for (int i = 0 ; i < ir_facet_vol.GetNIP(); i++) { SCAL G[3] ; G[0] = coeff_Gx -> T_Evaluate<SCAL>(mir[i]); G[1] = coeff_Gy -> T_Evaluate<SCAL>(mir[i]); if (D==3) G[2] = coeff_Gz -> T_Evaluate<SCAL>(mir[i]); FlatVector<SCAL> Gval(D,lh); for (int dd=0; dd<D; dd++) Gval[dd] = G[dd]; SCAL g = coeff_g -> T_Evaluate<SCAL>(mir[i]); // this is contrived to get the surface measure in "len" Mat<D> inv_jac = mir[i].GetJacobianInverse(); double det = mir[i].GetMeasure(); Vec<D> normal = det * Trans (inv_jac) * normal_ref; double len = L2Norm (normal); SCAL gg = (InnerProduct(Gval,normal) + g*len) * ir_facet[i].Weight(); fel.CalcShape (ir_facet_vol[i], ushape); subvec += gg * ushape; } } elvec.Rows(re) += subvec; }
// ************************************************************************************************* // Sets the radio hardware to the required initial state. // ************************************************************************************************* void rftest_radio_init(void) { u8 FSCAL3_Register_u8; SYNC1 = RFTEST_SYNC_WORD >> 8; /* Sync word, high byte */ SYNC0 = (u8) RFTEST_SYNC_WORD; /* Sync word, low byte */ PKTLEN = RFTEST_PACKET_LENGTH; /* Packet length */ PKTCTRL1 = 0x00; /* Packet automation control */ PKTCTRL0 = 0x00; /* Packet automation control */ ADDR = 0x00; /* Device address */ CHANNR = 0x00; /* Channel number */ FSCTRL1 = 0x12; /* Frequency synthesizer control */ FSCTRL0 = 0x00; /* Frequency synthesizer control */ #ifdef ISM_EU FREQ2 = 0x24; /* Frequency control word, high byte */ FREQ1 = 0x2D; /* Frequency control word, middle byte */ FREQ0 = 0x55; /* Frequency control word, low byte */ MDMCFG4 = 0x3D; /* Modem configuration */ MDMCFG3 = 0x55; /* Modem configuration */ MDMCFG2 = 0x15; /* Modem configuration */ MDMCFG1 = 0x12; /* Modem configuration */ MDMCFG0 = 0x11; /* Modem configuration */ #else #ifdef ISM_US FREQ2 = 0x26; /* Frequency control word, high byte */ FREQ1 = 0x19; /* Frequency control word, middle byte */ FREQ0 = 0x11; /* Frequency control word, low byte */ MDMCFG4 = 0x3D; /* Modem configuration */ MDMCFG3 = 0x55; /* Modem configuration */ MDMCFG2 = 0x15; /* Modem configuration */ MDMCFG1 = 0x12; /* Modem configuration */ MDMCFG0 = 0x11; /* Modem configuration */ #else #ifdef ISM_LF FREQ2 = 0x12; /* Frequency control word, high byte */ FREQ1 = 0x0A; /* Frequency control word, middle byte */ FREQ0 = 0xAA; /* Frequency control word, low byte */ MDMCFG4 = 0x3D; /* Modem configuration */ MDMCFG3 = 0x55; /* Modem configuration */ MDMCFG2 = 0x15; /* Modem configuration */ MDMCFG1 = 0x12; /* Modem configuration */ MDMCFG0 = 0x11; /* Modem configuration */ #else #error "No ISM band specified" #endif #endif #endif DEVIATN = 0x60; /* Modem deviation setting */ MCSM2 = 0x07; /* Main Radio Control State Machine configuration */ MCSM1 = 0x02; /* Main Radio Control State Machine configuration */ MCSM0 = 0x18; /* Main Radio Control State Machine configuration */ FOCCFG = 0x1D; /* Frequency Offset Compensation configuration */ BSCFG = 0x1C; /* Bit Synchronization configuration */ AGCCTRL2 = 0xC7; /* AGC control */ AGCCTRL1 = 0x10; /* AGC control */ AGCCTRL0 = 0xB2; /* AGC control */ FREND1 = 0xB6; /* Front end RX configuration */ FREND0 = 0x10; /* Front end TX configuration */ FSCAL3 = 0xEA; /* Frequency synthesizer calibration */ FSCAL2 = 0x2A; /* Frequency synthesizer calibration */ FSCAL1 = 0x00; /* Frequency synthesizer calibration */ FSCAL0 = 0x1F; /* Frequency synthesizer calibration */ IOCFG2 = 0x00; /* Radio Test Signal Configuration (P1_7) */ IOCFG1 = 0x00; /* Radio Test Signal Configuration (P1_6) */ IOCFG0 = 0x00; /* Radio Test Signal Configuration (P1_5) */ TEST1 = 0x31; // Read FSCAL3 register, set bits enabling charge pump calibration and write register again FSCAL3_Register_u8 = FSCAL3 | 0x20; FSCAL3 = FSCAL3_Register_u8; // Set output power PA_TABLE0 = RFTEST_OUTPUT_POWER; // Start calibration manually SIDLE(); SCAL(); // Wait until calibration completed while(MARCSTATE != 0x01); FSCAL3 &= ~0x30; // Enter powerdown mode SIDLE(); }
// TODO: Change so that adaptive depending on M and N // TODO: Check what standard I use for copy of original data, and get rid of it // if not necessary. void nuclear_approx_obj_grad(DOUBLE *obj, DOUBLE *deriv, DOUBLE *X, DOUBLE *rp, \ INT M, INT N, INT derivFlag, DOUBLE *svdVec, DOUBLE *vtMat, \ DOUBLE *dataBuffer, DOUBLE *derivVec, DOUBLE *work, INT lwork) { INT MN = IMIN(M, N); if (lwork == -1) { CHAR jobu; CHAR jobvt; if (derivFlag == 1) { jobu = 'O'; jobvt = 'S'; } else { jobu = 'N'; jobvt = 'N'; } INT GESVDM = M; INT GESVDN = N; INT GESVDLDA = M; INT GESVDLDU = M; INT GESVDLDVT = MN; INT INFO; GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, work, &lwork, &INFO); if (INFO != 0) { PRINTF("Error, INFO = %d. ", INFO); ERROR("LAPACK error."); } return; } INT svdVecFlag = 0; if (svdVec == NULL) { svdVec = (DOUBLE *) MALLOC(1 * MN * sizeof(DOUBLE)); svdVecFlag = 1; } INT derivVecFlag = 0; if ((derivVec == NULL) && (derivFlag == 1)) { derivVec = (DOUBLE *) MALLOC(1 * MN * sizeof(DOUBLE)); derivVecFlag = 1; } INT vtMatFlag = 0; if ((vtMat == NULL) && (derivFlag == 1)) { vtMat = (DOUBLE *) MALLOC(MN * N * sizeof(DOUBLE)); vtMatFlag = 1; } INT dataBufferFlag = 0; if (dataBuffer == NULL) { dataBuffer = (DOUBLE *) MALLOC(M * N * sizeof(DOUBLE)); dataBufferFlag = 1; } INT workFlag = 0; if (work == NULL) { workFlag = 1; } CHAR jobu; CHAR jobvt; if (derivFlag == 1) { jobu = 'O'; jobvt = 'S'; } else { jobu = 'N'; jobvt = 'N'; } datacpy(dataBuffer, X, M * N); INT GESVDM = M; INT GESVDN = N; INT GESVDLDA = M; INT GESVDLDU = M; INT GESVDLDVT = MN; INT INFO; if (workFlag == 1) { lwork = -1; DOUBLE work_temp; GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, &work_temp, &lwork, &INFO); if (INFO != 0) { PRINTF("Error, INFO = %d. ", INFO); ERROR("LAPACK error."); } lwork = (INT) work_temp; work = (DOUBLE*) MALLOC(lwork * sizeof(DOUBLE)); } GESVD(&jobu, &jobvt, &GESVDM, &GESVDN, dataBuffer, &GESVDLDA, svdVec, NULL, &GESVDLDU, vtMat, &GESVDLDVT, work, &lwork, &INFO); if (INFO != 0) { PRINTF("Error, INFO = %d. ", INFO); ERROR("LAPACK error."); } abs_smooth_obj_grad(svdVec, derivVec, svdVec, rp, MN, derivFlag); INT ASUMN = MN; INT incx = 1; *obj = ASUM(&ASUMN, svdVec, &incx); if (derivFlag == 1) { INT iterMN; INT SCALN = M; INT incx = 1; DOUBLE alpha; for (iterMN = 0; iterMN < MN; ++iterMN) { alpha = derivVec[iterMN]; SCAL(&SCALN, &alpha, &dataBuffer[iterMN * M], &incx); } CHAR transa = 'N'; CHAR transb = 'N'; INT GEMMM = M; INT GEMMN = N; INT GEMMK = MN; alpha = 1.0; INT GEMMLDA = M; INT GEMMLDB = MN; DOUBLE beta = 0.0; INT GEMMLDC = M; GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, dataBuffer, &GEMMLDA, vtMat, &GEMMLDB, &beta, deriv, &GEMMLDC); } if (svdVecFlag == 1) { FREE(svdVec); } if (derivVecFlag == 1) { FREE(derivVec); } if (vtMatFlag == 1) { FREE(vtMat); } if (dataBufferFlag == 1) { FREE(dataBuffer); } if (workFlag == 1) { FREE(work); } }