#define BYTE_DISP 1 #define WORD_DISP 2 #define UNDEF_BYTE_DISP 0 #define UNDEF_WORD_DISP 3 #define BRANCH 1 #define SCB_F 2 #define SCB_TST 3 #define END 4 #define BYTE_F 127 #define BYTE_B -126 #define WORD_F 32767 #define WORD_B 32768 const relax_typeS md_relax_table[C (END, 0)]; static struct hash_control *opcode_hash_control; /* Opcode mnemonics */ /* This function is called once, at assembler startup time. This should set up all the tables, etc that the MD part of the assembler needs */ void md_begin () { h8500_opcode_info *opcode; char prev_buffer[100]; int idx = 0; register relax_typeS *table;
#define IMM16 atomrimm, &imm16off static struct rbitfield imm32off = { 8, 32 }; #define IMM32 atomrimm, &imm32off static struct bitfield flagoff = { 0, 5 }; #define FLAG atomimm, &flagoff static struct bitfield waitoff = { 0, 2 }; #define WAIT atomimm, &waitoff static struct bitfield waitsoff = { 2, 4, .shr = 1 }; #define WAITS atomimm, &waitsoff static struct bitfield eventoff = { 8, 5 }; #define EVENT atomimm, &eventoff static struct bitfield evaloff = { 16, 1 }; #define EVAL atomimm, &evaloff static struct insn tabevent[] = { { 0x0000, 0xff00, C("FB_PAUSED") }, { 0x0100, 0xff00, C("CRTC0_VBLANK") }, { 0x0200, 0xff00, C("CRTC0_HBLANK") }, { 0x0300, 0xff00, C("CRTC1_VBLANK") }, { 0x0400, 0xff00, C("CRTC1_HBLANK") }, { 0, 0, EVENT }, }; static struct insn tabfl[] = { { 0x00, 0x1f, C("GPIO_2_OUT"), .fmask = F_NV17F }, { 0x01, 0x1f, C("GPIO_2_OE"), .fmask = F_NV17F }, { 0x02, 0x1f, C("GPIO_3_OUT"), .fmask = F_NV17F }, { 0x03, 0x1f, C("GPIO_3_OE"), .fmask = F_NV17F }, { 0x04, 0x1f, C("PRAMDAC0_UNK880_28"), .fmask = F_NV17F }, { 0x05, 0x1f, C("PRAMDAC1_UNK880_28"), .fmask = F_NV17F }, { 0x06, 0x1f, C("PRAMDAC0_UNK880_29"), .fmask = F_NV17F },
long long int lucas(long long int a,long long int b){ if(b==0) return 1; return (lucas(a/mod,b/mod)*C(a%mod,b%mod))%mod; }
int main(int, char**) { { typedef std::tuple<long> T0; typedef std::tuple<long long> T1; T0 t0(2); T1 t1 = t0; assert(std::get<0>(t1) == 2); } #if TEST_STD_VER > 11 { typedef std::tuple<int> T0; typedef std::tuple<A> T1; constexpr T0 t0(2); constexpr T1 t1 = t0; static_assert(std::get<0>(t1) == 2, ""); } { typedef std::tuple<int> T0; typedef std::tuple<C> T1; constexpr T0 t0(2); constexpr T1 t1{t0}; static_assert(std::get<0>(t1) == C(2), ""); } #endif { typedef std::tuple<long, char> T0; typedef std::tuple<long long, int> T1; T0 t0(2, 'a'); T1 t1 = t0; assert(std::get<0>(t1) == 2); assert(std::get<1>(t1) == int('a')); } { typedef std::tuple<long, char, D> T0; typedef std::tuple<long long, int, B> T1; T0 t0(2, 'a', D(3)); T1 t1 = t0; assert(std::get<0>(t1) == 2); assert(std::get<1>(t1) == int('a')); assert(std::get<2>(t1).id_ == 3); } { D d(3); typedef std::tuple<long, char, D&> T0; typedef std::tuple<long long, int, B&> T1; T0 t0(2, 'a', d); T1 t1 = t0; d.id_ = 2; assert(std::get<0>(t1) == 2); assert(std::get<1>(t1) == int('a')); assert(std::get<2>(t1).id_ == 2); } { typedef std::tuple<long, char, int> T0; typedef std::tuple<long long, int, B> T1; T0 t0(2, 'a', 3); T1 t1(t0); assert(std::get<0>(t1) == 2); assert(std::get<1>(t1) == int('a')); assert(std::get<2>(t1).id_ == 3); } { const std::tuple<int> t1(42); std::tuple<Explicit> t2(t1); assert(std::get<0>(t2).value == 42); } { const std::tuple<int> t1(42); std::tuple<Implicit> t2 = t1; assert(std::get<0>(t2).value == 42); } return 0; }
double nestedprod(size_t N, size_t iterations = 1){ typedef boost::numeric::ublas::matrix<value_type, boost::numeric::ublas::row_major> matrix_type; boost::numeric::ublas::matrix<value_type, boost::numeric::ublas::row_major> A(N, N), B(N, N), C(N, N), D(N, N), E(N, N), F(N, N); minit(N, B); minit(N, C); minit(N, D); minit(N, E); minit(N, F); std::vector<double> times; for(size_t i = 0; i < iterations; ++i){ auto start = std::chrono::steady_clock::now(); noalias(A) = prod( B, matrix_type( prod( C, matrix_type( prod( D, matrix_type( prod( E, F) ) ) ) ) ) ); auto end = std::chrono::steady_clock::now(); auto diff = end - start; times.push_back(std::chrono::duration<double, std::milli> (diff).count()); //save time in ms for each iteration } double tmin = *(std::min_element(times.begin(), times.end())); double tavg = average_time(times); // check to see if nothing happened during run to invalidate the times if(variance(tavg, times) > max_variance){ std::cerr << "boost kernel 'nestedprod': Time deviation too large! \n"; } return tavg; }
/* Subroutine */ int slaebz_(integer *ijob, integer *nitmax, integer *n, integer *mmax, integer *minp, integer *nbmin, real *abstol, real * reltol, real *pivmin, real *d, real *e, real *e2, integer *nval, real *ab, real *c, integer *mout, integer *nab, real *work, integer *iwork, integer *info) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SLAEBZ contains the iteration loops which compute and use the function N(w), which is the count of eigenvalues of a symmetric tridiagonal matrix T less than or equal to its argument w. It performs a choice of two types of loops: IJOB=1, followed by IJOB=2: It takes as input a list of intervals and returns a list of sufficiently small intervals whose union contains the same eigenvalues as the union of the original intervals. The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. The output interval (AB(j,1),AB(j,2)] will contain eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. IJOB=3: It performs a binary search in each input interval (AB(j,1),AB(j,2)] for a point w(j) such that N(w(j))=NVAL(j), and uses C(j) as the starting point of the search. If such a w(j) is found, then on output AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output (AB(j,1),AB(j,2)] will be a small interval containing the point where N(w) jumps through NVAL(j), unless that point lies outside the initial interval. Note that the intervals are in all cases half-open intervals, i.e., of the form (a,b] , which includes b but not a . To avoid underflow, the matrix should be scaled so that its largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value. To assure the most accurate computation of small eigenvalues, the matrix should be scaled to be not much smaller than that, either. See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal VISMatrix", Report CS41, Computer Science Dept., Stanford University, July 21, 1966 Note: the arguments are, in general, *not* checked for unreasonable values. Arguments ========= IJOB (input) INTEGER Specifies what is to be done: = 1: Compute NAB for the initial intervals. = 2: Perform bisection iteration to find eigenvalues of T. = 3: Perform bisection iteration to invert N(w), i.e., to find a point which has a specified number of eigenvalues of T to its left. Other values will cause SLAEBZ to return with INFO=-1. NITMAX (input) INTEGER The maximum number of "levels" of bisection to be performed, i.e., an interval of width W will not be made smaller than 2^(-NITMAX) * W. If not all intervals have converged after NITMAX iterations, then INFO is set to the number of non-converged intervals. N (input) INTEGER The dimension n of the tridiagonal matrix T. It must be at least 1. MMAX (input) INTEGER The maximum number of intervals. If more than MMAX intervals are generated, then SLAEBZ will quit with INFO=MMAX+1. MINP (input) INTEGER The initial number of intervals. It may not be greater than MMAX. NBMIN (input) INTEGER The smallest number of intervals that should be processed using a vector loop. If zero, then only the scalar loop will be used. ABSTOL (input) REAL The minimum (absolute) width of an interval. When an interval is narrower than ABSTOL, or than RELTOL times the larger (in magnitude) endpoint, then it is considered to be sufficiently small, i.e., converged. This must be at least zero. RELTOL (input) REAL The minimum relative width of an interval. When an interval is narrower than ABSTOL, or than RELTOL times the larger (in magnitude) endpoint, then it is considered to be sufficiently small, i.e., converged. Note: this should always be at least radix*machine epsilon. PIVMIN (input) REAL The minimum absolute value of a "pivot" in the Sturm sequence loop. This *must* be at least max |e(j)**2| * safe_min and at least safe_min, where safe_min is at least the smallest number that can divide one without overflow. D (input) REAL array, dimension (N) The diagonal elements of the tridiagonal matrix T. E (input) REAL array, dimension (N) The offdiagonal elements of the tridiagonal matrix T in positions 1 through N-1. E(N) is arbitrary. E2 (input) REAL array, dimension (N) The squares of the offdiagonal elements of the tridiagonal matrix T. E2(N) is ignored. NVAL (input/output) INTEGER array, dimension (MINP) If IJOB=1 or 2, not referenced. If IJOB=3, the desired values of N(w). The elements of NVAL will be reordered to correspond with the intervals in AB. Thus, NVAL(j) on output will not, in general be the same as NVAL(j) on input, but it will correspond with the interval (AB(j,1),AB(j,2)] on output. AB (input/output) REAL array, dimension (MMAX,2) The endpoints of the intervals. AB(j,1) is a(j), the left endpoint of the j-th interval, and AB(j,2) is b(j), the right endpoint of the j-th interval. The input intervals will, in general, be modified, split, and reordered by the calculation. C (input/output) REAL array, dimension (MMAX) If IJOB=1, ignored. If IJOB=2, workspace. If IJOB=3, then on input C(j) should be initialized to the first search point in the binary search. MOUT (output) INTEGER If IJOB=1, the number of eigenvalues in the intervals. If IJOB=2 or 3, the number of intervals output. If IJOB=3, MOUT will equal MINP. NAB (input/output) INTEGER array, dimension (MMAX,2) If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). If IJOB=2, then on input, NAB(i,j) should be set. It must satisfy the condition: N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), which means that in interval i only eigenvalues NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, NAB(i,j)=N(AB(i,j)), from a previous call to SLAEBZ with IJOB=1. On output, NAB(i,j) will contain max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of the input interval that the output interval (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the the input values of NAB(k,1) and NAB(k,2). If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), unless N(w) > NVAL(i) for all search points w , in which case NAB(i,1) will not be modified, i.e., the output value will be the same as the input value (modulo reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) for all search points w , in which case NAB(i,2) will not be modified. Normally, NAB should be set to some distinctive value(s) before SLAEBZ is called. WORK (workspace) REAL array, dimension (MMAX) Workspace. IWORK (workspace) INTEGER array, dimension (MMAX) Workspace. INFO (output) INTEGER = 0: All intervals converged. = 1--MMAX: The last INFO intervals did not converge. = MMAX+1: More than MMAX intervals were generated. Further Details =============== This routine is intended to be called only by other LAPACK routines, thus the interface is less user-friendly. It is intended for two purposes: (a) finding eigenvalues. In this case, SLAEBZ should have one or more initial intervals set up in AB, and SLAEBZ should be called with IJOB=1. This sets up NAB, and also counts the eigenvalues. Intervals with no eigenvalues would usually be thrown out at this point. Also, if not all the eigenvalues in an interval i are desired, NAB(i,1) can be increased or NAB(i,2) decreased. For example, set NAB(i,1)=NAB(i,2)-1 to get the largest eigenvalue. SLAEBZ is then called with IJOB=2 and MMAX no smaller than the value of MOUT returned by the call with IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the tolerance specified by ABSTOL and RELTOL. (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). In this case, start with a Gershgorin interval (a,b). Set up AB to contain 2 search intervals, both initially (a,b). One NVAL element should contain f-1 and the other should contain l , while C should contain a and b, resp. NAB(i,1) should be -1 and NAB(i,2) should be N+1, to flag an error if the desired interval does not lie in (a,b). SLAEBZ is then called with IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and w(l-r)=...=w(l+k) are handled similarly. ===================================================================== Check for Errors Parameter adjustments Function Body */ /* System generated locals */ integer nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; real r__1, r__2, r__3, r__4; /* Local variables */ static integer itmp1, itmp2, j, kfnew, klnew, kf, ji, kl, jp, jit; static real tmp1, tmp2; #define D(I) d[(I)-1] #define E(I) e[(I)-1] #define E2(I) e2[(I)-1] #define NVAL(I) nval[(I)-1] #define C(I) c[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define NAB(I,J) nab[(I)-1 + ((J)-1)* ( *mmax)] #define AB(I,J) ab[(I)-1 + ((J)-1)* ( *mmax)] *info = 0; if (*ijob < 1 || *ijob > 3) { *info = -1; return 0; } /* Initialize NAB */ if (*ijob == 1) { /* Compute the number of eigenvalues in the initial intervals. */ *mout = 0; i__1 = *minp; for (ji = 1; ji <= *minp; ++ji) { for (jp = 1; jp <= 2; ++jp) { tmp1 = D(1) - AB(ji,jp); if (dabs(tmp1) < *pivmin) { tmp1 = -(doublereal)(*pivmin); } NAB(ji,jp) = 0; if (tmp1 <= 0.f) { NAB(ji,jp) = 1; } i__2 = *n; for (j = 2; j <= *n; ++j) { tmp1 = D(j) - E2(j - 1) / tmp1 - AB(ji,jp); if (dabs(tmp1) < *pivmin) { tmp1 = -(doublereal)(*pivmin); } if (tmp1 <= 0.f) { ++NAB(ji,jp); } /* L10: */ } /* L20: */ } *mout = *mout + NAB(ji,2) - NAB(ji,1); /* L30: */ } return 0; } /* Initialize for loop KF and KL have the following meaning: Intervals 1,...,KF-1 have converged. Intervals KF,...,KL still need to be refined. */ kf = 1; kl = *minp; /* If IJOB=2, initialize C. If IJOB=3, use the user-supplied starting point. */ if (*ijob == 2) { i__1 = *minp; for (ji = 1; ji <= *minp; ++ji) { C(ji) = (AB(ji,1) + AB(ji,2)) * .5f; /* L40: */ } } /* Iteration loop */ i__1 = *nitmax; for (jit = 1; jit <= *nitmax; ++jit) { /* Loop over intervals */ if (kl - kf + 1 >= *nbmin && *nbmin > 0) { /* Begin of Parallel Version of the loop */ i__2 = kl; for (ji = kf; ji <= kl; ++ji) { /* Compute N(c), the number of eigenvalues less t han c */ WORK(ji) = D(1) - C(ji); IWORK(ji) = 0; if (WORK(ji) <= *pivmin) { IWORK(ji) = 1; /* Computing MIN */ r__1 = WORK(ji), r__2 = -(doublereal)(*pivmin); WORK(ji) = dmin(r__1,r__2); } i__3 = *n; for (j = 2; j <= *n; ++j) { WORK(ji) = D(j) - E2(j - 1) / WORK(ji) - C(ji); if (WORK(ji) <= *pivmin) { ++IWORK(ji); /* Computing MIN */ r__1 = WORK(ji), r__2 = -(doublereal)(*pivmin); WORK(ji) = dmin(r__1,r__2); } /* L50: */ } /* L60: */ } if (*ijob <= 2) { /* IJOB=2: Choose all intervals containing eigenv alues. */ klnew = kl; i__2 = kl; for (ji = kf; ji <= kl; ++ji) { /* Insure that N(w) is monotone Computing MIN Computing MAX */ i__5 = NAB(ji,1), i__6 = IWORK(ji); i__3 = NAB(ji,2), i__4 = max(i__5,i__6); IWORK(ji) = min(i__3,i__4); /* Update the Queue -- add intervals if bo th halves contain eigenvalues. */ if (IWORK(ji) == NAB(ji,2)) { /* No eigenvalue in the upper inter val: just use the lower interval. */ AB(ji,2) = C(ji); } else if (IWORK(ji) == NAB(ji,1)) { /* No eigenvalue in the lower inter val: just use the upper interval. */ AB(ji,1) = C(ji); } else { ++klnew; if (klnew <= *mmax) { /* Eigenvalue in both interv als -- add upper to queue. */ AB(klnew,2) = AB(ji,2); NAB(klnew,2) = NAB(ji,2); AB(klnew,1) = C(ji); NAB(klnew,1) = IWORK(ji); AB(ji,2) = C(ji); NAB(ji,2) = IWORK(ji); } else { *info = *mmax + 1; } } /* L70: */ } if (*info != 0) { return 0; } kl = klnew; } else { /* IJOB=3: Binary search. Keep only the interval containing w s.t. N(w) = NVAL */ i__2 = kl; for (ji = kf; ji <= kl; ++ji) { if (IWORK(ji) <= NVAL(ji)) { AB(ji,1) = C(ji); NAB(ji,1) = IWORK(ji); } if (IWORK(ji) >= NVAL(ji)) { AB(ji,2) = C(ji); NAB(ji,2) = IWORK(ji); } /* L80: */ } } } else { /* End of Parallel Version of the loop Begin of Serial Version of the loop */ klnew = kl; i__2 = kl; for (ji = kf; ji <= kl; ++ji) { /* Compute N(w), the number of eigenvalues less t han w */ tmp1 = C(ji); tmp2 = D(1) - tmp1; itmp1 = 0; if (tmp2 <= *pivmin) { itmp1 = 1; /* Computing MIN */ r__1 = tmp2, r__2 = -(doublereal)(*pivmin); tmp2 = dmin(r__1,r__2); } /* A series of compiler directives to defeat vect orization for the next loop $PL$ CMCHAR=' ' DIR$ NEXTSCALAR $DIR SCALAR DIR$ NEXT SCALAR VD$L NOVECTOR DEC$ NOVECTOR VD$ NOVECTOR VDIR NOVECTOR VOCL LOOP,SCALAR IBM PREFER SCALAR $PL$ CMCHAR='*' */ i__3 = *n; for (j = 2; j <= *n; ++j) { tmp2 = D(j) - E2(j - 1) / tmp2 - tmp1; if (tmp2 <= *pivmin) { ++itmp1; /* Computing MIN */ r__1 = tmp2, r__2 = -(doublereal)(*pivmin); tmp2 = dmin(r__1,r__2); } /* L90: */ } if (*ijob <= 2) { /* IJOB=2: Choose all intervals containing eigenvalues. Insure that N(w) is monotone Computing MIN Computing MAX */ i__5 = NAB(ji,1); i__3 = NAB(ji,2), i__4 = max(i__5,itmp1); itmp1 = min(i__3,i__4); /* Update the Queue -- add intervals if bo th halves contain eigenvalues. */ if (itmp1 == NAB(ji,2)) { /* No eigenvalue in the upper inter val: just use the lower interval. */ AB(ji,2) = tmp1; } else if (itmp1 == NAB(ji,1)) { /* No eigenvalue in the lower inter val: just use the upper interval. */ AB(ji,1) = tmp1; } else if (klnew < *mmax) { /* Eigenvalue in both intervals -- add upper to queue. */ ++klnew; AB(klnew,2) = AB(ji,2); NAB(klnew,2) = NAB(ji,2); AB(klnew,1) = tmp1; NAB(klnew,1) = itmp1; AB(ji,2) = tmp1; NAB(ji,2) = itmp1; } else { *info = *mmax + 1; return 0; } } else { /* IJOB=3: Binary search. Keep only the i nterval containing w s.t. N(w) = NVAL */ if (itmp1 <= NVAL(ji)) { AB(ji,1) = tmp1; NAB(ji,1) = itmp1; } if (itmp1 >= NVAL(ji)) { AB(ji,2) = tmp1; NAB(ji,2) = itmp1; } } /* L100: */ } kl = klnew; /* End of Serial Version of the loop */ } /* Check for convergence */ kfnew = kf; i__2 = kl; for (ji = kf; ji <= kl; ++ji) { tmp1 = (r__1 = AB(ji,2) - AB(ji,1), dabs( r__1)); /* Computing MAX */ r__3 = (r__1 = AB(ji,2), dabs(r__1)), r__4 = (r__2 = AB(ji,1), dabs(r__2)); tmp2 = dmax(r__3,r__4); /* Computing MAX */ r__1 = max(*abstol,*pivmin), r__2 = *reltol * tmp2; if (tmp1 < dmax(r__1,r__2) || NAB(ji,1) >= NAB(ji,2)) { /* Converged -- Swap with position KFNEW, then increment KFNEW */ if (ji > kfnew) { tmp1 = AB(ji,1); tmp2 = AB(ji,2); itmp1 = NAB(ji,1); itmp2 = NAB(ji,2); AB(ji,1) = AB(kfnew,1); AB(ji,2) = AB(kfnew,2); NAB(ji,1) = NAB(kfnew,1); NAB(ji,2) = NAB(kfnew,2); AB(kfnew,1) = tmp1; AB(kfnew,2) = tmp2; NAB(kfnew,1) = itmp1; NAB(kfnew,2) = itmp2; if (*ijob == 3) { itmp1 = NVAL(ji); NVAL(ji) = NVAL(kfnew); NVAL(kfnew) = itmp1; } } ++kfnew; } /* L110: */ } kf = kfnew; /* Choose Midpoints */ i__2 = kl; for (ji = kf; ji <= kl; ++ji) { C(ji) = (AB(ji,1) + AB(ji,2)) * .5f; /* L120: */ } /* If no more intervals to refine, quit. */ if (kf > kl) { goto L140; } /* L130: */ } /* Converged */ L140: /* Computing MAX */ i__1 = kl + 1 - kf; *info = max(i__1,0); *mout = kl; return 0; /* End of SLAEBZ */ } /* slaebz_ */
}; A* ap; struct C { }; C* cp; SA (noexcept (dynamic_cast<B*>(ap))); SA (!noexcept (dynamic_cast<B&>(*ap))); SA (!noexcept (typeid (*ap))); SA (noexcept (typeid (*cp))); SA (!noexcept (true ? 1 : throw 1)); SA (!noexcept (true || true ? 1 : throw 1)); SA (noexcept (C())); struct D { D() throw(); }; SA (noexcept (D())); struct E { E() throw(); ~E(); }; SA (noexcept (E()));
int main(int argc, char **argv) { if (argc != 8) { std::cerr << "Not valid number of args" << std::endl; std::cerr<<("Usage: #Rows #Columns #kernel_rows #Kernel_columns Input_Matrix Kernel Output_Matrix \n"); return 1; } int rows=strtol(argv[1],NULL,10); int columns=strtol(argv[2],NULL,10); int krows=strtol(argv[3],NULL,10); int kcolumns=strtol(argv[4],NULL,10); char *inputm=argv[5]; char *kernel=argv[6]; char *outputm=argv[7]; if( krows%2==0 || kcolumns%2==0) { std::cerr<<"Number of kernel rows and columns must be odd"<<std::endl; return 1; } //INPUT MATRIX std::ifstream matrixfile(inputm,std::ios::in); if(!matrixfile) { std::cerr<<"Impossible to read input matrix"<<std::endl; return 1; } std::vector<int> inputmatrix(rows*columns); for(int i = 0; i < rows; ++i){ for(int j = 0; j < columns; ++j){ matrixfile>>inputmatrix[i*columns+j]; } } matrixfile.close(); //KERNEL std::ifstream kernelfile(kernel,std::ios::in); if(!kernelfile) { std::cerr<<"Impossible to read kernel"<<std::endl; return 1; } std::vector<int> kernelmatrix(krows*kcolumns); for(int i = 0; i < krows; ++i){ for(int j = 0; j < kcolumns; ++j){ kernelfile>>kernelmatrix[i*kcolumns+j]; } } kernelfile.close(); std::ofstream outputfile(outputm,std::ios::out); Convolution C(inputmatrix,kernelmatrix, rows, columns, krows, kcolumns); C.Compute(); C.Print_to_File(outputfile); outputfile.close(); return 0; }
void AnisotropicHyperelasticDamageModel<EvalT, Traits>:: computeState(typename Traits::EvalData workset, std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT>>> dep_fields, std::map<std::string, Teuchos::RCP<PHX::MDField<ScalarT>>> eval_fields) { bool print = false; //if (typeid(ScalarT) == typeid(RealType)) print = true; //cout.precision(15); // retrive appropriate field name strings std::string F_string = (*field_name_map_)["F"]; std::string J_string = (*field_name_map_)["J"]; std::string cauchy_string = (*field_name_map_)["Cauchy_Stress"]; std::string matrix_energy_string = (*field_name_map_)["Matrix_Energy"]; std::string f1_energy_string = (*field_name_map_)["F1_Energy"]; std::string f2_energy_string = (*field_name_map_)["F2_Energy"]; std::string matrix_damage_string = (*field_name_map_)["Matrix_Damage"]; std::string f1_damage_string = (*field_name_map_)["F1_Damage"]; std::string f2_damage_string = (*field_name_map_)["F2_Damage"]; // extract dependent MDFields PHX::MDField<ScalarT> def_grad = *dep_fields[F_string]; PHX::MDField<ScalarT> J = *dep_fields[J_string]; PHX::MDField<ScalarT> poissons_ratio = *dep_fields["Poissons Ratio"]; PHX::MDField<ScalarT> elastic_modulus = *dep_fields["Elastic Modulus"]; // extract evaluated MDFields PHX::MDField<ScalarT> stress = *eval_fields[cauchy_string]; PHX::MDField<ScalarT> energy_m = *eval_fields[matrix_energy_string]; PHX::MDField<ScalarT> energy_f1 = *eval_fields[f1_energy_string]; PHX::MDField<ScalarT> energy_f2 = *eval_fields[f2_energy_string]; PHX::MDField<ScalarT> damage_m = *eval_fields[matrix_damage_string]; PHX::MDField<ScalarT> damage_f1 = *eval_fields[f1_damage_string]; PHX::MDField<ScalarT> damage_f2 = *eval_fields[f2_damage_string]; // previous state Albany::MDArray energy_m_old = (*workset.stateArrayPtr)[matrix_energy_string + "_old"]; Albany::MDArray energy_f1_old = (*workset.stateArrayPtr)[f1_energy_string + "_old"]; Albany::MDArray energy_f2_old = (*workset.stateArrayPtr)[f2_energy_string + "_old"]; ScalarT kappa, mu, Jm53, Jm23, p, I4_f1, I4_f2; ScalarT alpha_f1, alpha_f2, alpha_m; // Define some tensors for use Intrepid::Tensor<ScalarT> I(Intrepid::eye<ScalarT>(num_dims_)); Intrepid::Tensor<ScalarT> F(num_dims_), s(num_dims_), b(num_dims_), C( num_dims_); Intrepid::Tensor<ScalarT> sigma_m(num_dims_), sigma_f1(num_dims_), sigma_f2( num_dims_); Intrepid::Tensor<ScalarT> M1dyadM1(num_dims_), M2dyadM2(num_dims_); Intrepid::Tensor<ScalarT> S0_f1(num_dims_), S0_f2(num_dims_); Intrepid::Vector<ScalarT> M1(num_dims_), M2(num_dims_); for (int cell = 0; cell < workset.numCells; ++cell) { for (int pt = 0; pt < num_pts_; ++pt) { // local parameters kappa = elastic_modulus(cell, pt) / (3. * (1. - 2. * poissons_ratio(cell, pt))); mu = elastic_modulus(cell, pt) / (2. * (1. + poissons_ratio(cell, pt))); Jm53 = std::pow(J(cell, pt), -5. / 3.); Jm23 = std::pow(J(cell, pt), -2. / 3.); F.fill(def_grad,cell, pt,0,0); // compute deviatoric stress b = F * Intrepid::transpose(F); s = mu * Jm53 * Intrepid::dev(b); // compute pressure p = 0.5 * kappa * (J(cell, pt) - 1. / (J(cell, pt))); sigma_m = s + p * I; // compute energy for M energy_m(cell, pt) = 0.5 * kappa * (0.5 * (J(cell, pt) * J(cell, pt) - 1.0) - std::log(J(cell, pt))) + 0.5 * mu * (Jm23 * Intrepid::trace(b) - 3.0); // damage term in M alpha_m = energy_m_old(cell, pt); if (energy_m(cell, pt) > alpha_m) alpha_m = energy_m(cell, pt); damage_m(cell, pt) = max_damage_m_ * (1 - std::exp(-alpha_m / saturation_m_)); //-----------compute stress in Fibers // Right Cauchy-Green Tensor C = F^{T} * F C = Intrepid::transpose(F) * F; // Fiber orientation vectors // // fiber 1 for (int i = 0; i < num_dims_; ++i) { M1(i) = direction_f1_[i]; } M1 = M1 / norm(M1); // fiber 2 for (int i = 0; i < num_dims_; ++i) { M2(i) = direction_f2_[i]; } M2 = M2 / norm(M2); // Anisotropic invariants I4 = M_{i} * C * M_{i} I4_f1 = Intrepid::dot(M1, Intrepid::dot(C, M1)); I4_f2 = Intrepid::dot(M2, Intrepid::dot(C, M2)); M1dyadM1 = Intrepid::dyad(M1, M1); M2dyadM2 = Intrepid::dyad(M2, M2); // undamaged stress (2nd PK stress) S0_f1 = (4.0 * k_f1_ * (I4_f1 - 1.0) * std::exp(q_f1_ * (I4_f1 - 1) * (I4_f1 - 1))) * M1dyadM1; S0_f2 = (4.0 * k_f2_ * (I4_f2 - 1.0) * std::exp(q_f2_ * (I4_f2 - 1) * (I4_f2 - 1))) * M2dyadM2; // compute energy for fibers energy_f1(cell, pt) = k_f1_ * (std::exp(q_f1_ * (I4_f1 - 1) * (I4_f1 - 1)) - 1) / q_f1_; energy_f2(cell, pt) = k_f2_ * (std::exp(q_f2_ * (I4_f2 - 1) * (I4_f2 - 1)) - 1) / q_f2_; // Fiber Cauchy stress sigma_f1 = (1.0 / J(cell, pt)) * Intrepid::dot(F, Intrepid::dot(S0_f1, Intrepid::transpose(F))); sigma_f2 = (1.0 / J(cell, pt)) * Intrepid::dot(F, Intrepid::dot(S0_f2, Intrepid::transpose(F))); // maximum thermodynamic forces alpha_f1 = energy_f1_old(cell, pt); alpha_f2 = energy_f2_old(cell, pt); if (energy_f1(cell, pt) > alpha_f1) alpha_f1 = energy_f1(cell, pt); if (energy_f2(cell, pt) > alpha_f2) alpha_f2 = energy_f2(cell, pt); // damage term in fibers damage_f1(cell, pt) = max_damage_f1_ * (1 - std::exp(-alpha_f1 / saturation_f1_)); damage_f2(cell, pt) = max_damage_f2_ * (1 - std::exp(-alpha_f2 / saturation_f2_)); // total Cauchy stress (M, Fibers) for (int i(0); i < num_dims_; ++i) { for (int j(0); j < num_dims_; ++j) { stress(cell, pt, i, j) = volume_fraction_m_ * (1 - damage_m(cell, pt)) * sigma_m(i, j) + volume_fraction_f1_ * (1 - damage_f1(cell, pt)) * sigma_f1(i, j) + volume_fraction_f2_ * (1 - damage_f2(cell, pt)) * sigma_f2(i, j); } } if (print) { std::cout << " matrix damage: " << damage_m(cell, pt) << std::endl; std::cout << " matrix energy: " << energy_m(cell, pt) << std::endl; std::cout << " fiber1 damage: " << damage_f1(cell, pt) << std::endl; std::cout << " fiber1 energy: " << energy_f1(cell, pt) << std::endl; std::cout << " fiber2 damage: " << damage_f2(cell, pt) << std::endl; std::cout << " fiber2 energy: " << energy_f2(cell, pt) << std::endl; } } // pt } // cell }
cmplx w_of_z(cmplx z) { faddeeva_nofterms = 0; // Steven G. Johnson, October 2012. if (creal(z) == 0.0) { // Purely imaginary input, purely real output. // However, use creal(z) to give correct sign of 0 in cimag(w). return C(erfcx(cimag(z)), creal(z)); } if (cimag(z) == 0) { // Purely real input, complex output. return C(exp(-sqr(creal(z))), im_w_of_x(creal(z))); } const double relerr = DBL_EPSILON; const double a = 0.518321480430085929872; // pi / sqrt(-log(eps*0.5)) const double c = 0.329973702884629072537; // (2/pi) * a; const double a2 = 0.268657157075235951582; // a^2 const double x = fabs(creal(z)); const double y = cimag(z); const double ya = fabs(y); cmplx ret = 0.; // return value double sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0, sum5 = 0; int n; if (ya > 7 || (x > 6 // continued fraction is faster /* As pointed out by M. Zaghloul, the continued fraction seems to give a large relative error in Re w(z) for |x| ~ 6 and small |y|, so use algorithm 816 in this region: */ && (ya > 0.1 || (x > 8 && ya > 1e-10) || x > 28))) { faddeeva_algorithm = 100; /* Poppe & Wijers suggest using a number of terms nu = 3 + 1442 / (26*rho + 77) where rho = sqrt((x/x0)^2 + (y/y0)^2) where x0=6.3, y0=4.4. (They only use this expansion for rho >= 1, but rho a little less than 1 seems okay too.) Instead, I did my own fit to a slightly different function that avoids the hypotenuse calculation, using NLopt to minimize the sum of the squares of the errors in nu with the constraint that the estimated nu be >= minimum nu to attain machine precision. I also separate the regions where nu == 2 and nu == 1. */ const double ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) double xs = y < 0 ? -creal(z) : creal(z); // compute for -z if y < 0 if (x + ya > 4000) { // nu <= 2 if (x + ya > 1e7) { // nu == 1, w(z) = i/sqrt(pi) / z // scale to avoid overflow if (x > ya) { faddeeva_algorithm += 1; double yax = ya / xs; faddeeva_algorithm = 100; double denom = ispi / (xs + yax*ya); ret = C(denom*yax, denom); } else if (isinf(ya)) { faddeeva_algorithm += 2; return ((isnan(x) || y < 0) ? C(NaN,NaN) : C(0,0)); } else { faddeeva_algorithm += 3; double xya = xs / ya; double denom = ispi / (xya*xs + ya); ret = C(denom, denom*xya); } } else { // nu == 2, w(z) = i/sqrt(pi) * z / (z*z - 0.5) faddeeva_algorithm += 4; double dr = xs*xs - ya*ya - 0.5, di = 2*xs*ya; double denom = ispi / (dr*dr + di*di); ret = C(denom * (xs*di-ya*dr), denom * (xs*dr+ya*di)); } } else { // compute nu(z) estimate and do general continued fraction faddeeva_algorithm += 5; const double c0=3.9, c1=11.398, c2=0.08254, c3=0.1421, c4=0.2023; // fit double nu = floor(c0 + c1 / (c2*x + c3*ya + c4)); double wr = xs, wi = ya; for (nu = 0.5 * (nu - 1); nu > 0.4; nu -= 0.5) { // w <- z - nu/w: double denom = nu / (wr*wr + wi*wi); wr = xs - wr * denom; wi = ya + wi * denom; } { // w(z) = i/sqrt(pi) / w: double denom = ispi / (wr*wr + wi*wi); ret = C(denom*wi, denom*wr); } } if (y < 0) { faddeeva_algorithm += 10; // use w(z) = 2.0*exp(-z*z) - w(-z), // but be careful of overflow in exp(-z*z) // = exp(-(xs*xs-ya*ya) -2*i*xs*ya) return 2.0*cexp(C((ya-xs)*(xs+ya), 2*xs*y)) - ret; } else return ret; } /* Note: The test that seems to be suggested in the paper is x < sqrt(-log(DBL_MIN)), about 26.6, since otherwise exp(-x^2) underflows to zero and sum1,sum2,sum4 are zero. However, long before this occurs, the sum1,sum2,sum4 contributions are negligible in double precision; I find that this happens for x > about 6, for all y. On the other hand, I find that the case where we compute all of the sums is faster (at least with the precomputed expa2n2 table) until about x=10. Furthermore, if we try to compute all of the sums for x > 20, I find that we sometimes run into numerical problems because underflow/overflow problems start to appear in the various coefficients of the sums, below. Therefore, we use x < 10 here. */ else if (x < 10) { faddeeva_algorithm = 200; double prod2ax = 1, prodm2ax = 1; double expx2; if (isnan(y)) { faddeeva_algorithm += 99; return C(y,y); } if (x < 5e-4) { // compute sum4 and sum5 together as sum5-sum4 // This special case is needed for accuracy. faddeeva_algorithm += 1; const double x2 = x*x; expx2 = 1 - x2 * (1 - 0.5*x2); // exp(-x*x) via Taylor // compute exp(2*a*x) and exp(-2*a*x) via Taylor, to double precision const double ax2 = 1.036642960860171859744*x; // 2*a*x const double exp2ax = 1 + ax2 * (1 + ax2 * (0.5 + 0.166666666666666666667*ax2)); const double expm2ax = 1 - ax2 * (1 - ax2 * (0.5 - 0.166666666666666666667*ax2)); for (n = 1; 1; ++n) { ++faddeeva_nofterms; const double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y); prod2ax *= exp2ax; prodm2ax *= expm2ax; sum1 += coef; sum2 += coef * prodm2ax; sum3 += coef * prod2ax; // really = sum5 - sum4 sum5 += coef * (2*a) * n * sinh_taylor((2*a)*n*x); // test convergence via sum3 if (coef * prod2ax < relerr * sum3) break; } } else { // x > 5e-4, compute sum4 and sum5 separately faddeeva_algorithm += 2; expx2 = exp(-x*x); const double exp2ax = exp((2*a)*x), expm2ax = 1 / exp2ax; for (n = 1; 1; ++n) { ++faddeeva_nofterms; const double coef = expa2n2[n-1] * expx2 / (a2*(n*n) + y*y); prod2ax *= exp2ax; prodm2ax *= expm2ax; sum1 += coef; sum2 += coef * prodm2ax; sum4 += (coef * prodm2ax) * (a*n); sum3 += coef * prod2ax; sum5 += (coef * prod2ax) * (a*n); // test convergence via sum5, since this sum has the slowest decay if ((coef * prod2ax) * (a*n) < relerr * sum5) break; } } const double expx2erfcxy = // avoid spurious overflow for large negative y y > -6 // for y < -6, erfcx(y) = 2*exp(y*y) to double precision ? expx2*erfcx(y) : 2*exp(y*y-x*x); if (y > 5) { // imaginary terms cancel faddeeva_algorithm += 10; const double sinxy = sin(x*y); ret = (expx2erfcxy - c*y*sum1) * cos(2*x*y) + (c*x*expx2) * sinxy * sinc(x*y, sinxy); } else { faddeeva_algorithm += 20; double xs = creal(z); const double sinxy = sin(xs*y); const double sin2xy = sin(2*xs*y), cos2xy = cos(2*xs*y); const double coef1 = expx2erfcxy - c*y*sum1; const double coef2 = c*xs*expx2; ret = C(coef1 * cos2xy + coef2 * sinxy * sinc(xs*y, sinxy), coef2 * sinc(2*xs*y, sin2xy) - coef1 * sin2xy); } } else { // x large: only sum3 & sum5 contribute (see above note) faddeeva_algorithm = 300; if (isnan(x)) return C(x,x); if (isnan(y)) return C(y,y); ret = exp(-x*x); // |y| < 1e-10, so we only need exp(-x*x) term // (round instead of ceil as in original paper; note that x/a > 1 here) double n0 = floor(x/a + 0.5); // sum in both directions, starting at n0 double dx = a*n0 - x; sum3 = exp(-dx*dx) / (a2*(n0*n0) + y*y); sum5 = a*n0 * sum3; double exp1 = exp(4*a*dx), exp1dn = 1; int dn; for (dn = 1; n0 - dn > 0; ++dn) { // loop over n0-dn and n0+dn terms double np = n0 + dn, nm = n0 - dn; double tp = exp(-sqr(a*dn+dx)); double tm = tp * (exp1dn *= exp1); // trick to get tm from tp tp /= (a2*(np*np) + y*y); tm /= (a2*(nm*nm) + y*y); sum3 += tp + tm; sum5 += a * (np * tp + nm * tm); if (a * (np * tp + nm * tm) < relerr * sum5) goto finish; } while (1) { // loop over n0+dn terms only (since n0-dn <= 0) double np = n0 + dn++; double tp = exp(-sqr(a*dn+dx)) / (a2*(np*np) + y*y); sum3 += tp; sum5 += a * np * tp; if (a * np * tp < relerr * sum5) goto finish; } } finish: return ret + C((0.5*c)*y*(sum2+sum3), (0.5*c)*copysign(sum5-sum4, creal(z))); } // w_of_z
int pnlist[NPN] = { -1 }; int *pnp = pnlist; int npn = 1; int npnflg = 1; int dpn = -1; int totout = 1; int ulfont = ULFONT; int tabch = TAB; int ldrch = LEADER; #define C(a,b) {a, 0, b, 0, 0, NULL} Contab contab[NM] = { C(PAIR('d', 's'), caseds), C(PAIR('a', 's'), caseas), C(PAIR('s', 'p'), casesp), C(PAIR('f', 't'), caseft), C(PAIR('p', 's'), caseps), C(PAIR('v', 's'), casevs), C(PAIR('n', 'r'), casenr), C(PAIR('i', 'f'), caseif), C(PAIR('i', 'e'), caseie), C(PAIR('e', 'l'), caseel), C(PAIR('p', 'o'), casepo), C(PAIR('t', 'l'), casetl), C(PAIR('t', 'm'), casetm), C(PAIR('f', 'm'), casefm), C(PAIR('b', 'p'), casebp), C(PAIR('c', 'h'), casech),
[SND_PCM_FORMAT_U20_3BE] = VLC_CODEC_U24B, // ^ [SND_PCM_FORMAT_S18_3LE] = VLC_CODEC_S24L, // ^ [SND_PCM_FORMAT_S18_3BE] = VLC_CODEC_S24B, // ^ [SND_PCM_FORMAT_U18_3LE] = VLC_CODEC_U24L, // ^ [SND_PCM_FORMAT_U18_3BE] = VLC_CODEC_U24B, // ^ }; #ifdef WORDS_BIGENDIAN # define C(f) f##BE, f##LE #else # define C(f) f##LE, f##BE #endif /* Formats in order of decreasing preference */ static const uint8_t choices[] = { C(SND_PCM_FORMAT_FLOAT_), C(SND_PCM_FORMAT_S32_), C(SND_PCM_FORMAT_U32_), C(SND_PCM_FORMAT_S16_), C(SND_PCM_FORMAT_U16_), C(SND_PCM_FORMAT_FLOAT64_), C(SND_PCM_FORMAT_S24_3), C(SND_PCM_FORMAT_U24_3), SND_PCM_FORMAT_MPEG, SND_PCM_FORMAT_GSM, SND_PCM_FORMAT_MU_LAW, SND_PCM_FORMAT_A_LAW, SND_PCM_FORMAT_S8, SND_PCM_FORMAT_U8, };
int main() { { static_assert( ranges::detail::BidirectionalCursor< ranges::detail::reverse_cursor<bidirectional_iterator<const char *>>>{}, ""); static_assert( ranges::detail::BidirectionalCursor< ranges::detail::reverse_cursor<random_access_iterator<const char *>>>{}, ""); static_assert( ranges::detail::RandomAccessCursor< ranges::detail::reverse_cursor<random_access_iterator<const char *>>>{}, ""); static_assert( ranges::BidirectionalIterator< ranges::reverse_iterator<bidirectional_iterator<const char *>>>{}, ""); static_assert( ranges::RandomAccessIterator< ranges::reverse_iterator<random_access_iterator<const char *>>>{}, ""); } { // test test<bidirectional_iterator<const char *>>(); test<random_access_iterator<char *>>(); test<char *>(); test<const char *>(); } { // test 2 const char s[] = "123"; test2(bidirectional_iterator<const char *>(s)); test2(random_access_iterator<const char *>(s)); } { // test3 Derived d; test3<bidirectional_iterator<Base *>>( bidirectional_iterator<Derived *>(&d)); test3<random_access_iterator<const Base *>>( random_access_iterator<Derived *>(&d)); } { // test4 const char *s = "1234567890"; random_access_iterator<const char *> b(s); random_access_iterator<const char *> e(s + 10); while (b != e) test4(b++); } { // test5 const char *s = "1234567890"; test5(bidirectional_iterator<const char *>(s), bidirectional_iterator<const char *>(s), false); test5(bidirectional_iterator<const char *>(s), bidirectional_iterator<const char *>(s + 1), true); test5(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s), false); test5(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s + 1), true); test5(s, s, false); test5(s, s + 1, true); } { const char *s = "123"; test6(bidirectional_iterator<const char *>(s + 1), bidirectional_iterator<const char *>(s)); test6(random_access_iterator<const char *>(s + 1), random_access_iterator<const char *>(s)); test6(s + 1, s); } { const char *s = "123"; test7(bidirectional_iterator<const char *>(s + 1), bidirectional_iterator<const char *>(s)); test7(random_access_iterator<const char *>(s + 1), random_access_iterator<const char *>(s)); test7(s + 1, s); } { const char *s = "1234567890"; test8(random_access_iterator<const char *>(s + 5), 5, random_access_iterator<const char *>(s)); test8(s + 5, 5, s); } { const char *s = "1234567890"; test9(random_access_iterator<const char *>(s + 5), 5, random_access_iterator<const char *>(s)); test9(s + 5, 5, s); } { const char *s = "123"; test10(bidirectional_iterator<const char *>(s + 1), bidirectional_iterator<const char *>(s + 2)); test10(random_access_iterator<const char *>(s + 1), random_access_iterator<const char *>(s + 2)); test10(s + 1, s + 2); } { const char *s = "123"; test11(bidirectional_iterator<const char *>(s + 1), bidirectional_iterator<const char *>(s + 2)); test11(random_access_iterator<const char *>(s + 1), random_access_iterator<const char *>(s + 2)); test11(s + 1, s + 2); } { const char *s = "1234567890"; test12(random_access_iterator<const char *>(s + 5), 5, random_access_iterator<const char *>(s + 10)); test12(s + 5, 5, s + 10); } { const char *s = "1234567890"; test13(random_access_iterator<const char *>(s + 5), 5, random_access_iterator<const char *>(s + 10)); test13(s + 5, 5, s + 10); } { A a; test14(&a + 1, A()); } { Derived d; test15<bidirectional_iterator<Base *>>( bidirectional_iterator<Derived *>(&d)); test15<random_access_iterator<const Base *>>( random_access_iterator<Derived *>(&d)); test15<Base *>(&d); } { const char *s = "1234567890"; test16(bidirectional_iterator<const char *>(s), bidirectional_iterator<const char *>(s), true); test16(bidirectional_iterator<const char *>(s), bidirectional_iterator<const char *>(s + 1), false); test16(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s), true); test16(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s + 1), false); test16(s, s, true); test16(s, s + 1, false); } { char s[3] = {0}; test17(random_access_iterator<const char *>(s), random_access_iterator<char *>(s), 0); random_access_iterator<char *> inp1(s); test17(random_access_iterator<char *>(s), random_access_iterator<const char *>(s + 1), 1); test17(random_access_iterator<const char *>(s + 1), random_access_iterator<char *>(s), -1); test17(s, s, 0); test17(s, s + 1, 1); test17(s + 1, s, -1); } { const char *s = "1234567890"; test18(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s), false); test18(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s + 1), true); test18(random_access_iterator<const char *>(s + 1), random_access_iterator<const char *>(s), false); test18(s, s, false); test18(s, s + 1, true); test18(s + 1, s, false); } { const char *s = "1234567890"; test19(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s), true); test19(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s + 1), true); test19(random_access_iterator<const char *>(s + 1), random_access_iterator<const char *>(s), false); test19(s, s, true); test19(s, s + 1, true); test19(s + 1, s, false); } { const char *s = "1234567890"; test20(random_access_iterator<const char *>(s + 5), 4, '1'); test20(s + 5, 4, '1'); } { const char *s = "1234567890"; test21(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s), false); test21(random_access_iterator<const char *>(s), random_access_iterator<const char *>(s + 1), false); test21(random_access_iterator<const char *>(s + 1), random_access_iterator<const char *>(s), true); test21(s, s, false); test21(s, s + 1, false); test21(s + 1, s, true); } { const char* s = "1234567890"; test22(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s), false); test22(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+1), false); test22(random_access_iterator<const char*>(s+1), random_access_iterator<const char*>(s), true); test22(s, s, false); test22(s, s+1, false); test22(s+1, s, true); } { const char* s = "1234567890"; test23(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s), true); test23(random_access_iterator<const char*>(s), random_access_iterator<const char*>(s+1), false); test23(random_access_iterator<const char*>(s+1), random_access_iterator<const char*>(s), true); test23(s, s, true); test23(s, s+1, false); test23(s+1, s, true); } { B a; test24(&a+1, B()); } { C l[3] = {C(0), C(1), C(2)}; auto ri = ranges::rbegin(l); CHECK ( (*ri).get() == 2 ); ++ri; CHECK ( (*ri).get() == 1 ); ++ri; CHECK ( (*ri).get() == 0 ); ++ri; CHECK ( ri == ranges::rend(l)); } { const char* s = "1234567890"; test25(random_access_iterator<const char*>(s+5), 5, random_access_iterator<const char*>(s)); test25(s+5, 5, s); } return test_result(); }
void md_begin () { h8500_opcode_info *opcode; char prev_buffer[100]; int idx = 0; register relax_typeS *table; opcode_hash_control = hash_new (); prev_buffer[0] = 0; /* Insert unique names into hash table */ for (opcode = h8500_table; opcode->name; opcode++) { if (idx != opcode->idx) { hash_insert (opcode_hash_control, opcode->name, (char *) opcode); idx++; } } /* Initialize the relax table. We use a local variable to avoid warnings about modifying a supposedly const data structure. */ table = (relax_typeS *) md_relax_table; table[C (BRANCH, BYTE_DISP)].rlx_forward = BYTE_F; table[C (BRANCH, BYTE_DISP)].rlx_backward = BYTE_B; table[C (BRANCH, BYTE_DISP)].rlx_length = 2; table[C (BRANCH, BYTE_DISP)].rlx_more = C (BRANCH, WORD_DISP); table[C (BRANCH, WORD_DISP)].rlx_forward = WORD_F; table[C (BRANCH, WORD_DISP)].rlx_backward = WORD_B; table[C (BRANCH, WORD_DISP)].rlx_length = 3; table[C (BRANCH, WORD_DISP)].rlx_more = 0; table[C (SCB_F, BYTE_DISP)].rlx_forward = BYTE_F; table[C (SCB_F, BYTE_DISP)].rlx_backward = BYTE_B; table[C (SCB_F, BYTE_DISP)].rlx_length = 3; table[C (SCB_F, BYTE_DISP)].rlx_more = C (SCB_F, WORD_DISP); table[C (SCB_F, WORD_DISP)].rlx_forward = WORD_F; table[C (SCB_F, WORD_DISP)].rlx_backward = WORD_B; table[C (SCB_F, WORD_DISP)].rlx_length = 8; table[C (SCB_F, WORD_DISP)].rlx_more = 0; table[C (SCB_TST, BYTE_DISP)].rlx_forward = BYTE_F; table[C (SCB_TST, BYTE_DISP)].rlx_backward = BYTE_B; table[C (SCB_TST, BYTE_DISP)].rlx_length = 3; table[C (SCB_TST, BYTE_DISP)].rlx_more = C (SCB_TST, WORD_DISP); table[C (SCB_TST, WORD_DISP)].rlx_forward = WORD_F; table[C (SCB_TST, WORD_DISP)].rlx_backward = WORD_B; table[C (SCB_TST, WORD_DISP)].rlx_length = 10; table[C (SCB_TST, WORD_DISP)].rlx_more = 0; }
'2', '3', '0', '.', NO, NO, NO, NO, // 0x50 [0xC7] = KEY_HOME, [0x9C] = '\n' /*KP_Enter*/, [0xB5] = '/' /*KP_Div*/, [0xC8] = KEY_UP, [0xC9] = KEY_PGUP, [0xCB] = KEY_LF, [0xCD] = KEY_RT, [0xCF] = KEY_END, [0xD0] = KEY_DN, [0xD1] = KEY_PGDN, [0xD2] = KEY_INS, [0xD3] = KEY_DEL }; #define C(x) (x - '@') static uint8_t ctlmap[256] = { NO, NO, NO, NO, NO, NO, NO, NO, NO, NO, NO, NO, NO, NO, NO, NO, C('Q'), C('W'), C('E'), C('R'), C('T'), C('Y'), C('U'), C('I'), C('O'), C('P'), NO, NO, '\r', NO, C('A'), C('S'), C('D'), C('F'), C('G'), C('H'), C('J'), C('K'), C('L'), NO, NO, NO, NO, C('\\'), C('Z'), C('X'), C('C'), C('V'), C('B'), C('N'), C('M'), NO, NO, C('/'), NO, NO, [0x97] = KEY_HOME, [0xB5] = C('/'), [0xC8] = KEY_UP, [0xC9] = KEY_PGUP, [0xCB] = KEY_LF, [0xCD] = KEY_RT, [0xCF] = KEY_END, [0xD0] = KEY_DN, [0xD1] = KEY_PGDN, [0xD2] = KEY_INS, [0xD3] = KEY_DEL }; static uint8_t *charcode[4] = { normalmap, shiftmap,
int main() { printf("C(%d, %d = %d)\n",4, 2, C(4, 2)); printf("C(%d, %d = %d)\n",49, 6, C(49, 6)); return 0; }
/** Purpose ------- SORMTR overwrites the general real M-by-N matrix C with SIDE = MagmaLeft SIDE = MagmaRight TRANS = MagmaNoTrans: Q * C C * Q TRANS = MagmaTrans: Q**H * C C * Q**H where Q is a real unitary matrix of order nq, with nq = m if SIDE = MagmaLeft and nq = n if SIDE = MagmaRight. Q is defined as the product of nq-1 elementary reflectors, as returned by SSYTRD: if UPLO = MagmaUpper, Q = H(nq-1) . . . H(2) H(1); if UPLO = MagmaLower, Q = H(1) H(2) . . . H(nq-1). Arguments --------- @param[in] ngpu INTEGER Number of GPUs to use. ngpu > 0. @param[in] side magma_side_t - = MagmaLeft: apply Q or Q**H from the Left; - = MagmaRight: apply Q or Q**H from the Right. @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A contains elementary reflectors from SSYTRD; - = MagmaLower: Lower triangle of A contains elementary reflectors from SSYTRD. @param[in] trans magma_trans_t - = MagmaNoTrans: No transpose, apply Q; - = MagmaTrans: Conjugate transpose, apply Q**H. @param[in] m INTEGER The number of rows of the matrix C. M >= 0. @param[in] n INTEGER The number of columns of the matrix C. N >= 0. @param[in] A REAL array, dimension (LDA,M) if SIDE = MagmaLeft (LDA,N) if SIDE = MagmaRight The vectors which define the elementary reflectors, as returned by SSYTRD. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,M) if SIDE = MagmaLeft; LDA >= max(1,N) if SIDE = MagmaRight. @param[in] tau REAL array, dimension (M-1) if SIDE = MagmaLeft (N-1) if SIDE = MagmaRight TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SSYTRD. @param[in,out] C REAL array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. @param[in] ldc INTEGER The leading dimension of the array C. LDC >= max(1,M). @param[out] work (workspace) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. @param[in] lwork INTEGER The dimension of the array WORK. If SIDE = MagmaLeft, LWORK >= max(1,N); if SIDE = MagmaRight, LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = MagmaLeft, and LWORK >= M*NB if SIDE = MagmaRight, where NB is the optimal blocksize. \n If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value @ingroup magma_ssyev_comp ********************************************************************/ extern "C" magma_int_t magma_sormtr_m( magma_int_t ngpu, magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_int_t m, magma_int_t n, float *A, magma_int_t lda, float *tau, float *C, magma_int_t ldc, float *work, magma_int_t lwork, magma_int_t *info) { #define A(i_,j_) (A + (i_) + (j_)*lda) #define C(i_,j_) (C + (i_) + (j_)*ldc) float c_one = MAGMA_S_ONE; magma_int_t i__2; magma_int_t i1, i2, nb, mi, ni, nq, nw; int left, upper, lquery; magma_int_t iinfo; magma_int_t lwkopt; *info = 0; left = (side == MagmaLeft); upper = (uplo == MagmaUpper); lquery = (lwork == -1); /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = m; nw = n; } else { nq = n; nw = m; } if (! left && side != MagmaRight) { *info = -1; } else if (! upper && uplo != MagmaLower) { *info = -2; } else if (trans != MagmaNoTrans && trans != MagmaTrans) { *info = -3; } else if (m < 0) { *info = -4; } else if (n < 0) { *info = -5; } else if (lda < max(1,nq)) { *info = -7; } else if (ldc < max(1,m)) { *info = -10; } else if (lwork < max(1,nw) && ! lquery) { *info = -12; } nb = 32; lwkopt = max(1,nw) * nb; if (*info == 0) { work[0] = MAGMA_S_MAKE( lwkopt, 0 ); } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (m == 0 || n == 0 || nq == 1) { work[0] = c_one; return *info; } if (left) { mi = m - 1; ni = n; } else { mi = m; ni = n - 1; } if (upper) { /* Q was determined by a call to SSYTRD with UPLO = 'U' */ i__2 = nq - 1; // TODO: upper case is not yet implemented for multiple GPUs -- see above // for now use one GPU //lapackf77_sormql(side_, trans_, &mi, &ni, &i__2, A(0,1), &lda, // tau, C, &ldc, work, &lwork, &iinfo); //magma_sormql_m(ngpu, side, trans, mi, ni, i__2, A(0,1), lda, tau, // C, ldc, work, lwork, &iinfo); magma_sormql(side, trans, mi, ni, i__2, A(0,1), lda, tau, C, ldc, work, lwork, &iinfo); } else { /* Q was determined by a call to SSYTRD with UPLO = 'L' */ if (left) { i1 = 1; i2 = 0; } else { i1 = 0; i2 = 1; } i__2 = nq - 1; magma_sormqr_m(ngpu, side, trans, mi, ni, i__2, A(1,0), lda, tau, C(i1,i2), ldc, work, lwork, &iinfo); } work[0] = MAGMA_S_MAKE( lwkopt, 0 ); return *info; } /* magma_sormtr */
/* Subroutine */ int ctrsyl_(char *trana, char *tranb, integer *isgn, integer *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *c, integer *ldc, real *scale, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= CTRSYL solves the complex Sylvester matrix equation: op(A)*X + X*op(B) = scale*C or op(A)*X - X*op(B) = scale*C, where op(A) = A or A**H, and A and B are both upper triangular. A is M-by-M and B is N-by-N; the right hand side C and the solution X are M-by-N; and scale is an output scale factor, set <= 1 to avoid overflow in X. Arguments ========= TRANA (input) CHARACTER*1 Specifies the option op(A): = 'N': op(A) = A (No transpose) = 'C': op(A) = A**H (Conjugate transpose) TRANB (input) CHARACTER*1 Specifies the option op(B): = 'N': op(B) = B (No transpose) = 'C': op(B) = B**H (Conjugate transpose) ISGN (input) INTEGER Specifies the sign in the equation: = +1: solve op(A)*X + X*op(B) = scale*C = -1: solve op(A)*X - X*op(B) = scale*C M (input) INTEGER The order of the matrix A, and the number of rows in the matrices X and C. M >= 0. N (input) INTEGER The order of the matrix B, and the number of columns in the matrices X and C. N >= 0. A (input) COMPLEX array, dimension (LDA,M) The upper triangular matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input) COMPLEX array, dimension (LDB,N) The upper triangular matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). C (input/output) COMPLEX array, dimension (LDC,N) On entry, the M-by-N right hand side matrix C. On exit, C is overwritten by the solution matrix X. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M) SCALE (output) REAL The scale factor, scale, set <= 1 to avoid overflow in X. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value = 1: A and B have common or very close eigenvalues; perturbed values were used to solve the equation (but the matrices A and B are unchanged). ===================================================================== Decode and Test input parameters Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4; real r__1, r__2; complex q__1, q__2, q__3, q__4; /* Builtin functions */ double r_imag(complex *); void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); /* Local variables */ static real smin; static complex suml, sumr; static integer j, k, l; extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer *, complex *, integer *); extern logical lsame_(char *, char *); extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer *, complex *, integer *); static complex a11; static real db; extern /* Subroutine */ int slabad_(real *, real *); extern doublereal clange_(char *, integer *, integer *, complex *, integer *, real *); static complex x11; static real scaloc; extern doublereal slamch_(char *); extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer *), xerbla_(char *, integer *); static real bignum; static logical notrna, notrnb; static real smlnum, da11; static complex vec; static real dum[1], eps, sgn; #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)] notrna = lsame_(trana, "N"); notrnb = lsame_(tranb, "N"); *info = 0; if (! notrna && ! lsame_(trana, "T") && ! lsame_(trana, "C")) { *info = -1; } else if (! notrnb && ! lsame_(tranb, "T") && ! lsame_(tranb, "C")) { *info = -2; } else if (*isgn != 1 && *isgn != -1) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*m)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldc < max(1,*m)) { *info = -11; } if (*info != 0) { i__1 = -(*info); xerbla_("CTRSYL", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Set constants to control overflow */ eps = slamch_("P"); smlnum = slamch_("S"); bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); smlnum = smlnum * (real) (*m * *n) / eps; bignum = 1.f / smlnum; /* Computing MAX */ r__1 = smlnum, r__2 = eps * clange_("M", m, m, &A(1,1), lda, dum) , r__1 = max(r__1,r__2), r__2 = eps * clange_("M", n, n, &B(1,1), ldb, dum); smin = dmax(r__1,r__2); *scale = 1.f; sgn = (real) (*isgn); if (notrna && notrnb) { /* Solve A*X + ISGN*X*B = scale*C. The (K,L)th block of X is determined starting from bottom-left corner column by column by A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) Where M L-1 R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]. I=K+1 J=1 */ i__1 = *n; for (l = 1; l <= *n; ++l) { for (k = *m; k >= 1; --k) { i__2 = *m - k; /* Computing MIN */ i__3 = k + 1; /* Computing MIN */ i__4 = k + 1; cdotu_(&q__1, &i__2, &A(k,min(k+1,*m)), lda, &C(min(k+1,*m),l), &c__1); suml.r = q__1.r, suml.i = q__1.i; i__2 = l - 1; cdotu_(&q__1, &i__2, &C(k,1), ldc, &B(1,l), &c__1); sumr.r = q__1.r, sumr.i = q__1.i; i__2 = k + l * c_dim1; q__3.r = sgn * sumr.r, q__3.i = sgn * sumr.i; q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i; q__1.r = C(k,l).r - q__2.r, q__1.i = C(k,l).i - q__2.i; vec.r = q__1.r, vec.i = q__1.i; scaloc = 1.f; i__2 = k + k * a_dim1; i__3 = l + l * b_dim1; q__2.r = sgn * B(l,l).r, q__2.i = sgn * B(l,l).i; q__1.r = A(k,k).r + q__2.r, q__1.i = A(k,k).i + q__2.i; a11.r = q__1.r, a11.i = q__1.i; da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11), dabs(r__2)); if (da11 <= smin) { a11.r = smin, a11.i = 0.f; da11 = smin; *info = 1; } db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs( r__2)); if (da11 < 1.f && db > 1.f) { if (db > bignum * da11) { scaloc = 1.f / db; } } q__3.r = scaloc, q__3.i = 0.f; q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r * q__3.i + vec.i * q__3.r; c_div(&q__1, &q__2, &a11); x11.r = q__1.r, x11.i = q__1.i; if (scaloc != 1.f) { i__2 = *n; for (j = 1; j <= *n; ++j) { csscal_(m, &scaloc, &C(1,j), &c__1); /* L10: */ } *scale *= scaloc; } i__2 = k + l * c_dim1; C(k,l).r = x11.r, C(k,l).i = x11.i; /* L20: */ } /* L30: */ } } else if (! notrna && notrnb) { /* Solve A' *X + ISGN*X*B = scale*C. The (K,L)th block of X is determined starting from upper-left corner column by column by A'(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) Where K-1 L-1 R(K,L) = SUM [A'(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)] I=1 J=1 */ i__1 = *n; for (l = 1; l <= *n; ++l) { i__2 = *m; for (k = 1; k <= *m; ++k) { i__3 = k - 1; cdotc_(&q__1, &i__3, &A(1,k), &c__1, &C(1,l), &c__1); suml.r = q__1.r, suml.i = q__1.i; i__3 = l - 1; cdotu_(&q__1, &i__3, &C(k,1), ldc, &B(1,l), &c__1); sumr.r = q__1.r, sumr.i = q__1.i; i__3 = k + l * c_dim1; q__3.r = sgn * sumr.r, q__3.i = sgn * sumr.i; q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i; q__1.r = C(k,l).r - q__2.r, q__1.i = C(k,l).i - q__2.i; vec.r = q__1.r, vec.i = q__1.i; scaloc = 1.f; r_cnjg(&q__2, &A(k,k)); i__3 = l + l * b_dim1; q__3.r = sgn * B(l,l).r, q__3.i = sgn * B(l,l).i; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; a11.r = q__1.r, a11.i = q__1.i; da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11), dabs(r__2)); if (da11 <= smin) { a11.r = smin, a11.i = 0.f; da11 = smin; *info = 1; } db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs( r__2)); if (da11 < 1.f && db > 1.f) { if (db > bignum * da11) { scaloc = 1.f / db; } } q__3.r = scaloc, q__3.i = 0.f; q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r * q__3.i + vec.i * q__3.r; c_div(&q__1, &q__2, &a11); x11.r = q__1.r, x11.i = q__1.i; if (scaloc != 1.f) { i__3 = *n; for (j = 1; j <= *n; ++j) { csscal_(m, &scaloc, &C(1,j), &c__1); /* L40: */ } *scale *= scaloc; } i__3 = k + l * c_dim1; C(k,l).r = x11.r, C(k,l).i = x11.i; /* L50: */ } /* L60: */ } } else if (! notrna && ! notrnb) { /* Solve A'*X + ISGN*X*B' = C. The (K,L)th block of X is determined starting from upper-right corner column by column by A'(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) Where K-1 R(K,L) = SUM [A'(I,K)*X(I,L)] + I=1 N ISGN*SUM [X(K,J)*B'(L,J)]. J=L+1 */ for (l = *n; l >= 1; --l) { i__1 = *m; for (k = 1; k <= *m; ++k) { i__2 = k - 1; cdotc_(&q__1, &i__2, &A(1,k), &c__1, &C(1,l), &c__1); suml.r = q__1.r, suml.i = q__1.i; i__2 = *n - l; /* Computing MIN */ i__3 = l + 1; /* Computing MIN */ i__4 = l + 1; cdotc_(&q__1, &i__2, &C(k,min(l+1,*n)), ldc, &B(l,min(l+1,*n)), ldb); sumr.r = q__1.r, sumr.i = q__1.i; i__2 = k + l * c_dim1; r_cnjg(&q__4, &sumr); q__3.r = sgn * q__4.r, q__3.i = sgn * q__4.i; q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i; q__1.r = C(k,l).r - q__2.r, q__1.i = C(k,l).i - q__2.i; vec.r = q__1.r, vec.i = q__1.i; scaloc = 1.f; i__2 = k + k * a_dim1; i__3 = l + l * b_dim1; q__3.r = sgn * B(l,l).r, q__3.i = sgn * B(l,l).i; q__2.r = A(k,k).r + q__3.r, q__2.i = A(k,k).i + q__3.i; r_cnjg(&q__1, &q__2); a11.r = q__1.r, a11.i = q__1.i; da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11), dabs(r__2)); if (da11 <= smin) { a11.r = smin, a11.i = 0.f; da11 = smin; *info = 1; } db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs( r__2)); if (da11 < 1.f && db > 1.f) { if (db > bignum * da11) { scaloc = 1.f / db; } } q__3.r = scaloc, q__3.i = 0.f; q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r * q__3.i + vec.i * q__3.r; c_div(&q__1, &q__2, &a11); x11.r = q__1.r, x11.i = q__1.i; if (scaloc != 1.f) { i__2 = *n; for (j = 1; j <= *n; ++j) { csscal_(m, &scaloc, &C(1,j), &c__1); /* L70: */ } *scale *= scaloc; } i__2 = k + l * c_dim1; C(k,l).r = x11.r, C(k,l).i = x11.i; /* L80: */ } /* L90: */ } } else if (notrna && ! notrnb) { /* Solve A*X + ISGN*X*B' = C. The (K,L)th block of X is determined starting from bottom-left corner column by column by A(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L) Where M N R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B'(L,J)] I=K+1 J=L+1 */ for (l = *n; l >= 1; --l) { for (k = *m; k >= 1; --k) { i__1 = *m - k; /* Computing MIN */ i__2 = k + 1; /* Computing MIN */ i__3 = k + 1; cdotu_(&q__1, &i__1, &A(k,min(k+1,*m)), lda, &C(min(k+1,*m),l), &c__1); suml.r = q__1.r, suml.i = q__1.i; i__1 = *n - l; /* Computing MIN */ i__2 = l + 1; /* Computing MIN */ i__3 = l + 1; cdotc_(&q__1, &i__1, &C(k,min(l+1,*n)), ldc, &B(l,min(l+1,*n)), ldb); sumr.r = q__1.r, sumr.i = q__1.i; i__1 = k + l * c_dim1; r_cnjg(&q__4, &sumr); q__3.r = sgn * q__4.r, q__3.i = sgn * q__4.i; q__2.r = suml.r + q__3.r, q__2.i = suml.i + q__3.i; q__1.r = C(k,l).r - q__2.r, q__1.i = C(k,l).i - q__2.i; vec.r = q__1.r, vec.i = q__1.i; scaloc = 1.f; i__1 = k + k * a_dim1; r_cnjg(&q__3, &B(l,l)); q__2.r = sgn * q__3.r, q__2.i = sgn * q__3.i; q__1.r = A(k,k).r + q__2.r, q__1.i = A(k,k).i + q__2.i; a11.r = q__1.r, a11.i = q__1.i; da11 = (r__1 = a11.r, dabs(r__1)) + (r__2 = r_imag(&a11), dabs(r__2)); if (da11 <= smin) { a11.r = smin, a11.i = 0.f; da11 = smin; *info = 1; } db = (r__1 = vec.r, dabs(r__1)) + (r__2 = r_imag(&vec), dabs( r__2)); if (da11 < 1.f && db > 1.f) { if (db > bignum * da11) { scaloc = 1.f / db; } } q__3.r = scaloc, q__3.i = 0.f; q__2.r = vec.r * q__3.r - vec.i * q__3.i, q__2.i = vec.r * q__3.i + vec.i * q__3.r; c_div(&q__1, &q__2, &a11); x11.r = q__1.r, x11.i = q__1.i; if (scaloc != 1.f) { i__1 = *n; for (j = 1; j <= *n; ++j) { csscal_(m, &scaloc, &C(1,j), &c__1); /* L100: */ } *scale *= scaloc; } i__1 = k + l * c_dim1; C(k,l).r = x11.r, C(k,l).i = x11.i; /* L110: */ } /* L120: */ } } return 0; /* End of CTRSYL */ } /* ctrsyl_ */
JelloMesh::FaceMesh::FaceMesh(const JelloMesh& m, JelloMesh::Face f) { const ParticleGrid& g = m.m_vparticles; switch(f) { case ZFRONT: m_strips.resize(m.m_rows); for (int i = 0; i < m.m_rows+1; i++) for (int j = 0; j < m.m_cols+1; j++) { if (i < m.m_rows) { m_strips[i].push_back(m.GetIndex(i+1,j,0)); m_strips[i].push_back(m.GetIndex(i,j,0)); } std::vector<int> neighbors; neighbors.push_back(m.GetIndex(R(i), C(j+1), D(0))); neighbors.push_back(m.GetIndex(R(i), C(j-1), D(0))); neighbors.push_back(m.GetIndex(R(i-1), C(j), D(0))); neighbors.push_back(m.GetIndex(R(i+1), C(j), D(0))); m_neighbors[m.GetIndex(i,j,0)] = neighbors; } break; case ZBACK: m_strips.resize(m.m_rows); for (int i = 0; i < m.m_rows+1; i++) for (int j = 0; j < m.m_cols+1; j++) { if (i < m.m_rows) { m_strips[i].push_back(m.GetIndex(i+1,j,m.m_stacks)); m_strips[i].push_back(m.GetIndex(i,j,m.m_stacks)); } std::vector<int> neighbors; neighbors.push_back(m.GetIndex(R(i+1), C(j), D(m.m_stacks))); neighbors.push_back(m.GetIndex(R(i-1), C(j), D(m.m_stacks))); neighbors.push_back(m.GetIndex(R(i), C(j-1), D(m.m_stacks))); neighbors.push_back(m.GetIndex(R(i), C(j+1), D(m.m_stacks))); m_neighbors[m.GetIndex(i,j,m.m_stacks)] = neighbors; } break; case XLEFT: m_strips.resize(m.m_cols); for (int j = 0; j < m.m_cols+1; j++) for (int k = 0; k < m.m_stacks+1; k++) { if (j < m.m_cols) { m_strips[j].push_back(m.GetIndex(0,j+1,k)); m_strips[j].push_back(m.GetIndex(0,j,k)); } std::vector<int> neighbors; neighbors.push_back(m.GetIndex(R(0), C(j), D(k+1))); neighbors.push_back(m.GetIndex(R(0), C(j), D(k-1))); neighbors.push_back(m.GetIndex(R(0), C(j-1), D(k))); neighbors.push_back(m.GetIndex(R(0), C(j+1), D(k))); m_neighbors[m.GetIndex(0,j,k)] = neighbors; } break; case XRIGHT: m_strips.resize(m.m_cols); for (int j = 0; j < m.m_cols+1; j++) for (int k = 0; k < m.m_stacks+1; k++) { if (j < m.m_cols) { m_strips[j].push_back(m.GetIndex(m.m_rows,j+1,k)); m_strips[j].push_back(m.GetIndex(m.m_rows,j,k)); } std::vector<int> neighbors; neighbors.push_back(m.GetIndex(R(m.m_rows), C(j+1), D(k))); neighbors.push_back(m.GetIndex(R(m.m_rows), C(j-1), D(k))); neighbors.push_back(m.GetIndex(R(m.m_rows), C(j), D(k-1))); neighbors.push_back(m.GetIndex(R(m.m_rows), C(j), D(k+1))); m_neighbors[m.GetIndex(m.m_rows,j,k)] = neighbors; } break; case YBOTTOM: m_strips.resize(m.m_rows); for (int i = 0; i < m.m_rows+1; i++) for (int k = 0; k < m.m_stacks+1; k++) { if (i < m.m_rows) { m_strips[i].push_back(m.GetIndex(i+1,0,k)); m_strips[i].push_back(m.GetIndex(i,0,k)); } std::vector<int> neighbors; neighbors.push_back(m.GetIndex(R(i+1), C(0), D(k))); neighbors.push_back(m.GetIndex(R(i-1), C(0), D(k))); neighbors.push_back(m.GetIndex(R(i), C(0), D(k-1))); neighbors.push_back(m.GetIndex(R(i), C(0), D(k+1))); m_neighbors[m.GetIndex(i,0,k)] = neighbors; } break; case YTOP: m_strips.resize(m.m_rows); for (int i = 0; i < m.m_rows+1; i++) for (int k = 0; k< m.m_stacks+1; k++) { if (i < m.m_rows) { m_strips[i].push_back(m.GetIndex(i+1,m.m_cols,k)); m_strips[i].push_back(m.GetIndex(i,m.m_cols,k)); } std::vector<int> neighbors; neighbors.push_back(m.GetIndex(R(i), C(m.m_cols), D(k+1))); neighbors.push_back(m.GetIndex(R(i), C(m.m_cols), D(k-1))); neighbors.push_back(m.GetIndex(R(i-1), C(m.m_cols), D(k))); neighbors.push_back(m.GetIndex(R(i+1), C(m.m_cols), D(k))); m_neighbors[m.GetIndex(i,m.m_cols,k)] = neighbors; } break; } }
rspfSensorModelTuple::IntersectStatus rspfSensorModelTuple:: intersect(const DptSet_t obs, rspfEcefPoint& pt, NEWMAT::Matrix& covMat) const { IntersectStatus opOK = OP_FAIL; bool covOK = true; bool epOK; rspf_int32 nImages = (rspf_int32)obs.size(); NEWMAT::SymmetricMatrix N(3); NEWMAT::SymmetricMatrix BtWB(3); NEWMAT::Matrix Ni(3,3); NEWMAT::ColumnVector C(3); NEWMAT::ColumnVector BtWF(3); NEWMAT::ColumnVector F(2); NEWMAT::ColumnVector dR(3); NEWMAT::Matrix B(2,3); NEWMAT::SymmetricMatrix W(2); rspfGpt estG; theImages[0]->lineSampleHeightToWorld(obs[0], rspf::nan(), estG); for (int iter=0; iter<3; iter++) { N = 0.0; C = 0.0; for (int i=0; i<nImages; i++) { rspfDpt resid; if (!getGroundObsEqComponents(i, iter, obs[i], estG, resid, B, W)) covOK = false; F[0] = resid.x; F[1] = resid.y; BtWF << B.t() * W * F; BtWB << B.t() * W * B; C += BtWF; N += BtWB; } Ni = invert(N); dR = Ni * C; rspfEcefPoint estECF(estG); for (rspf_int32 i=0; i<3; i++) estECF[i] += dR[i]; rspfGpt upd(estECF); estG = upd; if (traceDebug()) { rspfNotify(rspfNotifyLevel_DEBUG) << "DEBUG: intersect:\n" << " iteration:\n" << iter << " C:\n" << C << " Ni:\n" << Ni << " dR:\n" << dR <<std::endl; } } // iterative loop rspfEcefPoint finalEst(estG); pt = finalEst; if (covOK) { covMat = Ni; epOK = true; } else epOK = false; if (epOK) opOK = OP_SUCCESS; else opOK = ERROR_PROP_FAIL; return opOK; }
#define F(n) (int)((FSCALE<<(n-1))*1.58740) /* 2^(8/12) */ #define Fx(n) (int)((FSCALE<<(n-1))*1.68179) /* 2^(9/12) */ #define G(n) (int)((FSCALE<<(n-1))*1.78180) /* 2^(10/12) */ #define Gx(n) (int)((FSCALE<<(n-1))*1.88775) /* 2^(11/12) */ #define A(n) (int)((FSCALE<<n)) /* A */ #define Ax(n) (int)((FSCALE<<n)*1.05946) /* 2^(1/12) */ #define B(n) (int)((FSCALE<<n)*1.12246) /* 2^(2/12) */ /* * Alarm sound? * It is unknown what this sound is like. Until somebody manages * trigger sound #1 of the Phoenix PCB sound chip I put just something * 'alarming' in here. */ static const int tune1[96*6] = { C(3), 0, 0, C(2), 0, 0, G(3), 0, 0, 0, 0, 0, C(3), 0, 0, 0, 0, 0, G(3), 0, 0, 0, 0, 0, C(3), 0, 0, 0, 0, 0, G(3), 0, 0, 0, 0, 0, C(3), 0, 0, 0, 0, 0, G(3), 0, 0, 0, 0, 0, C(3), 0, 0, C(4), 0, 0, G(3), 0, 0, 0, 0, 0, C(3), 0, 0, 0, 0, 0, G(3), 0, 0, 0, 0, 0, C(3), 0, 0, 0, 0, 0, G(3), 0, 0, 0, 0, 0, C(3), 0, 0, 0, 0, 0, G(3), 0, 0, 0, 0, 0,
int tty_doprev_message() { struct WinDesc *cw = wins[WIN_MESSAGE]; winid prevmsg_win; int i; if ((iflags.prevmsg_window != 's') && !ttyDisplay->inread) { /* not single */ if(iflags.prevmsg_window == 'f') { /* full */ prevmsg_win = create_nhwindow(NHW_MENU); putstr(prevmsg_win, 0, "Message History"); putstr(prevmsg_win, 0, ""); cw->maxcol = cw->maxrow; i = cw->maxcol; do { if(cw->data[i] && strcmp(cw->data[i], "") ) putstr(prevmsg_win, 0, cw->data[i]); i = (i + 1) % cw->rows; } while (i != cw->maxcol); putstr(prevmsg_win, 0, toplines); display_nhwindow(prevmsg_win, TRUE); destroy_nhwindow(prevmsg_win); } else if (iflags.prevmsg_window == 'c') { /* combination */ do { morc = 0; if (cw->maxcol == cw->maxrow) { ttyDisplay->dismiss_more = C('p'); /* <ctrl/P> allowed at --More-- */ redotoplin(toplines); cw->maxcol--; if (cw->maxcol < 0) cw->maxcol = cw->rows-1; if (!cw->data[cw->maxcol]) cw->maxcol = cw->maxrow; } else if (cw->maxcol == (cw->maxrow - 1)){ ttyDisplay->dismiss_more = C('p'); /* <ctrl/P> allowed at --More-- */ redotoplin(cw->data[cw->maxcol]); cw->maxcol--; if (cw->maxcol < 0) cw->maxcol = cw->rows-1; if (!cw->data[cw->maxcol]) cw->maxcol = cw->maxrow; } else { prevmsg_win = create_nhwindow(NHW_MENU); putstr(prevmsg_win, 0, "Message History"); putstr(prevmsg_win, 0, ""); cw->maxcol = cw->maxrow; i = cw->maxcol; do { if(cw->data[i] && strcmp(cw->data[i], "") ) putstr(prevmsg_win, 0, cw->data[i]); i = (i + 1) % cw->rows; } while (i != cw->maxcol); putstr(prevmsg_win, 0, toplines); display_nhwindow(prevmsg_win, TRUE); destroy_nhwindow(prevmsg_win); } } while (morc == C('p')); ttyDisplay->dismiss_more = 0; } else { /* reversed */ morc = 0; prevmsg_win = create_nhwindow(NHW_MENU); putstr(prevmsg_win, 0, "Message History"); putstr(prevmsg_win, 0, ""); putstr(prevmsg_win, 0, toplines); cw->maxcol=cw->maxrow-1; if(cw->maxcol < 0) cw->maxcol = cw->rows-1; do { putstr(prevmsg_win, 0, cw->data[cw->maxcol]); cw->maxcol--; if (cw->maxcol < 0) cw->maxcol = cw->rows-1; if (!cw->data[cw->maxcol]) cw->maxcol = cw->maxrow; } while (cw->maxcol != cw->maxrow); display_nhwindow(prevmsg_win, TRUE); destroy_nhwindow(prevmsg_win); cw->maxcol = cw->maxrow; ttyDisplay->dismiss_more = 0; } } else if(iflags.prevmsg_window == 's') { /* single */ ttyDisplay->dismiss_more = C('p'); /* <ctrl/P> allowed at --More-- */ do { morc = 0; if (cw->maxcol == cw->maxrow) redotoplin(toplines); else if (cw->data[cw->maxcol]) redotoplin(cw->data[cw->maxcol]); cw->maxcol--; if (cw->maxcol < 0) cw->maxcol = cw->rows-1; if (!cw->data[cw->maxcol]) cw->maxcol = cw->maxrow; } while (morc == C('p')); ttyDisplay->dismiss_more = 0; } return 0; }
void SOGP::train(const VectorXd &state, const VectorXd &output) { //check if we have initialised the system if (!this->initialized) { throw OTLException("SOGP not yet initialised"); } double kstar = this->kernel->eval(state); //change the output format if this is a classification problem VectorXd mod_output; if (this->problem_type == SOGP::CLASSIFICATION) { mod_output = VectorXd::Zero(this->output_dim); for (unsigned int i=0; i<this->output_dim; i++) { mod_output(i) = -1; } mod_output(output(0)) = 1; } else { mod_output = output; } //we are just starting. if (this->current_size == 0) { this->alpha.block(0,0,1, this->output_dim) = (mod_output.array() / (kstar + this->noise)).transpose(); this->C.block(0,0,1,1) = VectorXd::Ones(1)*-1/(kstar + this->noise); this->Q.block(0,0,1,1) = VectorXd::Ones(1)*1/(kstar); this->basis_vectors.push_back(state); this->current_size++; return; } //Test if this is a "novel" state VectorXd k; this->kernel->eval(state, this->basis_vectors, k); //cache Ck VectorXd Ck = this->C.block(0,0, this->current_size, this->current_size)*k; VectorXd m = k.transpose()*this->alpha.block(0,0,this->current_size, this->output_dim); double s2 = kstar + (k.dot(Ck)); if (s2 < 1e-12) { //std::cout << "s2: " << s2 << std::endl; s2 = 1e-12; } double r = 0.0; VectorXd q; if (this->problem_type == SOGP::REGRESSION) { r = -1.0/(s2 + this->noise); q = (mod_output - m)*(-r); } else if (this->problem_type == SOGP::CLASSIFICATION) { double sx2 = this->noise + s2; double sx = sqrt(sx2); VectorXd z = VectorXd(this->output_dim); VectorXd Erfz = VectorXd(this->output_dim); for (unsigned int i=0; i<this->output_dim; i++) { z(i) = mod_output(i) * m(i) / sx; Erfz(i) = stdnormcdf(z(i)); //dErfz(i) = 1.0/sqrt(2*M_PI)*exp(-(z(i)*z(i))/2.0); //dErfz2(i) = dErfz(i)*(-z(i)); } /* TO CONNTINUE Erfz = Erf(z); dErfz = 1.0/sqrt(2*pi)*exp(-(z.^2)/2); dErfz2 = dErfz.*(-z); q = y/sx * (dErfz/Erfz); r = (1/sx2)*(dErfz2/dErfz - (dErfz/Erfz)^2); */ } else { throw OTL::OTLException("Whoops! My problem type is wrong. How did this happen?"); } VectorXd ehat = this->Q.block(0,0, this->current_size, this->current_size)*k; double gamma = kstar - k.dot(ehat); double eta = 1.0/(1.0 + gamma*r); if (gamma < 1e-12) { gamma = 0.0; } if (gamma >= this->epsilon*kstar) { //perform a full update VectorXd s = Ck; s.conservativeResize(this->current_size + 1); s(this->current_size) = 1; //update Q (inverse of C) ehat.conservativeResize(this->current_size+1); ehat(this->current_size) = -1; MatrixXd diffQ = Q.block(0,0,this->current_size+1, this->current_size+1) + (ehat*ehat.transpose())*(1.0/gamma); Q.block(0,0,this->current_size+1, this->current_size+1) = diffQ; //update alpha MatrixXd diffAlpha = alpha.block(0,0, this->current_size+1, this->output_dim) + (s*q.transpose()); alpha.block(0,0, this->current_size+1, this->output_dim) = diffAlpha; //update C MatrixXd diffC = C.block(0,0, this->current_size+1, this->current_size+1) + r*(s*s.transpose()); C.block(0,0, this->current_size+1, this->current_size+1) = diffC; //add to basis vectors this->basis_vectors.push_back(state); //increment current size this->current_size++; } else { //perform a sparse update VectorXd s = Ck + ehat; //update alpha MatrixXd diffAlpha = alpha.block(0,0, this->current_size, this->output_dim) + s*((q*eta).transpose()); alpha.block(0,0, this->current_size, this->output_dim) = diffAlpha; //update C MatrixXd diffC = C.block(0,0, this->current_size, this->current_size) + r*eta*(s*s.transpose()); C.block(0,0, this->current_size, this->current_size) = diffC; } //check if we need to reduce size if (this->basis_vectors.size() > this->capacity) { //std::cout << "Reduction!" << std::endl; double min_val = (alpha.row(0)).squaredNorm()/(Q(0,0) + C(0,0)); unsigned int min_index = 0; for (unsigned int i=1; i<this->basis_vectors.size(); i++) { double scorei = (alpha.row(i)).squaredNorm()/(Q(i,i) + C(i,i)); if (scorei < min_val) { min_val = scorei; min_index = i; } } this->reduceBasisVectorSet(min_index); } return; }
ThreeBodySM::ValueType ThreeBodySM::evaluateLog(ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L) { LogValue=0.0; RealType dudr, d2udr2; int nc(CenterRef.getTotalNum()), nptcl(P.getTotalNum()); //first fill the matrix AA(i,j) where j is a composite index for(int I=0; I<nc; I++) { BasisType& a(*ieBasis[CenterRef.GroupID[I]]); int offset(0); for(int nn=dist_ie->M[I]; nn<dist_ie->M[I+1]; nn++) { RealType sep(dist_ie->r(nn)); RealType rinv(dist_ie->rinv(nn)); int i(dist_ie->J[nn]); int offset(ieBasisOffset[I]); for(int k=0; k<a.size(); k++,offset++) { AA(i,offset)=a[k]->evaluate(sep,dudr,d2udr2); dudr *= rinv; dAA(i,offset)=dudr*dist_ie->dr(nn); d2AA(i,offset)=d2udr2+2.0*dudr; } } } for(int i=0; i<nptcl; i++) { for(int nn=dist_ee->M[i]; nn<dist_ee->M[i]; nn++) { int j(dist_ee->J[nn]); RealType sep(dist_ee->r(nn)); RealType rinv(dist_ee->rinv(nn)); for(int m=0; m<eeBasis.size(); m++) { RealType psum=0,lapmi=0,lapmj=0; PosType grmi,grmj; for(int I=0; I<nc; I++) { const Matrix<RealType>& cblock(*C(m,CenterRef.GroupID[I])); int offsetI(ieBasisOffSet[I]); for(int k=0; k< ieBasisSize[I],kb=offsetI; k++,kb++) { RealType vall=0,valk=AA(i,kb); for(int l=0; l<ieBasisSize[I],lb=offsetI; l++,lb++) { vall += cblock(k,l)*AA(j,lb); grmj += valk*cblock(k,l)*dAA(j,lb); lapmj += valk*cblock(k,l)*d2AA(j,lb); }//l psum += valk*vall; grmi += dAA(i,kb)*vall; lampi += d2AA(i,kb)*vall; }//k }//I RealType bm =eeBasis[m]->evaluate(sep,dudr,d2udr2); dudr *= rinv; PosType dbm=dudr*dist_ee->dr(nn); RealType d2bm=d2udr2+2.0*dudr; LogValue += bm*psum; G[i] += bm*grmi-dbm*psum; G[j] += bm*grmj+dbm*psum; L[i] += b2bm*psum+bm*lapi; L[j] += b2bm*psum+bm*lapj; } } } return LogValue; }
void bar() { C(1); // CHECK: {{.*}}:17:5: warning: expression result unused }
void B () { C (); }
void MAST::NPSOLOptimizationInterface::optimize() { #if MAST_ENABLE_NPSOL == 1 // make sure that functions have been provided libmesh_assert(_funobj); libmesh_assert(_funcon); int N = _feval->n_vars(), NCLIN = 0, NCNLN = _feval->n_eq()+_feval->n_ineq(), NCTOTL = N+NCLIN+NCNLN, LDA = std::max(NCLIN, 1), LDJ = std::max(NCNLN, 1), LDR = N, INFORM = 0, // on exit: Reports result of call to NPSOL // < 0 either funobj or funcon has set this to -ve // 0 => converged to point x // 1 => x satisfies optimality conditions, but sequence of iterates has not converged // 2 => Linear constraints and bounds cannot be satisfied. No feasible solution // 3 => Nonlinear constraints and bounds cannot be satisfied. No feasible solution // 4 => Major iter limit was reached // 6 => x does not satisfy first-order optimality to required accuracy // 7 => function derivatives seem to be incorrect // 9 => input parameter invalid ITER = 0, // iter count LENIW = 3*N + NCLIN + 2*NCNLN, LENW = 2*N*N + N*NCLIN + 2*N*NCNLN + 20*N + 11*NCLIN + 21*NCNLN; Real F = 0.; // on exit: final objective std::vector<int> IW (LENIW, 0), ISTATE (NCTOTL, 0); // status of constraints l <= r(x) <= u, // -2 => lower bound is violated by more than delta // -1 => upper bound is violated by more than delta // 0 => both bounds are satisfied by more than delta // 1 => lower bound is active (to within delta) // 2 => upper bound is active (to within delta) // 3 => boundars are equal and equality constraint is satisfied std::vector<Real> A (LDA, 0.), // this is used for liear constraints, not currently handled BL (NCTOTL, 0.), BU (NCTOTL, 0.), C (NCNLN, 0.), // on exit: nonlinear constraints CJAC (LDJ* N, 0.), // // on exit: CJAC(i,j) is the partial derivative of ith nonlinear constraint CLAMBDA (NCTOTL, 0.), // on entry: need not be initialized for cold start // on exit: QP multiplier from the QP subproblem, >=0 if istate(j)=1, <0 if istate(j)=2 G (N, 0.), // on exit: objective gradient R (LDR*N, 0.), // on entry: need not be initialized if called with Cold Statrt // on exit: information about Hessian, if Hessian=Yes, R is upper Cholesky factor of approx H X (N, 0.), // on entry: initial point // on exit: final estimate of solution W (LENW, 0.), // workspace xmin (N, 0.), xmax (N, 0.); // now setup the lower and upper limits for the variables and constraints _feval->init_dvar(X, xmin, xmax); for (unsigned int i=0; i<N; i++) { BL[i] = xmin[i]; BU[i] = xmax[i]; } // all constraints are assumed to be g_i(x) <= 0, so that the upper // bound is 0 and lower bound is -infinity for (unsigned int i=0; i<NCNLN; i++) { BL[i+N] = -1.e20; BU[i+N] = 0.; } std::string nm; // nm = "List"; // npoptn_(nm.c_str(), (int)nm.length()); // nm = "Verify level 3"; // npoptn_(nm.c_str(), (int)nm.length()); npsol_(&N, &NCLIN, &NCNLN, &LDA, &LDJ, &LDR, &A[0], &BL[0], &BU[0], _funcon, _funobj, &INFORM, &ITER, &ISTATE[0], &C[0], &CJAC[0], &CLAMBDA[0], &F, &G[0], &R[0], &X[0], &IW[0], &LENIW, &W[0], &LENW); #endif // MAST_ENABLE_NPSOL 1 }
void F4GB::reorder_columns() { // Set up to sort the columns. // Result is an array 0..ncols-1, giving the new order. // Find the inverse of this permutation: place values into "ord" column fields. // Loop through every element of the matrix, changing its comp array. int nrows = INTSIZE(mat->rows); int ncols = INTSIZE(mat->columns); // sort the columns int *column_order = Mem->components.allocate(ncols); int *ord = Mem->components.allocate(ncols); ColumnsSorter C(M, mat); // Actual sort of columns ///////////////// #if 0 //TODO: MES remove the code in this ifdef 0 C.reset_ncomparisons(); clock_t begin_time = clock(); for (int i=0; i<ncols; i++) { column_order[i] = i; } if (M2_gbTrace >= 2) fprintf(stderr, "ncomparisons = "); QuickSorter<ColumnsSorter>::sort(&C, column_order, ncols); clock_t end_time = clock(); if (M2_gbTrace >= 2) fprintf(stderr, "%ld, ", C.ncomparisons()); double nsecs = (double)(end_time - begin_time)/CLOCKS_PER_SEC; clock_sort_columns += nsecs; if (M2_gbTrace >= 2) fprintf(stderr, " time = %f\n", nsecs); #endif // STL version /////////////// C.reset_ncomparisons(); clock_t begin_time0 = clock(); for (int i=0; i<ncols; i++) { column_order[i] = i; } if (M2_gbTrace >= 2) fprintf(stderr, "ncomparisons = "); std::stable_sort(column_order, column_order+ncols, C); clock_t end_time0 = clock(); if (M2_gbTrace >= 2) fprintf(stderr, "%ld, ", C.ncomparisons0()); double nsecs0 = (double)(end_time0 - begin_time0)/CLOCKS_PER_SEC; clock_sort_columns += nsecs0; if (M2_gbTrace >= 2) fprintf(stderr, " time = %f\n", nsecs0); //////////////////////////// for (int i=0; i<ncols; i++) { ord[column_order[i]] = i; } // Now move the columns into position coefficient_matrix::column_array newcols; newcols.reserve(ncols); for (int i=0; i<ncols; i++) { long newc = column_order[i]; newcols.push_back(mat->columns[newc]); } // Now reset the components in each row for (int r=0; r<nrows; r++) { row_elem &row = mat->rows[r]; for (int i=0; i<row.len; i++) { int oldcol = row.comps[i]; int newcol = ord[oldcol]; row.comps[i] = newcol; } for (int i=1; i<row.len; i++) { if (row.comps[i] <= row.comps[i-1]) { fprintf(stderr, "Internal error: array out of order\n"); break; } } } std::swap(mat->columns, newcols); Mem->components.deallocate(column_order); Mem->components.deallocate(ord); }
"dragon", "elemental", "fungus or mold", "gnome", "giant humanoid", 0, "jabberwock", "Keystone Kop", "lich", "mummy", "naga", "ogre", "pudding or ooze", "quantum mechanic", "rust monster or disenchanter", "snake", "troll", "umber hulk", "vampire", "wraith", "xorn", "apelike creature", "zombie", "human or elf", "ghost", "golem", "major demon", "sea monster", "lizard", "long worm tail", "mimic" }; const struct symdef def_warnsyms[WARNCOUNT] = { {'0', "unknown creature causing you worry", C(CLR_WHITE)}, /* white warning */ {'1', "unknown creature causing you concern", C(CLR_RED)}, /* pink warning */ {'2', "unknown creature causing you anxiety", C(CLR_RED)}, /* red warning */ {'3', "unknown creature causing you disquiet", C(CLR_RED)}, /* ruby warning */ {'4', "unknown creature causing you alarm", C(CLR_MAGENTA)}, /* purple warning */ {'5', "unknown creature causing you dread", C(CLR_BRIGHT_MAGENTA)} /* black warning */ }; /* * Default screen symbols with explanations and colors. * Note: {ibm|dec|mac}_graphics[] arrays also depend on this symbol order. */ const struct symdef defsyms[MAXPCHARS] = { /* 0*/ {' ', "unexplored area",C(NO_COLOR)}, /* stone */
/* Subroutine */ int slagts_(int *job, int *n, real *a, real *b, real *c, real *d, int *in, real *y, real *tol, int *info) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= SLAGTS may be used to solve one of the systems of equations (T - lambda*I)*x = y or (T - lambda*I)'*x = y, where T is an n by n tridiagonal matrix, for x, following the factorization of (T - lambda*I) as (T - lambda*I) = P*L*U , by routine SLAGTF. The choice of equation to be solved is controlled by the argument JOB, and in each case there is an option to perturb zero or very small diagonal elements of U, this option being intended for use in applications such as inverse iteration. Arguments ========= JOB (input) INTEGER Specifies the job to be performed by SLAGTS as follows: = 1: The equations (T - lambda*I)x = y are to be solved, but diagonal elements of U are not to be perturbed. = -1: The equations (T - lambda*I)x = y are to be solved and, if overflow would otherwise occur, the diagonal elements of U are to be perturbed. See argument TOL below. = 2: The equations (T - lambda*I)'x = y are to be solved, but diagonal elements of U are not to be perturbed. = -2: The equations (T - lambda*I)'x = y are to be solved and, if overflow would otherwise occur, the diagonal elements of U are to be perturbed. See argument TOL below. N (input) INTEGER The order of the matrix T. A (input) REAL array, dimension (N) On entry, A must contain the diagonal elements of U as returned from SLAGTF. B (input) REAL array, dimension (N-1) On entry, B must contain the first super-diagonal elements of U as returned from SLAGTF. C (input) REAL array, dimension (N-1) On entry, C must contain the sub-diagonal elements of L as returned from SLAGTF. D (input) REAL array, dimension (N-2) On entry, D must contain the second super-diagonal elements of U as returned from SLAGTF. IN (input) INTEGER array, dimension (N) On entry, IN must contain details of the matrix P as returned from SLAGTF. Y (input/output) REAL array, dimension (N) On entry, the right hand side vector y. On exit, Y is overwritten by the solution vector x. TOL (input/output) REAL On entry, with JOB .lt. 0, TOL should be the minimum perturbation to be made to very small diagonal elements of U. TOL should normally be chosen as about eps*norm(U), where eps is the relative machine precision, but if TOL is supplied as non-positive, then it is reset to eps*max( abs( u(i,j) ) ). If JOB .gt. 0 then TOL is not referenced. On exit, TOL is changed as described above, only if TOL is non-positive on entry. Otherwise TOL is unchanged. INFO (output) INTEGER = 0 : successful exit .lt. 0: if INFO = -i, the i-th argument had an illegal value .gt. 0: overflow would occur when computing the INFO(th) element of the solution vector x. This can only occur when JOB is supplied as positive and either means that a diagonal element of U is very small, or that the elements of the right-hand side vector y are very large. ===================================================================== Parameter adjustments Function Body */ /* System generated locals */ int i__1; real r__1, r__2, r__3, r__4, r__5; /* Builtin functions */ double r_sign(real *, real *); /* Local variables */ static real temp, pert; static int k; static real absak, sfmin, ak; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, int *); static real bignum, eps; #define Y(I) y[(I)-1] #define IN(I) in[(I)-1] #define D(I) d[(I)-1] #define C(I) c[(I)-1] #define B(I) b[(I)-1] #define A(I) a[(I)-1] *info = 0; if (abs(*job) > 2 || *job == 0) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAGTS", &i__1); return 0; } if (*n == 0) { return 0; } eps = slamch_("Epsilon"); sfmin = slamch_("Safe minimum"); bignum = 1.f / sfmin; if (*job < 0) { if (*tol <= 0.f) { *tol = dabs(A(1)); if (*n > 1) { /* Computing MAX */ r__1 = *tol, r__2 = dabs(A(2)), r__1 = max(r__1,r__2), r__2 = dabs(B(1)); *tol = dmax(r__1,r__2); } i__1 = *n; for (k = 3; k <= *n; ++k) { /* Computing MAX */ r__4 = *tol, r__5 = (r__1 = A(k), dabs(r__1)), r__4 = max( r__4,r__5), r__5 = (r__2 = B(k - 1), dabs(r__2)), r__4 = max(r__4,r__5), r__5 = (r__3 = D(k - 2), dabs( r__3)); *tol = dmax(r__4,r__5); /* L10: */ } *tol *= eps; if (*tol == 0.f) { *tol = eps; } } } if (abs(*job) == 1) { i__1 = *n; for (k = 2; k <= *n; ++k) { if (IN(k - 1) == 0) { Y(k) -= C(k - 1) * Y(k - 1); } else { temp = Y(k - 1); Y(k - 1) = Y(k); Y(k) = temp - C(k - 1) * Y(k); } /* L20: */ } if (*job == 1) { for (k = *n; k >= 1; --k) { if (k <= *n - 2) { temp = Y(k) - B(k) * Y(k + 1) - D(k) * Y(k + 2); } else if (k == *n - 1) { temp = Y(k) - B(k) * Y(k + 1); } else { temp = Y(k); } ak = A(k); absak = dabs(ak); if (absak < 1.f) { if (absak < sfmin) { if (absak == 0.f || dabs(temp) * sfmin > absak) { *info = k; return 0; } else { temp *= bignum; ak *= bignum; } } else if (dabs(temp) > absak * bignum) { *info = k; return 0; } } Y(k) = temp / ak; /* L30: */ } } else { for (k = *n; k >= 1; --k) { if (k <= *n - 2) { temp = Y(k) - B(k) * Y(k + 1) - D(k) * Y(k + 2); } else if (k == *n - 1) { temp = Y(k) - B(k) * Y(k + 1); } else { temp = Y(k); } ak = A(k); pert = r_sign(tol, &ak); L40: absak = dabs(ak); if (absak < 1.f) { if (absak < sfmin) { if (absak == 0.f || dabs(temp) * sfmin > absak) { ak += pert; pert *= 2; goto L40; } else { temp *= bignum; ak *= bignum; } } else if (dabs(temp) > absak * bignum) { ak += pert; pert *= 2; goto L40; } } Y(k) = temp / ak; /* L50: */ } } } else { /* Come to here if JOB = 2 or -2 */ if (*job == 2) { i__1 = *n; for (k = 1; k <= *n; ++k) { if (k >= 3) { temp = Y(k) - B(k - 1) * Y(k - 1) - D(k - 2) * Y(k - 2); } else if (k == 2) { temp = Y(k) - B(k - 1) * Y(k - 1); } else { temp = Y(k); } ak = A(k); absak = dabs(ak); if (absak < 1.f) { if (absak < sfmin) { if (absak == 0.f || dabs(temp) * sfmin > absak) { *info = k; return 0; } else { temp *= bignum; ak *= bignum; } } else if (dabs(temp) > absak * bignum) { *info = k; return 0; } } Y(k) = temp / ak; /* L60: */ } } else { i__1 = *n; for (k = 1; k <= *n; ++k) { if (k >= 3) { temp = Y(k) - B(k - 1) * Y(k - 1) - D(k - 2) * Y(k - 2); } else if (k == 2) { temp = Y(k) - B(k - 1) * Y(k - 1); } else { temp = Y(k); } ak = A(k); pert = r_sign(tol, &ak); L70: absak = dabs(ak); if (absak < 1.f) { if (absak < sfmin) { if (absak == 0.f || dabs(temp) * sfmin > absak) { ak += pert; pert *= 2; goto L70; } else { temp *= bignum; ak *= bignum; } } else if (dabs(temp) > absak * bignum) { ak += pert; pert *= 2; goto L70; } } Y(k) = temp / ak; /* L80: */ } } for (k = *n; k >= 2; --k) { if (IN(k - 1) == 0) { Y(k - 1) -= C(k - 1) * Y(k); } else { temp = Y(k - 1); Y(k - 1) = Y(k); Y(k) = temp - C(k - 1) * Y(k); } /* L90: */ } } /* End of SLAGTS */ return 0; } /* slagts_ */