/* * \brief Scale matrix with from/to avoiding overflows/underflows. * * \param[in,out] A * Matrix to scale * \param[in] from, to * Scaling parameters, multiplied with from/to. * \param[in] flags * Matrix type bits ARMAS_LOWER, ARMAS_UPPER, ARMAS_SYMM * * lapack.xLASCL */ int armas_x_scale_to(armas_x_dense_t *A, DTYPE from, DTYPE to, int flags, armas_conf_t *conf) { DTYPE cfrom1, cto1, mul; int ready = 0; if (A->rows == 0 || A->cols == 0) return 0; do { cfrom1 = from*__SAFEMIN; if (cfrom1 == from) { mul = to/from; ready = 1; cto1 = to; } else { cto1 = to/__SAFEMAX; if (cto1 == to) { mul = to; ready = 1; from = 1; } else if (__ABS(cfrom1) > __ABS(to) && to != __ZERO) { mul = __SAFEMIN; from = cfrom1; } else if (__ABS(cto1) > __ABS(from)) { mul = __SAFEMAX; to = cto1; } else { mul = to/from; ready = 1; } } armas_x_mscale(A, mul, flags); } while (! ready); return 0; }
static DTYPE __vec_asum_kahan(const mvec_t *X, int N) { register int k; register DTYPE c0, s0, c1, s1; register DTYPE t0, y0, t1, y1; c0 = c1 = s0 = s1 = __ZERO; for (k = 0; k < N-1; k += 2) { y0 = __ABS(X->md[(k+0)*X->inc]) - c0; t0 = s0 + y0; c0 = (t0 - s0) - y0; s0 = t0; y1 = __ABS(X->md[(k+1)*X->inc]) - c1; t1 = s1 + y1; c1 = (t1 - s1) - y1; s1 = t1; } if (k == N) return s0 + s1; y0 = __ABS(X->md[(k+0)*X->inc]) - c0; t0 = s0 + y0; c0 = (t0 - s0) - y0; s0 = t0; return s0 + s1; }
static inline int __vec_iamin(const mvec_t *X, int N) { register int i, ix, n; register ABSTYPE min, c0, c1; if (N <= 1) return 0; min = __ABS(X->md[0]); ix = 0; for (i = 0; i < N-1; i += 2) { c0 = __ABS(X->md[(i+0)*X->inc]); c1 = __ABS(X->md[(i+1)*X->inc]); if (c1 < c0) { n = 1; c0 = c1; } if (c0 < min) { ix = i+n; min = c0; } n = 0; } if (i < N) { c0 = __ABS(X->md[i*X->inc]); ix = c0 < min ? N-1 : ix; } return ix; }
// return index of max absolute value int __vec_iamax(const mvec_t *X, int N) { register int i, ix, n; register ABSTYPE max, c0, c1; if (N <= 1) return 0; max = 0.0; ix = 0; for (i = 0; i < N-1; i += 2) { c0 = __ABS(X->md[(i+0)*X->inc]); c1 = __ABS(X->md[(i+1)*X->inc]); if (c1 > c0) { n = 1; c0 = c1; } if (c0 > max) { ix = i+n; max = c0; } n = 0; } if (i < N) { c0 = __ABS(X->md[i*X->inc]); ix = c0 > max ? N-1 : ix; } return ix; }
/* * \brief Compute shift for 2x2 bidiagonal matrix * * Computes Golub-Kahan BSVD shift for 2x2 matrix T = B.T*B * * ( e 0 ) * B = ( f g ) T = B.T*B = ( f*f+e*e f*g ) * ( 0 h ) ( f*g g*g+h*h ) */ static DTYPE compute_bsvd_shift(DTYPE e, DTYPE f, DTYPE g, DTYPE h) { DTYPE e1, e2, a, b, c, T, D; a = f*f+e*e; b = g*f; c = h*h+g*g; T = (a + c)/2.0; D = a*c - b*b; armas_x_qdroots(&e1, &e2, 1.0, T, D); if (__ABS(e1)-c < __ABS(e2)-c) return e1; return e2; }
/** * @brief Element-wise equality with in tolerances * * Test if \f$A == B\f$ within given tolerances. Elements are considered equal if * * \f$|A_{i,j} - B_{i,j}| <= atol + rtol*B_{i,j}\f$ * * @param [in] A, B * Matrices to compare element wise * @param [in] atol * Absolute tolerance * @param [in] rtol * Relative tolerance * * @retval 0 not equal * @retval 1 equal * \ingroup matrix */ int armas_x_intolerance(const armas_x_dense_t *A, const armas_x_dense_t *B, ABSTYPE atol, ABSTYPE rtol) { register int i, j; ABSTYPE df, ref; if (A->rows != B->rows || A->cols != B->cols) return 0; for (j = 0; j < A->cols; j++) { for (i = 0; i < A->rows; i++) { df = __ABS(A->elems[i+j*A->step] - B->elems[i+j*B->step]); ref = atol + rtol * __ABS(B->elems[i+j*B->step]); if (df > ref) return 0; } } return 1; }
/** * @brief Maximum absolute value of vector. */ ABSTYPE armas_x_amax(const armas_x_dense_t *x, armas_conf_t *conf) { if (!conf) conf = armas_conf_default(); int imax = armas_x_iamax(x, conf); if (imax != -1) { int r = x->rows == 0 ? 0 : imax; int c = x->rows == 0 ? imax : 0; return __ABS(armas_x_get(x, r, c)); } return __ZERO; }
// return sum of absolute values static inline ABSTYPE __vec_asum(const mvec_t *X, int N) { register int i, k; register ABSTYPE c0, c1, c2, c3, a0, a1, a2, a3; register DTYPE z0, z1, z2, z3; c0 = c1 = c2 = c3 = 0.0; for (i = 0; i < N-3; i += 4) { z0 = X->md[(i+0)*X->inc]; z1 = X->md[(i+1)*X->inc]; z2 = X->md[(i+2)*X->inc]; z3 = X->md[(i+3)*X->inc]; a0 = __ABS(z0); a1 = __ABS(z1); a2 = __ABS(z2); a3 = __ABS(z3); c0 += a0; c1 += a1; c2 += a2; c3 += a3; } if (i == N) goto update; k = i*X->inc; switch (N-i) { case 3: c0 += __ABS(X->md[k]); k += X->inc; case 2: c1 += __ABS(X->md[k]); k += X->inc; case 1: c2 += __ABS(X->md[k]); } update: return c0 + c1 + c2 + c3; }
bool CJqBoard::MoveQiZi1(int iTo,int jTo,int iFrom,int jFrom) { Msg("Test:Step a iTo %d, jTo %d, iFrom %d jFrom %d",iTo, jTo, iFrom, jFrom); if( !bIsInBoard(iTo,jTo) || !bIsInBoard(iFrom,jFrom))return false; g_startqi=mBoard[iFrom][jFrom]; ///都在盘内 int di=__ABS(iTo - iFrom), dj=__ABS(jTo - jFrom); if( di == 0 && dj == 0)return false; ///两子不在同一位置 int fQi=mBoard[iFrom][jFrom],tQi=mBoard[iTo][jTo]; if(!IsCanMove2Qi(tQi,fQi))///子不能吃或移动 return false; Msg("Test:Step: o empty EmptyStepLine"); EmptyStepLine(); g_StepLine.Push (iFrom, jFrom);//add bool f=bIsInJuYin(iFrom,jFrom),t=bIsInJuYin(iTo,jTo); if(f || t)///有一方或双方都在军营 { if(t && tQi != JQ_TYPE_NONE)return false;///在军营里的子不能吃 if( di > 1 || dj > 1)return false; g_StepLine.Push (iTo, jTo);//add return true; } ///都不在军营 Msg("Test:Step: p empty EmptyStepLine"); EmptyStepLine(); g_StepLine.Push (iFrom, jFrom);//add f=bIsInRailway(iFrom,jFrom);t=bIsInRailway(iTo,jTo); if(!f || !t)///有一方或双方都不在铁道线 { if( di > 1 || dj > 1)return false; bool bb= di == 0 || dj == 0; if(bb) { g_StepLine.Push (iTo, jTo);//add return true; } } ///都在铁道线 Msg("Test:Step: q empty EmptyStepLine"); EmptyStepLine(); ////走直线 if(di == 0) { Msg("Test:Step: b [%d,%d]", iFrom, jFrom); g_StepLine.Push (iFrom, jFrom); if( CanGoJ(jFrom,jTo,iFrom)) { g_StepLine.Push (iTo, jTo);//add return true; } if(GET_A_JQ_NAME( fQi) != JQ_TYPE_NAME_GB) return false; } Msg("Test:Step: r empty EmptyStepLine"); EmptyStepLine(); if(dj == 0) { Msg("Test:Step: c [%d,%d]", iFrom, jFrom); g_StepLine.Push (iFrom, jFrom);//add if( CanGoI(iFrom,iTo,jFrom)) { g_StepLine.Push (iTo, jTo);//add return true; } if(GET_A_JQ_NAME(fQi) != JQ_TYPE_NAME_GB) return false; } Msg("Test:Step: s empty EmptyStepLine"); EmptyStepLine(); g_StepLine.Push (iFrom, jFrom);//add ////走弯线 if(CanGoCircle(iTo,jTo,iFrom,jFrom)) { g_StepLine.Push (iTo, jTo);//add return true; } Msg("Test:Step: t empty EmptyStepLine"); EmptyStepLine(); g_StepLine.Push (iTo, jTo);//add //bool bResult = bGBGoRailway(iTo,jTo,iFrom,jFrom); bool bResult = bCanArrival(iTo,jTo,iFrom,jFrom); g_StepLine.Push (iFrom, jFrom);//add return bResult; }
double SYSTEM_ABSD(double i) {return __ABS(i);}
LONGINT SYSTEM_ABS (LONGINT i) {return __ABS(i);}
static inline void __mmul_lower_abs(armas_x_dense_t *A, const armas_x_dense_t *B, int nR, int nC) { register int i, j, k, lda, ldb; DTYPE *a, *b; a = armas_x_data(A); b = armas_x_data(B); lda = A->step; ldb = B->step; for (j = 0; j < nC-3; j += 4) { // top triangle for (k = j; k < j+3; k++) { for (i = k; i < j+3 && i < nR; i++) { a[i+k*lda] = __ABS(a[i+k*lda]) * __ABS(b[i+k*ldb]); } } // rest of the column block for (i = j+3; i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]); a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) * __ABS(b[i+(j+1)*ldb]); a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) * __ABS(b[i+(j+2)*ldb]); a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) * __ABS(b[i+(j+3)*ldb]); } } if (j == nC) return; for (; j < nC; j++) { for (i = j; i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]); } } }
static inline void __mmul_abs(armas_x_dense_t *A, const armas_x_dense_t *B, int nR, int nC, int flags) { register int i, j, lda, ldb; DTYPE *a, *b; a = armas_x_data(A); b = armas_x_data(B); lda = A->step; ldb = B->step; if (flags & (ARMAS_TRANS|ARMAS_TRANSB)) { for (j = 0; j < nC-3; j += 4) { // top column block for (i = 0; i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[(j+0)+i*ldb]); a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) * __ABS(b[(j+1)+i*ldb]); a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) * __ABS(b[(j+2)+i*ldb]); a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) * __ABS(b[(j+3)+i*ldb]); } } if (j == nC) return; for (; j < nC; j++) { for (i = 0; i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[(j+0)+i*ldb]); } } return; } for (j = 0; j < nC-3; j += 4) { // top column block for (i = 0; i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]); a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) * __ABS(b[i+(j+1)*ldb]); a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) * __ABS(b[i+(j+2)*ldb]); a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) * __ABS(b[i+(j+3)*ldb]); } } if (j == nC) return; for (; j < nC; j++) { for (i = 0; i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]); } } }
static inline void __msub_upper_abs(armas_x_dense_t *A, const armas_x_dense_t *B, int nR, int nC) { register int i, j, k, lda, ldb; DTYPE *a, *b; a = armas_x_data(A); b = armas_x_data(B); lda = A->step; ldb = B->step; for (j = 0; j < nC-3; j += 4) { // top column block for (i = 0; i <= j && i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) - __ABS(b[i+(j+0)*ldb]); a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) - __ABS(b[i+(j+1)*ldb]); a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) - __ABS(b[i+(j+2)*ldb]); a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) - __ABS(b[i+(j+3)*ldb]); } // bottom triangle for (i = j+1; i < j+4 && i < nR; i++) { for (k = i; k < j+4; k++) { a[i+k*lda] = __ABS(a[i+k*lda]) - __ABS(b[i+k*ldb]); } } } if (j == nC) return; for (; j < nC; j++) { for (i = 0; i <= j && i < nR; i++) { a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) - __ABS(b[i+(j+0)*ldb]); } } }
/* * Z = q0, q1, ..., qn1, qn (N columns, 4 rows) * qq0, qq1, ..., qqn1, qqn * e0, e1, ..., en1, 0 * ee0, ee1, ..., een1, 0 * * \return number of deflations */ static int __dqds_goodstep(DTYPE *ssum, armas_x_dense_t *Z, int N, int pp, dmin_data_t *dmind) { armas_x_dense_t sq, dq, se, de; int n, ncnt, nfail, newseg; DTYPE x, y, q0, q1, sigma, tau; EMPTY(de); EMPTY(dq); EMPTY(se); EMPTY(sq); sigma = *ssum; newseg = __SIGN(dmind->dmin); //printf("..[goodstep] entering ping=%d, N=%d, newseg=%d, sigma=%e, dmin=%e\n", // pp, N, newseg, sigma, dmind->dmin); armas_x_row(&sq, Z, pp); armas_x_row(&dq, Z, 1 - pp); armas_x_row(&se, Z, 2 + pp); armas_x_row(&de, Z, 3 - pp); dmind->niter++; if (N == 1) { armas_x_set_unsafe(Z, 0, 0, armas_x_get_at_unsafe(&sq, 0)+(sigma+dmind->cterm)); //printf("..[goodstep] deflated single valued vector eig=%9.6f\n", __SQRT(armas_x_get_unsafe(Z, 0, 0))); return 1; } // 1. Look for neglible E-values ncnt = 0; if (! newseg) { do { n = __dqds_neglible(Z, N-ncnt, pp, sigma+dmind->cterm); ncnt += n; } while (n != 0 && N-ncnt > 0); } if (N-ncnt == 0) { //printf("..[goodstep] deflated (%d) to zero length\n", ncnt); return ncnt; } // 2 test flipping 1.5*q(0) < q(N-1) if new segment or deflated values if (newseg || ncnt > 0) { q0 = armas_x_get_at_unsafe(&sq, 0); q1 = armas_x_get_at_unsafe(&sq, N-ncnt-1); if (CFLIP*q0 < q1) { //printf("..[goodstep] need flipping.. [0, %d]\n", N-ncnt-1); __dqds_flip(Z, N-ncnt); } } // 3a. if no overflow or no new segment, choose shift __dqds_shift(&tau, &sq, &se, &dq, &de, N-ncnt, ncnt, dmind); //printf("..[goodstep]: tau=%e [type=%d, dmin=%e,%e, dmin1=%e]\n", tau, t, dmind->ttype, dmind->dmin, dmind->dn, dmind->dmin1); // 4. run dqds until dmin > 0 nfail = 0; do { __dqds_sweep(&dq, &de, &sq, &se, N-ncnt, tau, dmind); if (dmind->dmin < __ZERO) { // failure here DTYPE en1 = armas_x_get_at_unsafe(&de, N-ncnt-2); if (dmind->dmin1 > __ZERO && en1 < __EPS*(sigma+dmind->dn1) && __ABS(dmind->dn) < __EPS*sigma) { // convergence masked by negative d (section 6.4) armas_x_set_at_unsafe(&dq, N-ncnt-1, __ZERO); dmind->dmin = __ZERO; //printf("..[masked] dmin1 > %e, setting qn to zero.!\n", dmind->dmin1); // break out from loop //break; } nfail++; if (nfail > 1) { tau = __ZERO; } else if (dmind->dmin1 > __ZERO) { // late failure tau = (tau + dmind->dmin)*(1.0 - 2.0*__EPS); dmind->ttype -= 11; } else { tau *= 0.25; dmind->ttype -= 12; } //printf("..failure[%d]: tau=%e\n", nfail, tau); dmind->niter++; dmind->nfail++; } } while (dmind->dmin < __ZERO || dmind->dmin1 < __ZERO); // 5. update sigma; sequence of tau values. // use cascaded summation from (2), algorithm 4.1 // this here is one step of the algorithm, error term // is summated to dmind->cterm twosum(&x, &y, *ssum, tau); //printf("..[goodstep] 2sum x=%13e, y=%13e, a=%13e, b=%13e\n", x, y, *ssum, tau); *ssum = x; dmind->cterm += y; //printf("..[goodstep] 2sum c=%13e, eig=%13e\n", dmind->cterm, __SQRT(x+dmind->cterm)); return ncnt; }
/* * Golub-Kahan-Reisch SVD algorithm as described in * - Hogben, Handbook of Linear Algebra 2007, 45.2 algorithm 1b,1c * - Golub, Matrix Computation, 3rd ed. section 8.6.2 */ int __bdsvd_golub(armas_x_dense_t *D, armas_x_dense_t *E, armas_x_dense_t *U, armas_x_dense_t *V, armas_x_dense_t *CS, DTYPE tol, armas_conf_t *conf) { int N, work, lc, i, n, nrot, ip, iq, zeros, saves; DTYPE e0, e1, d0, d1, dp, c, s, r, ushift; armas_x_dense_t sD, sE, Cr, Sr, Cl, Sl; EMPTY(sD); EMPTY(sE); N = armas_x_size(D); lc = 6*N*N; saves = 0; if (U || V) { armas_x_subvector(&Cr, CS, 0, N); armas_x_subvector(&Sr, CS, N, N); armas_x_subvector(&Cl, CS, 2*N, N); armas_x_subvector(&Sl, CS, 3*N, N); saves = 1; } ip = 0; iq = N; for (work = 1, n = 0; work && lc > 0; lc--, n++) { // convergence: |E(i)| < epsilon*(|D(i)| + D(i+1)|) => E(i) = 0.0 for (i = 0; i < iq-1; i++) { e1 = armas_x_get_at_unsafe(E, i); if (e1 == 0.0) continue; d0 = armas_x_get_at_unsafe(D, i); d1 = armas_x_get_at_unsafe(D, i+1); dp = __ABS(d0) + __ABS(d1); if (__ABS(e1) < tol*dp) { armas_x_set_at(E, i, 0.0); } } // step: divide to blocks A00, A11, A22 where A22 is diagonal // and A11 has no zero off-diagonals (This should be intergrated with // the deflation loop above) find_diagonal_blocks(&ip, &iq, D, E); if (iq == 0) { work = 0; break; } // from column iq already diagonal; work ip:iq zeros = 0; for (i = ip; i < iq; i++) { d0 = armas_x_get_at(D, i); if (d0 == 0.0) { e1 = armas_x_get_at(E, i); armas_x_gvcompute(&c, &s, &r, d0, e1); armas_x_set_at_unsafe(D, i, r); armas_x_set_at_unsafe(E, i, 0.0); zeros = 1; } } if (zeros) continue; if ((iq - ip) == 2) { // 2x2 block DTYPE smin, smax, cosl, sinl, cosr, sinr; d0 = armas_x_get_at_unsafe(D, ip); d1 = armas_x_get_at_unsafe(D, ip+1); e1 = armas_x_get_at_unsafe(E, ip); __bdsvd2x2_vec(&smin, &smax, &cosl, &sinl, &cosr, &sinr, d0, e1, d1); armas_x_set_at_unsafe(D, ip, smax); armas_x_set_at_unsafe(D, ip+1, smin); armas_x_set_at_unsafe(E, ip, __ZERO); if (U) { armas_x_gvright(U, cosl, sinl, ip, ip+1, 0, U->rows); } if (V) { armas_x_gvleft(V, cosr, sinr, ip, ip+1, 0, V->cols); } continue; } armas_x_subvector(&sD, D, ip, iq-ip); armas_x_subvector(&sE, E, ip, iq-ip-1); // get elements to compute shift from trailing B.T*T 2x2 matrix d0 = armas_x_get_at_unsafe(D, iq-2); d1 = armas_x_get_at_unsafe(D, iq-1); e0 = iq < 3 ? 0.0 :armas_x_get_at_unsafe(E, iq-3); e1 = armas_x_get_at_unsafe(E, iq-2); ushift = compute_bsvd_shift(e0, d0, e1, d1); d0 = armas_x_get_at_unsafe(D, ip); e0 = armas_x_get_at_unsafe(E, ip); nrot = __bd_qrsweep(&sD, &sE, &Cr, &Sr, &Cl, &Sl, d0*d0-ushift, d0*e0, saves); // update if (U) { armas_x_gvupdate(U, ip, &Cl, &Sl, nrot, ARMAS_RIGHT); } if (V) { armas_x_gvupdate(V, ip, &Cr, &Sr, nrot, ARMAS_LEFT); } } if (lc > 0) { // finished properly for (i = 0; i < N; i++) { d0 = armas_x_get_at_unsafe(D, i); if (d0 < 0) { armas_x_set_at_unsafe(D, i, -d0); if (V) { armas_x_row(&sD, V, i); armas_x_scale(&sD, -1.0, conf); } } } } return lc > 0 ? 0 : -1; }