Example #1
0
/*
 * \brief Scale matrix with from/to avoiding overflows/underflows.
 *
 * \param[in,out] A
 *      Matrix to scale
 * \param[in] from, to
 *      Scaling parameters, multiplied with from/to.
 * \param[in] flags
 *      Matrix type bits ARMAS_LOWER, ARMAS_UPPER, ARMAS_SYMM
 *
 * lapack.xLASCL
 */
int armas_x_scale_to(armas_x_dense_t *A, DTYPE from, DTYPE to, int flags, armas_conf_t *conf)
{
    DTYPE cfrom1, cto1, mul;
    int ready = 0;

    if (A->rows == 0 || A->cols == 0)
        return 0;

    do {
        cfrom1 = from*__SAFEMIN;
        if (cfrom1 == from) {
            mul = to/from;
            ready = 1;
            cto1 = to;
        } else {
            cto1 = to/__SAFEMAX;
            if (cto1 == to) {
                mul = to;
                ready = 1;
                from = 1;
            } else if (__ABS(cfrom1) > __ABS(to) && to != __ZERO) {
                mul = __SAFEMIN;
                from = cfrom1;
            } else if (__ABS(cto1) > __ABS(from)) {
                mul = __SAFEMAX;
                to = cto1;
            } else {
                mul = to/from;
                ready = 1;
            }
        }
        armas_x_mscale(A, mul, flags);
    } while (! ready);
    return 0;
}
Example #2
0
static
DTYPE __vec_asum_kahan(const mvec_t *X, int N)
{
  register int k;
  register DTYPE c0, s0, c1, s1;
  register DTYPE t0, y0, t1, y1;

  c0 = c1 = s0 = s1 = __ZERO;
  for (k = 0; k < N-1; k += 2) {
    y0 = __ABS(X->md[(k+0)*X->inc]) - c0;
    t0 = s0 + y0;
    c0 = (t0 - s0) - y0;
    s0 = t0;

    y1 = __ABS(X->md[(k+1)*X->inc]) - c1;
    t1 = s1 + y1;
    c1 = (t1 - s1) - y1;
    s1 = t1;
  }
  if (k == N)
    return s0 + s1;

  y0 = __ABS(X->md[(k+0)*X->inc]) - c0;
  t0 = s0 + y0;
  c0 = (t0 - s0) - y0;
  s0 = t0;
  return s0 + s1;
}
Example #3
0
static inline
int __vec_iamin(const mvec_t *X,  int N)
{
  register int i, ix, n;
  register ABSTYPE min, c0, c1;

  if (N <= 1)
    return 0;

  min = __ABS(X->md[0]);
  ix = 0;
  for (i = 0; i < N-1; i += 2) {
    c0 = __ABS(X->md[(i+0)*X->inc]);
    c1 = __ABS(X->md[(i+1)*X->inc]);
    if (c1 < c0) {
      n = 1;
      c0 = c1;
    }
    if (c0 < min) {
      ix = i+n;
      min = c0;
    }
    n = 0;
  }    
  if (i < N) {
    c0 = __ABS(X->md[i*X->inc]);
    ix = c0 < min ? N-1 : ix;
  }
  return ix;
}
Example #4
0
// return index of max absolute value
int __vec_iamax(const mvec_t *X,  int N)
{
  register int i, ix, n;
  register ABSTYPE max, c0, c1;

  if (N <= 1)
    return 0;

  max = 0.0;
  ix = 0;
  for (i = 0; i < N-1; i += 2) {
    c0 = __ABS(X->md[(i+0)*X->inc]);
    c1 = __ABS(X->md[(i+1)*X->inc]);
    if (c1 > c0) {
      n = 1;
      c0 = c1;
    }
    if (c0 > max) {
      ix = i+n;
      max = c0;
    }
    n = 0;
  }    
  if (i < N) {
    c0 = __ABS(X->md[i*X->inc]);
    ix = c0 > max ? N-1 : ix;
  }
  return ix;
}
Example #5
0
/*
 * \brief Compute shift for 2x2 bidiagonal matrix
 *
 * Computes Golub-Kahan BSVD shift for 2x2 matrix T = B.T*B
 *
 *      ( e  0 )
 *  B = ( f  g )  T = B.T*B = ( f*f+e*e    f*g   )
 *      ( 0  h )              (   f*g    g*g+h*h )
 */
static
DTYPE compute_bsvd_shift(DTYPE e, DTYPE f, DTYPE g, DTYPE h)
{
    DTYPE e1, e2, a, b, c, T, D;
    a = f*f+e*e;
    b = g*f;
    c = h*h+g*g;
    T = (a + c)/2.0;
    D = a*c - b*b;
    armas_x_qdroots(&e1, &e2, 1.0, T, D);
    if (__ABS(e1)-c < __ABS(e2)-c)
        return e1;
    return e2;
}
Example #6
0
/**
 * @brief Element-wise equality with in tolerances
 *
 * Test if \f$A == B\f$ within given tolerances. Elements are considered equal if
 *
 *  \f$|A_{i,j} - B_{i,j}| <= atol + rtol*B_{i,j}\f$
 *
 * @param [in] A, B 
 *     Matrices to compare element wise
 * @param [in] atol 
 *     Absolute tolerance
 * @param [in] rtol 
 *     Relative tolerance
 *
 * @retval 0 not equal
 * @retval 1 equal
 * \ingroup matrix
 */
int armas_x_intolerance(const armas_x_dense_t *A, const armas_x_dense_t *B, ABSTYPE atol, ABSTYPE rtol)
{
  register int i, j;
  ABSTYPE df, ref;

  if (A->rows != B->rows || A->cols != B->cols)
    return 0;

  for (j = 0; j < A->cols; j++) {
    for (i = 0; i < A->rows; i++) {
      df = __ABS(A->elems[i+j*A->step] - B->elems[i+j*B->step]);
      ref = atol + rtol * __ABS(B->elems[i+j*B->step]);
      if (df > ref)
        return 0;
    }
  }
  return 1;
}
Example #7
0
/**
 * @brief Maximum absolute value of vector.
 */
ABSTYPE armas_x_amax(const armas_x_dense_t *x, armas_conf_t *conf)
{
  if (!conf)
    conf = armas_conf_default();

  int imax = armas_x_iamax(x, conf);
  if (imax != -1) {
    int r = x->rows == 0 ? 0    : imax;
    int c = x->rows == 0 ? imax : 0;
    return __ABS(armas_x_get(x, r, c));
  }
  return __ZERO;
}
Example #8
0
// return sum of absolute values
static inline
ABSTYPE __vec_asum(const mvec_t *X,  int N)
{
  register int i, k;
  register ABSTYPE c0, c1, c2, c3, a0, a1, a2, a3;
  register DTYPE z0, z1, z2, z3;

  c0 = c1 = c2 = c3 = 0.0;
  for (i = 0; i < N-3; i += 4) {
    z0 = X->md[(i+0)*X->inc];
    z1 = X->md[(i+1)*X->inc];
    z2 = X->md[(i+2)*X->inc];
    z3 = X->md[(i+3)*X->inc];
    a0 = __ABS(z0);
    a1 = __ABS(z1);
    a2 = __ABS(z2);
    a3 = __ABS(z3);
    c0 += a0;
    c1 += a1;
    c2 += a2;
    c3 += a3;
  }    
  if (i == N)
    goto update;
  
  k = i*X->inc;
  switch (N-i) {
  case 3:
    c0 += __ABS(X->md[k]);
    k += X->inc;
  case 2:
    c1 += __ABS(X->md[k]);
    k += X->inc;
  case 1:
    c2 += __ABS(X->md[k]);
  }
 update:
  return c0 + c1 + c2 + c3;
}
Example #9
0
bool CJqBoard::MoveQiZi1(int iTo,int jTo,int iFrom,int jFrom)
{
	Msg("Test:Step a iTo %d, jTo %d, iFrom %d jFrom %d",iTo, jTo, iFrom, jFrom);
	if( !bIsInBoard(iTo,jTo) || !bIsInBoard(iFrom,jFrom))return false;
	g_startqi=mBoard[iFrom][jFrom];
	///都在盘内
	int di=__ABS(iTo - iFrom), dj=__ABS(jTo - jFrom);
	if( di == 0 && dj == 0)return false;
	///两子不在同一位置

	int fQi=mBoard[iFrom][jFrom],tQi=mBoard[iTo][jTo];
	if(!IsCanMove2Qi(tQi,fQi))///子不能吃或移动
		return false;

	Msg("Test:Step: o empty EmptyStepLine");
	EmptyStepLine();
	g_StepLine.Push (iFrom, jFrom);//add
	bool f=bIsInJuYin(iFrom,jFrom),t=bIsInJuYin(iTo,jTo);
	
	if(f || t)///有一方或双方都在军营
	{
		if(t && tQi != JQ_TYPE_NONE)return false;///在军营里的子不能吃		
		if( di > 1 || dj > 1)return false;

		g_StepLine.Push (iTo, jTo);//add
		return true;
	}

	///都不在军营
	Msg("Test:Step: p empty EmptyStepLine");
	EmptyStepLine();
	g_StepLine.Push (iFrom, jFrom);//add
	f=bIsInRailway(iFrom,jFrom);t=bIsInRailway(iTo,jTo);

	if(!f || !t)///有一方或双方都不在铁道线
	{
		if( di > 1 || dj > 1)return false;
		bool bb= di == 0 || dj == 0;
		if(bb)
		{
			g_StepLine.Push (iTo, jTo);//add
			return true;
		}
		
	}

	///都在铁道线
	Msg("Test:Step: q empty EmptyStepLine");
	EmptyStepLine();

	////走直线
	if(di == 0)
	{
		Msg("Test:Step: b [%d,%d]", iFrom, jFrom);
		g_StepLine.Push (iFrom, jFrom);
		if( CanGoJ(jFrom,jTo,iFrom))
		{
			g_StepLine.Push (iTo, jTo);//add
			return true;
		}
		if(GET_A_JQ_NAME( fQi) != JQ_TYPE_NAME_GB)
			return false;
	}

	Msg("Test:Step: r empty EmptyStepLine");
	EmptyStepLine();
	if(dj == 0)
	{
		Msg("Test:Step: c [%d,%d]", iFrom, jFrom);
		g_StepLine.Push (iFrom, jFrom);//add
		if( CanGoI(iFrom,iTo,jFrom))
		{
			g_StepLine.Push (iTo, jTo);//add
			return true;
		}
		
		if(GET_A_JQ_NAME(fQi) != JQ_TYPE_NAME_GB)
			return false;
	}

	Msg("Test:Step: s empty EmptyStepLine");
	EmptyStepLine();
	g_StepLine.Push (iFrom, jFrom);//add

	////走弯线
	if(CanGoCircle(iTo,jTo,iFrom,jFrom))
	{
		g_StepLine.Push (iTo, jTo);//add
		return true;
	}

	Msg("Test:Step: t empty EmptyStepLine");
	EmptyStepLine();

	g_StepLine.Push (iTo, jTo);//add
	//bool bResult = bGBGoRailway(iTo,jTo,iFrom,jFrom);
	bool bResult = bCanArrival(iTo,jTo,iFrom,jFrom);
	g_StepLine.Push (iFrom, jFrom);//add

	return bResult;
}
Example #10
0
double  SYSTEM_ABSD(double i)              {return __ABS(i);}
Example #11
0
LONGINT SYSTEM_ABS (LONGINT i)             {return __ABS(i);}
Example #12
0
static inline
void __mmul_lower_abs(armas_x_dense_t *A, const armas_x_dense_t *B, int nR, int nC)
{
  register int i, j, k, lda, ldb;
  DTYPE *a, *b;

  a = armas_x_data(A);
  b = armas_x_data(B);
  lda = A->step; ldb = B->step;

  for (j = 0; j < nC-3; j += 4) {
    // top triangle
    for (k = j; k < j+3; k++) {
      for (i = k; i < j+3 && i < nR; i++) {
        a[i+k*lda] = __ABS(a[i+k*lda]) * __ABS(b[i+k*ldb]);
      }
    }
    // rest of the column block
    for (i = j+3; i < nR; i++) {
      a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]);
      a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) * __ABS(b[i+(j+1)*ldb]);
      a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) * __ABS(b[i+(j+2)*ldb]);
      a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) * __ABS(b[i+(j+3)*ldb]);
    }
  }
  if (j == nC)
    return;
  for (; j < nC; j++) {
    for (i = j; i < nR; i++) {
      a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]);
    }
  }
}
Example #13
0
static inline
void __mmul_abs(armas_x_dense_t *A, const armas_x_dense_t *B, int nR, int nC, int flags)
{
  register int i, j, lda, ldb;
  DTYPE *a, *b;

  a = armas_x_data(A);
  b = armas_x_data(B);
  lda = A->step; ldb = B->step;

  if (flags & (ARMAS_TRANS|ARMAS_TRANSB)) {
    for (j = 0; j < nC-3; j += 4) {
      // top column block
      for (i = 0; i < nR; i++) {
        a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[(j+0)+i*ldb]);
        a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) * __ABS(b[(j+1)+i*ldb]);
        a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) * __ABS(b[(j+2)+i*ldb]);
        a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) * __ABS(b[(j+3)+i*ldb]);
      }
    }
    if (j == nC)
      return;
    for (; j < nC; j++) {
      for (i = 0; i < nR; i++) {
        a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[(j+0)+i*ldb]);
      }
    }
    return;
  }

  for (j = 0; j < nC-3; j += 4) {
    // top column block
    for (i = 0; i < nR; i++) {
      a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]);
      a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) * __ABS(b[i+(j+1)*ldb]);
      a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) * __ABS(b[i+(j+2)*ldb]);
      a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) * __ABS(b[i+(j+3)*ldb]);
    }
  }
  if (j == nC)
    return;
  for (; j < nC; j++) {
    for (i = 0; i < nR; i++) {
      a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) * __ABS(b[i+(j+0)*ldb]);
    }
  }
}
Example #14
0
static inline
void __msub_upper_abs(armas_x_dense_t *A, const armas_x_dense_t *B, int nR, int nC)
{
  register int i, j, k, lda, ldb;
  DTYPE *a, *b;

  a = armas_x_data(A);
  b = armas_x_data(B);
  lda = A->step; ldb = B->step;

  for (j = 0; j < nC-3; j += 4) {
    // top column block
    for (i = 0; i <= j && i < nR; i++) {
      a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) - __ABS(b[i+(j+0)*ldb]);
      a[i+(j+1)*lda] = __ABS(a[i+(j+1)*lda]) - __ABS(b[i+(j+1)*ldb]);
      a[i+(j+2)*lda] = __ABS(a[i+(j+2)*lda]) - __ABS(b[i+(j+2)*ldb]);
      a[i+(j+3)*lda] = __ABS(a[i+(j+3)*lda]) - __ABS(b[i+(j+3)*ldb]);
    }
    // bottom triangle
    for (i = j+1; i < j+4 && i < nR; i++) {
      for (k = i; k < j+4; k++) {
        a[i+k*lda] = __ABS(a[i+k*lda]) - __ABS(b[i+k*ldb]);
      }
    }
  }
  if (j == nC)
    return;
  for (; j < nC; j++) {
    for (i = 0; i <= j && i < nR; i++) {
      a[i+(j+0)*lda] = __ABS(a[i+(j+0)*lda]) - __ABS(b[i+(j+0)*ldb]);
    }
  }
}
Example #15
0
/*
 *  Z = q0,  q1,  ..., qn1,  qn    (N columns, 4 rows)
 *      qq0, qq1, ..., qqn1, qqn
 *      e0,  e1,  ..., en1,  0
 *      ee0, ee1, ..., een1, 0
 *
 * \return number of deflations
 */
static
int __dqds_goodstep(DTYPE *ssum, armas_x_dense_t *Z, int N, int pp, dmin_data_t *dmind)
{
    armas_x_dense_t sq, dq, se, de;
    int n, ncnt, nfail, newseg;
    DTYPE x, y, q0, q1, sigma, tau;

    EMPTY(de); EMPTY(dq); EMPTY(se); EMPTY(sq);

    sigma = *ssum;
    newseg = __SIGN(dmind->dmin);
    //printf("..[goodstep] entering ping=%d, N=%d, newseg=%d, sigma=%e, dmin=%e\n",
    //     pp, N, newseg, sigma, dmind->dmin);

    armas_x_row(&sq, Z, pp);
    armas_x_row(&dq, Z, 1 - pp);
    armas_x_row(&se, Z, 2 + pp);
    armas_x_row(&de, Z, 3 - pp);

    dmind->niter++;
    if (N == 1) {
        armas_x_set_unsafe(Z, 0, 0, armas_x_get_at_unsafe(&sq, 0)+(sigma+dmind->cterm));
        //printf("..[goodstep] deflated single valued vector eig=%9.6f\n", __SQRT(armas_x_get_unsafe(Z, 0, 0)));
        return 1;
    }

    // 1. Look for neglible E-values
    ncnt = 0;
    if (! newseg) {
        do {
            n = __dqds_neglible(Z, N-ncnt, pp, sigma+dmind->cterm);
            ncnt += n;
        } while (n != 0 && N-ncnt > 0);
    }
    if (N-ncnt == 0) {
        //printf("..[goodstep] deflated (%d) to zero length\n", ncnt);
        return ncnt;
    }
    // 2 test flipping 1.5*q(0) < q(N-1) if new segment or deflated values
    if (newseg || ncnt > 0) {
        q0 = armas_x_get_at_unsafe(&sq, 0);
        q1 = armas_x_get_at_unsafe(&sq, N-ncnt-1);
        if (CFLIP*q0 < q1) {
            //printf("..[goodstep] need flipping.. [0, %d]\n", N-ncnt-1);
            __dqds_flip(Z, N-ncnt);
        }
    }
    
    // 3a. if no overflow or no new segment, choose shift
    __dqds_shift(&tau, &sq, &se, &dq, &de, N-ncnt, ncnt, dmind);
    //printf("..[goodstep]: tau=%e [type=%d, dmin=%e,%e, dmin1=%e]\n", tau, t, dmind->ttype, dmind->dmin, dmind->dn, dmind->dmin1);
    
    // 4.  run dqds until dmin > 0
    nfail = 0;
    do {
        __dqds_sweep(&dq, &de, &sq, &se, N-ncnt, tau, dmind);
        if (dmind->dmin < __ZERO) {
            // failure here
            DTYPE en1 = armas_x_get_at_unsafe(&de, N-ncnt-2);
            if (dmind->dmin1 > __ZERO && en1 < __EPS*(sigma+dmind->dn1) &&
                __ABS(dmind->dn) < __EPS*sigma) {
                // convergence masked by negative d (section 6.4)
                armas_x_set_at_unsafe(&dq, N-ncnt-1, __ZERO);
                dmind->dmin = __ZERO;
                //printf("..[masked] dmin1 > %e, setting qn to zero.!\n", dmind->dmin1);
                // break out from loop
                //break;
            }
    
            nfail++;
            if (nfail > 1) {
                tau = __ZERO;
            } else if (dmind->dmin1 > __ZERO) {
                // late failure
                tau = (tau + dmind->dmin)*(1.0 - 2.0*__EPS);
                dmind->ttype -= 11;
            } else {
                tau *= 0.25;
                dmind->ttype -= 12;
            }
            //printf("..failure[%d]: tau=%e\n", nfail, tau);
            dmind->niter++;
            dmind->nfail++;
        }
    } while (dmind->dmin < __ZERO || dmind->dmin1 < __ZERO);

    // 5. update sigma; sequence of tau values.
    //    use cascaded summation from (2), algorithm 4.1
    //    this here is one step of the algorithm, error term
    //    is summated to dmind->cterm
 
    twosum(&x, &y, *ssum, tau);
    //printf("..[goodstep] 2sum x=%13e, y=%13e, a=%13e, b=%13e\n", x, y, *ssum, tau);
    *ssum = x;
    dmind->cterm += y;
    //printf("..[goodstep] 2sum c=%13e, eig=%13e\n", dmind->cterm, __SQRT(x+dmind->cterm));
    
    return ncnt;
}
Example #16
0
/*
 * Golub-Kahan-Reisch SVD algorithm as described in
 *  - Hogben, Handbook of Linear Algebra 2007, 45.2 algorithm 1b,1c
 *  - Golub, Matrix Computation, 3rd ed. section 8.6.2
 */
int __bdsvd_golub(armas_x_dense_t *D, armas_x_dense_t *E,
                  armas_x_dense_t *U, armas_x_dense_t *V,
                  armas_x_dense_t *CS, DTYPE tol, armas_conf_t *conf)
{
    int N, work, lc, i, n, nrot, ip, iq, zeros, saves;
    DTYPE e0, e1, d0, d1, dp, c, s, r, ushift;
    armas_x_dense_t sD, sE, Cr, Sr, Cl, Sl;

    EMPTY(sD); EMPTY(sE);

    N = armas_x_size(D);
    lc = 6*N*N;
    saves = 0;
    
    if (U || V) {
        armas_x_subvector(&Cr, CS, 0, N);
        armas_x_subvector(&Sr, CS, N, N);
        armas_x_subvector(&Cl, CS, 2*N, N);
        armas_x_subvector(&Sl, CS, 3*N, N);
        saves = 1;
    }

    ip = 0; iq = N; 
    for (work = 1, n = 0; work && lc > 0; lc--, n++) {
        // convergence: |E(i)| < epsilon*(|D(i)| + D(i+1)|) => E(i) = 0.0
        for (i = 0; i < iq-1; i++) {
            e1 = armas_x_get_at_unsafe(E, i);
            if (e1 == 0.0)
                continue;
            d0 = armas_x_get_at_unsafe(D, i);
            d1 = armas_x_get_at_unsafe(D, i+1);
            dp = __ABS(d0) + __ABS(d1);
            if (__ABS(e1) < tol*dp) {
                armas_x_set_at(E, i, 0.0);
            }
        }
        // step: divide to blocks A00, A11, A22 where A22 is diagonal
        // and A11 has no zero off-diagonals (This should be intergrated with
        // the deflation loop above)
        find_diagonal_blocks(&ip, &iq, D, E);
        if (iq == 0) {
            work = 0;
            break;
        }

        // from column iq already diagonal; work ip:iq
        zeros = 0;
        for (i = ip; i < iq; i++) {
            d0 = armas_x_get_at(D, i);
            if (d0 == 0.0) {
                e1 = armas_x_get_at(E, i);
                armas_x_gvcompute(&c, &s, &r, d0, e1);
                armas_x_set_at_unsafe(D, i, r);
                armas_x_set_at_unsafe(E, i, 0.0);
                zeros = 1;
            }
        }
        if (zeros)
            continue;

        if ((iq - ip) == 2) {
            // 2x2 block
            DTYPE smin, smax, cosl, sinl, cosr, sinr;
            d0 = armas_x_get_at_unsafe(D, ip);
            d1 = armas_x_get_at_unsafe(D, ip+1);
            e1 = armas_x_get_at_unsafe(E, ip);
            __bdsvd2x2_vec(&smin, &smax, &cosl, &sinl, &cosr, &sinr, d0, e1, d1);
            armas_x_set_at_unsafe(D, ip, smax);
            armas_x_set_at_unsafe(D, ip+1, smin);
            armas_x_set_at_unsafe(E, ip, __ZERO);
            if (U) {
                armas_x_gvright(U, cosl, sinl, ip, ip+1, 0, U->rows);
            }
            if (V) {
                armas_x_gvleft(V, cosr, sinr, ip, ip+1, 0, V->cols);
            }
            continue;
        }

        armas_x_subvector(&sD, D, ip, iq-ip);
        armas_x_subvector(&sE, E, ip, iq-ip-1);

        // get elements to compute shift from trailing B.T*T 2x2 matrix
        d0 = armas_x_get_at_unsafe(D, iq-2);
        d1 = armas_x_get_at_unsafe(D, iq-1);
        e0 = iq < 3 ? 0.0 :armas_x_get_at_unsafe(E, iq-3);
        e1 = armas_x_get_at_unsafe(E, iq-2);
        ushift = compute_bsvd_shift(e0, d0, e1, d1);

        d0 = armas_x_get_at_unsafe(D, ip);
        e0 = armas_x_get_at_unsafe(E, ip);
        nrot = __bd_qrsweep(&sD, &sE, &Cr, &Sr, &Cl, &Sl, d0*d0-ushift, d0*e0, saves);
        // update 
        if (U) {
            armas_x_gvupdate(U, ip, &Cl, &Sl, nrot, ARMAS_RIGHT);
        }
        if (V) {
            armas_x_gvupdate(V, ip, &Cr, &Sr, nrot, ARMAS_LEFT);
        }
    }

    if (lc > 0) {
        // finished properly 
        for (i = 0; i < N; i++) {
            d0 = armas_x_get_at_unsafe(D, i);
            if (d0 < 0) {
                armas_x_set_at_unsafe(D, i, -d0);
                if (V) {
                    armas_x_row(&sD, V, i);
                    armas_x_scale(&sD, -1.0, conf);
                }
            }
        }
    }
    return lc > 0 ? 0 : -1;
}