/*
 * Singular value decomposition kernel for a double-precision matrix that is
 * addressed as a[row*lda + col] with m rows and n columns.
 *
 * Stages performed by the body:
 *   1. reduce `a` to bidiagonal form with alternating column / row
 *      Householder reflections (diagonal -> w, off-diagonal -> e);
 *   2. if uT / vT are non-NULL, accumulate the reflections into them;
 *   3. diagonalize the bidiagonal form iteratively (implicit-shift QR sweeps);
 *   4. make the singular values non-negative and sort them in descending
 *      order, permuting the rows of uT / vT accordingly.
 *
 * Parameters:
 *   a, lda     - input matrix and its row stride; overwritten during stage 1.
 *   m, n       - number of rows / columns. nm is set to n unconditionally, so
 *                the routine presumably expects m >= n -- TODO confirm with callers.
 *   w          - receives n singular values.
 *   uT, lduT   - optional (may be NULL) storage for the left vectors, one per
 *                row, with row stride lduT.
 *   nu         - upper bound used for the rows of uT that are initialised and
 *                updated; presumably the number of left vectors requested.
 *   vT, ldvT   - optional (may be NULL) storage for the right vectors, one per
 *                row, with row stride ldvT.
 *   buffer     - scratch: e occupies buffer[0..n-1], temp starts at buffer + n
 *                and up to n entries of it are cleared; so at least 2*n
 *                doubles appear to be required -- verify against the caller.
 *
 * If MAX_ITERS sweeps are exceeded for some singular value, the
 * diagonalization loop is abandoned early; the sort still runs.
 */
static void
icvSVD_64f( double* a, int lda, int m, int n,
            double* w,
            double* uT, int lduT, int nu,
            double* vT, int ldvT,
            double* buffer )
{
    double* e;                      /* off-diagonal of the bidiagonal form */
    double* temp;                   /* scratch row used by the column reflection */
    double *w1, *e1;                /* write cursors into w and e */
    double *hv;                     /* current Householder vector */
    double ku0 = 0, kv0 = 0;        /* scale factor of the very first left/right reflection */
    double anorm = 0;
    double *a1, *u0 = uT, *v0 = vT; /* u0/v0 remember the original uT/vT */
    double scale, h;
    int i, j, k, l;
    int nm, m1, n1;
    int nv = n;
    int iters = 0;
    /* Fallback storage for hv when the reflection is not kept in uT/vT.
       The +1 offset makes hv0[-1] a valid element, which the code writes. */
    double* hv0 = (double*)cvStackAlloc( (m+2)*sizeof(hv0[0])) + 1;

    e = buffer;
    w1 = w;
    e1 = e + 1;
    nm = n;

    temp = buffer + nm;

    memset( w, 0, nm * sizeof( w[0] ));
    memset( e, 0, nm * sizeof( e[0] ));

    m1 = m;
    n1 = n;

    /* transform a to bi-diagonal form */
    /* Each pass handles one column reflection followed by one row reflection;
       m1 x n1 is the size of the still-unreduced trailing sub-matrix at `a`. */
    for( ;; )
    {
        int update_u;
        int update_v;

        if( m1 == 0 )
            break;

        scale = h = 0;
        /* keep the reflection vector inside uT only while it is within the first nu rows */
        update_u = uT && m1 > m - nu;
        hv = update_u ? uT : hv0;

        /* copy the current column into hv and accumulate its 1-norm */
        for( j = 0, a1 = a; j < m1; j++, a1 += lda )
        {
            double t = a1[0];
            scale += fabs( hv[j] = t );
        }

        if( scale != 0 )
        {
            double f = 1./scale, g, s = 0;

            /* normalise by the 1-norm before squaring */
            for( j = 0; j < m1; j++ )
            {
                double t = (hv[j] *= f);
                s += t * t;
            }

            g = sqrt( s );
            f = hv[0];
            /* choose the sign of g opposite to f so that f - g does not cancel */
            if( f >= 0 )
                g = -g;
            hv[0] = f - g;
            /* f*g <= 0 and s > 0, hence h < 0 */
            h = 1. / (f * g - s);

            memset( temp, 0, n1 * sizeof( temp[0] ));

            /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
            icvMatrAXPY_64f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
            for( k = 1; k < n1; k++ ) temp[k] *= h;

            /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
            icvMatrAXPY_64f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
            /* diagonal entry of the bidiagonal form (undo the normalisation) */
            *w1 = g*scale;
        }
        /* advance even for a zero column: w was pre-cleared, so the entry stays 0 */
        w1++;

        /* store -2/(hv'*hv) */
        /* The first reflection has no hv[-1] slot inside uT, so its factor lives in ku0. */
        if( update_u )
        {
            if( m1 == m )
                ku0 = h;
            else
                hv[-1] = h;
        }

        /* step to the next column; vT moves one row down and one column right */
        a++;
        n1--;
        if( vT )
            vT += ldvT + 1;

        if( n1 == 0 )
            break;

        scale = h = 0;
        update_v = vT && n1 > n - nv;

        hv = update_v ? vT : hv0;

        /* copy the current row (right of the diagonal) into hv, accumulate its 1-norm */
        for( j = 0; j < n1; j++ )
        {
            double t = a[j];
            scale += fabs( hv[j] = t );
        }

        if( scale != 0 )
        {
            double f = 1./scale, g, s = 0;

            for( j = 0; j < n1; j++ )
            {
                double t = (hv[j] *= f);
                s += t * t;
            }

            g = sqrt( s );
            f = hv[0];
            if( f >= 0 )
                g = -g;
            hv[0] = f - g;
            h = 1. / (f * g - s);
            /* cleared before the helper call; it is overwritten with h below when update_v is set */
            hv[-1] = 0.;

            /* update a[i:m,i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*...
               (the remainder of the expression is applied inside icvMatrAXPY3_64f) */
            icvMatrAXPY3_64f( m1, n1, hv, lda, a, h );

            /* off-diagonal entry of the bidiagonal form */
            *e1 = g*scale;
        }
        e1++;

        /* store -2/(hv'*hv) */
        /* n1 was already decremented above, so the kv0 branch is not taken on the first pass */
        if( update_v )
        {
            if( n1 == n )
                kv0 = h;
            else
                hv[-1] = h;
        }

        /* step to the next row; uT moves one row down and one column right */
        a += lda;
        m1--;
        if( uT )
            uT += lduT + 1;
    }

    /* m1 / n1 each drop by one unless already zero; m - m1 and n - n1 below
       give the index from which uT / vT are filled with identity rows */
    m1 -= m1 != 0;
    n1 -= n1 != 0;

    /* accumulate left transformations */
    if( uT )
    {
        m1 = m - m1;
        uT = u0 + m1 * lduT;
        /* rows m1..nu-1 carry no reflection: set them to unit vectors */
        for( i = m1; i < nu; i++, uT += lduT )
        {
            memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
            uT[i] = 1.;
        }

        /* apply the stored reflections in reverse order */
        for( i = m1 - 1; i >= 0; i-- )
        {
            double s;
            int lh = nu - i;

            l = m - i;

            /* diagonal element (i,i) of uT: start of the i-th stored vector */
            hv = u0 + (lduT + 1) * i;
            h = i == 0 ? ku0 : hv[-1];

            /* h is either 0 (zero column) or the negative factor computed above */
            assert( h <= 0 );

            if( h != 0 )
            {
                uT = hv;
                icvMatrAXPY3_64f( lh, l-1, hv+1, lduT, uT+1, h );

                /* turn the stored vector itself into the i-th row of the product */
                s = hv[0] * h;
                for( k = 0; k < l; k++ ) hv[k] *= s;
                hv[0] += 1;
            }
            else
            {
                /* no reflection for this index: identity row and column */
                for( j = 1; j < l; j++ )
                    hv[j] = 0;
                for( j = 1; j < lh; j++ )
                    hv[j * lduT] = 0;
                hv[0] = 1;
            }
        }
        uT = u0;
    }

    /* accumulate right transformations */
    if( vT )
    {
        n1 = n - n1;
        vT = v0 + n1 * ldvT;
        /* rows n1..nv-1 carry no reflection: set them to unit vectors */
        for( i = n1; i < nv; i++, vT += ldvT )
        {
            memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
            vT[i] = 1.;
        }

        for( i = n1 - 1; i >= 0; i-- )
        {
            double s;
            int lh = nv - i;

            l = n - i;
            hv = v0 + (ldvT + 1) * i;
            /* kv0 is never assigned in the loop above for i == 0 paths that skip it, so it may still be 0 */
            h = i == 0 ? kv0 : hv[-1];

            assert( h <= 0 );

            if( h != 0 )
            {
                vT = hv;
                icvMatrAXPY3_64f( lh, l-1, hv+1, ldvT, vT+1, h );

                s = hv[0] * h;
                for( k = 0; k < l; k++ ) hv[k] *= s;
                hv[0] += 1;
            }
            else
            {
                for( j = 1; j < l; j++ )
                    hv[j] = 0;
                for( j = 1; j < lh; j++ )
                    hv[j * ldvT] = 0;
                hv[0] = 1;
            }
        }
        vT = v0;
    }

    /* anorm = max_i(|w[i]| + |e[i]|), then scaled into an absolute tolerance */
    for( i = 0; i < nm; i++ )
    {
        double tnorm = fabs( w[i] );
        tnorm += fabs( e[i] );

        if( anorm < tnorm )
            anorm = tnorm;
    }

    anorm *= DBL_EPSILON;

    /* diagonalization of the bidiagonal form */
    /* Singular values are isolated one at a time, from the last (k = nm-1) to the first. */
    for( k = nm - 1; k >= 0; k-- )
    {
        double z = 0;
        iters = 0;

        for( ;; )               /* do iterations */
        {
            double c, s, f, g, x, y;
            int flag = 0;

            /* test for splitting */
            /* flag = 1: e[l] is negligible, the block l..k is decoupled.
               flag = 0: w[l-1] is negligible, e[l] must be rotated away first.
               e[0] was cleared and never written (e1 starts at e + 1), so the
               scan stops at l == 0 at the latest. */
            for( l = k; l >= 0; l-- )
            {
                if( fabs(e[l]) <= anorm )
                {
                    flag = 1;
                    break;
                }
                assert( l > 0 );
                if( fabs(w[l - 1]) <= anorm )
                    break;
            }

            if( !flag )
            {
                /* cancellation of e[l..k]: every rotation pairs row l-1 with row i */
                c = 0;
                s = 1;

                for( i = l; i <= k; i++ )
                {
                    f = s * e[i];

                    e[i] *= c;

                    /* anorm already includes the DBL_EPSILON factor here */
                    if( anorm + fabs( f ) == anorm )
                        break;

                    g = w[i];
                    h = pythag( f, g );
                    w[i] = h;
                    c = g / h;
                    s = -f / h;

                    if( uT )
                        icvGivens_64f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
                }
            }

            z = w[k];
            /* l == k: w[k] has converged; otherwise give up after MAX_ITERS sweeps
               (iters then ends up > MAX_ITERS, which is tested after the loop) */
            if( l == k || iters++ == MAX_ITERS )
                break;

            /* shift from bottom 2x2 minor */
            x = w[l];
            y = w[k - 1];
            g = e[k - 1];
            h = e[k];
            f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
            g = pythag( f, 1 );
            if( f < 0 )
                g = -g;
            f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
            /* next QR transformation */
            c = s = 1;

            /* chase the bulge from position l+1 down to k */
            for( i = l + 1; i <= k; i++ )
            {
                g = e[i];
                y = w[i];
                h = s * g;
                g *= c;
                z = pythag( f, h );
                e[i - 1] = z;
                c = f / z;
                s = h / z;
                f = x * c + g * s;
                g = -x * s + g * c;
                h = y * s;
                y *= c;

                /* right-hand rotation: rows i-1 and i of vT */
                if( vT )
                    icvGivens_64f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );

                z = pythag( f, h );
                w[i - 1] = z;

                /* rotation can be arbitrary if z == 0 */
                if( z != 0 )
                {
                    c = f / z;
                    s = h / z;
                }
                f = c * g + s * y;
                x = -s * g + c * y;

                /* left-hand rotation: rows i-1 and i of uT */
                if( uT )
                    icvGivens_64f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
            }

            e[l] = 0;
            e[k] = f;
            w[k] = x;
        }                       /* end of iteration loop */

        /* no convergence for this k: stop diagonalizing altogether */
        if( iters > MAX_ITERS )
            break;

        /* make the singular value non-negative, flipping the matching row of vT */
        if( z < 0 )
        {
            w[k] = -z;
            if( vT )
            {
                for( j = 0; j < n; j++ )
                    vT[j + k * ldvT] = -vT[j + k * ldvT];
            }
        }
    }                           /* end of diagonalization loop */

    /* sort singular values and corresponding values */
    /* selection sort, descending; rows of vT / uT are swapped together with w */
    for( i = 0; i < nm; i++ )
    {
        k = i;
        for( j = i + 1; j < nm; j++ )
            if( w[k] < w[j] )
                k = j;

        if( k != i )
        {
            double t;
            CV_SWAP( w[i], w[k], t );

            if( vT )
                for( j = 0; j < n; j++ )
                    CV_SWAP( vT[j + ldvT*k], vT[j + ldvT*i], t );

            if( uT )
                for( j = 0; j < m; j++ )
                    CV_SWAP( uT[j + lduT*k], uT[j + lduT*i], t );
        }
    }
}
/*
 * Singular value decomposition of a double-precision matrix addressed as
 * a[row*lda + col], with m = size.height rows and n = size.width columns.
 *
 * Stages:
 *   1. reduce `a` to bidiagonal form with alternating Householder
 *      reflections. For m >= n the sequence starts with a column reflection
 *      (diagonal -> w, off-diagonal -> e); for m < n it starts with a row
 *      reflection and the roles of w and e are exchanged;
 *   2. accumulate the reflections into u / v when they are non-NULL;
 *   3. diagonalize the bidiagonal form iteratively;
 *   4. make the singular values non-negative and sort them in descending
 *      order, swapping the matching rows of u and v.
 *
 * Parameters:
 *   a, lda   - input matrix and row stride; overwritten during stage 1.
 *   w        - receives nm = min(m, n) singular values.
 *              NOTE(review): when u (resp. v) is NULL the reflection vector is
 *              staged directly at the w1 (resp. e1) cursor, so up to m1 (resp.
 *              n1) elements are written there -- confirm that callers size w
 *              and buffer for that.
 *   u, ldu   - optional m-column storage for left vectors, one per row.
 *   v, ldv   - optional n-column storage for right vectors, one per row.
 *   size     - matrix dimensions (width = n, height = m).
 *   buffer   - scratch: e = buffer[0..nm-1], temp = buffer + nm.
 *
 * Returns CV_NO_ERR in all cases. If MAX_ITERS sweeps are exceeded the
 * diagonalization loop is abandoned early; the sort still runs.
 *
 * Fixes relative to the previous revision:
 *   - the w1 / e1 cursors now advance for every column / row, including
 *     all-zero ones, so later entries no longer slide into the wrong slot;
 *   - the cancellation rotation pairs row l-1 (not i-1) with row i.
 */
static CvStatus
icvSVD_64f( double* a, int lda,
            double* w,
            double* u, int ldu,
            double* v, int ldv,
            CvSize size, double* buffer )
{
    double* e;                      /* off-diagonal of the bidiagonal form */
    double* temp;                   /* scratch row used by the column reflection */
    double *w1, *e1;                /* write cursors for column / row reflections */
    double *hv;                     /* current Householder vector */
    double ku0 = 0, kv0 = 0;        /* factor of the first left / right reflection */
    double anorm = 0;
    double *a1 = a, *u0 = u, *v0 = v;
    double *u1, *v1;                /* u/v as seen by the diagonalization stage */
    int ldu1, ldv1;
    double scale, h;
    int i, j, k, l;
    int n = size.width, m = size.height;
    int nm, m1, n1;
    int iters = 0;

    e = buffer;

    if( m >= n )
    {
        w1 = w;
        e1 = e + 1;
        nm = n;
    }
    else
    {
        /* wide matrix: the row reflections produce the diagonal instead */
        w1 = e + 1;
        e1 = w;
        nm = m;
    }

    temp = buffer + nm;

    memset( w, 0, nm * sizeof( w[0] ));
    memset( e, 0, nm * sizeof( e[0] ));

    m1 = m;
    n1 = n;

    /* for a wide matrix the sequence starts with a row reflection */
    if( m < n )
        goto row_transform;

    /* transform a to bi-diagonal form; a1 points at the unreduced m1 x n1 block */
    for( ;; )
    {
        if( m1 == 0 )
            break;

        scale = h = 0;

        a = a1;
        hv = u ? u : w1;

        /* copy the current column into hv and accumulate its 1-norm */
        for( j = 0; j < m1; j++, a += lda )
        {
            double t = a[0];
            scale += fabs( hv[j] = t );
        }

        if( scale != 0 )
        {
            double f = 1./scale, g, s = 0;

            for( j = 0; j < m1; j++ )
            {
                double t = (hv[j] *= f);
                s += t * t;
            }

            g = sqrt( s );
            f = hv[0];
            /* sign of g opposite to f, so f - g does not cancel */
            if( f >= 0 )
                g = -g;
            hv[0] = f - g;
            /* f*g <= 0 and s > 0, hence h < 0 */
            h = 1. / (f * g - s);

            memset( temp, 0, n1 * sizeof( temp[0] ));

            a = a1;
            /* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
            icvMatrAXPY1_64f( m1, n1 - 1, a + 1, lda, hv, temp + 1 );

            for( k = 1; k < n1; k++ )
                temp[k] *= h;

            a = a1;
            /* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
            icvMatrAXPY2_64f( m1, n1 - 1, temp + 1, lda, hv, a + 1 );
            *w1 = g*scale;
        }
        /* Advance unconditionally: a zero column still owns its slot (left at 0). */
        w1++;

        /* store -2/(hv'*hv) */
        if( u )
        {
            if( m1 == m )
                ku0 = h;
            else
                hv[-1] = h;
        }

        a1++;
        n1--;
        if( v )
            v += ldv + 1;

    row_transform:

        if( n1 == 0 )
            break;

        scale = h = 0;

        a = a1;
        hv = v ? v : e1;

        /* copy the current row into hv and accumulate its 1-norm */
        for( j = 0; j < n1; j++ )
        {
            double t = a[j];
            scale += fabs( hv[j] = t );
        }

        if( scale != 0 )
        {
            double f = 1./scale, g, s = 0;

            for( j = 0; j < n1; j++ )
            {
                double t = (hv[j] *= f);
                s += t * t;
            }

            g = sqrt( s );
            f = hv[0];
            if( f >= 0 )
                g = -g;
            hv[0] = f - g;
            h = 1. / (f * g - s);

            /* update a[i:m,i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*...
               (the remainder of the expression is applied inside icvMatrAXPY3_64f) */
            icvMatrAXPY3_64f( m1, n1, hv, lda, a, h );

            *e1 = g*scale;
        }
        /* Advance unconditionally: a zero row still owns its slot (left at 0). */
        e1++;

        /* store -2/(hv'*hv) */
        if( v )
        {
            if( n1 == n )
                kv0 = h;
            else
                hv[-1] = h;
        }

        a1 += lda;
        m1--;
        if( u )
            u += ldu + 1;
    }

    m1 -= m1 != 0;
    n1 -= n1 != 0;

    /* accumulate left transformations */
    if( u )
    {
        m1 = m - m1;
        u = u0 + m1 * ldu;
        /* rows without a reflection become unit vectors */
        for( i = m1; i < m; i++, u += ldu )
        {
            memset( u + m1, 0, (m - m1) * sizeof( u[0] ));
            u[i] = 1.;
        }

        /* apply the stored reflections in reverse order */
        for( i = m1 - 1; i >= 0; i-- )
        {
            double s;

            l = m - i;

            hv = u0 + (ldu + 1) * i;
            h = i == 0 ? ku0 : hv[-1];

            /* h is 0 for a zero column, negative otherwise */
            assert( h <= 0 );

            if( h != 0 )
            {
                u = hv;
                icvMatrAXPY3_64f( l, l-1, hv+1, ldu, u+1, h );

                s = hv[0] * h;
                for( k = 0; k < l; k++ )
                    hv[k] *= s;
                hv[0] += 1;
            }
            else
            {
                /* identity row and column */
                for( j = 1; j < l; j++ )
                    hv[j] = hv[j * ldu] = 0;
                hv[0] = 1;
            }
        }
        u = u0;
    }

    /* accumulate right transformations */
    if( v )
    {
        n1 = n - n1;
        v = v0 + n1 * ldv;
        for( i = n1; i < n; i++, v += ldv )
        {
            memset( v + n1, 0, (n - n1) * sizeof( v[0] ));
            v[i] = 1.;
        }

        for( i = n1 - 1; i >= 0; i-- )
        {
            double s;

            l = n - i;
            hv = v0 + (ldv + 1) * i;
            h = i == 0 ? kv0 : hv[-1];

            assert( h <= 0 );

            if( h != 0 )
            {
                v = hv;
                icvMatrAXPY3_64f( l, l-1, hv+1, ldv, v+1, h );

                s = hv[0] * h;
                for( k = 0; k < l; k++ )
                    hv[k] *= s;
                hv[0] += 1;
            }
            else
            {
                for( j = 1; j < l; j++ )
                    hv[j] = hv[j * ldv] = 0;
                hv[0] = 1;
            }
        }
        v = v0;
    }

    /* anorm = max_i(|w[i]| + |e[i]|); used as the scale of the negligibility tests */
    for( i = 0; i < nm; i++ )
    {
        double tnorm = fabs( w[i] ) + fabs( e[i] );

        if( anorm < tnorm )
            anorm = tnorm;
    }

    /* For m < n the bidiagonal form is that of the transpose, so the roles of
       u and v are exchanged for the rotations below. */
    if( m >= n )
    {
        m1 = m;
        n1 = n;
        u1 = u;
        ldu1 = ldu;
        v1 = v;
        ldv1 = ldv;
    }
    else
    {
        m1 = n;
        n1 = m;
        u1 = v;
        ldu1 = ldv;
        v1 = u;
        ldv1 = ldu;
    }

    /* diagonalization of the bidiagonal form */
    for( k = nm - 1; k >= 0; k-- )
    {
        double z = 0;
        iters = 0;

        for( ;; )               /* do iterations */
        {
            double c, s, f, g, x, y;
            int flag = 0;

            /* test for splitting */
            /* flag = 1: e[l] is negligible, the block l..k is decoupled.
               flag = 0: w[l-1] is negligible, e[l] must be rotated away first. */
            for( l = k; l >= 0; l-- )
            {
                if( anorm + fabs( e[l] ) == anorm )
                {
                    flag = 1;
                    break;
                }
                assert( l > 0 );
                if( anorm + fabs( w[l - 1] ) == anorm )
                    break;
            }

            if( !flag )
            {
                c = 0;
                s = 1;

                for( i = l; i <= k; i++ )
                {
                    f = s * e[i];

                    e[i] *= c;

                    if( anorm + fabs( f ) == anorm )
                        break;

                    g = w[i];
                    h = pythag( f, g );
                    w[i] = h;
                    c = g / h;
                    s = -f / h;

                    /* The cancellation always rotates against the row of the
                       negligible w[l-1]; using i-1 is only right for i == l. */
                    if( u1 )
                    {
                        icvGivens_64f( m1, u1 + ldu1 * (l - 1), u1 + ldu1 * i, c, s );
                    }
                }
            }

            z = w[k];
            /* l == k: converged; otherwise give up after MAX_ITERS sweeps */
            if( l == k || iters++ == MAX_ITERS )
                break;

            /* shift from bottom 2x2 minor */
            x = w[l];
            y = w[k - 1];
            g = e[k - 1];
            h = e[k];
            f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
            g = pythag( f, 1 );
            if( f < 0 )
                g = -g;
            f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
            /* next QR transformation */
            c = s = 1;

            for( i = l + 1; i <= k; i++ )
            {
                g = e[i];
                y = w[i];
                h = s * g;
                g *= c;
                z = pythag( f, h );
                e[i - 1] = z;
                c = f / z;
                s = h / z;
                f = x * c + g * s;
                g = -x * s + g * c;
                h = y * s;
                y *= c;

                if( v1 )
                {
                    icvGivens_64f( n1, v1 + ldv1 * (i - 1), v1 + ldv1 * i, c, s );
                }

                z = pythag( f, h );
                w[i - 1] = z;

                /* rotation can be arbitrary if z == 0 */
                if( z != 0 )
                {
                    c = f / z;
                    s = h / z;
                }
                f = c * g + s * y;
                x = -s * g + c * y;

                if( u1 )
                {
                    icvGivens_64f( m1, u1 + ldu1 * (i - 1), u1 + ldu1 * i, c, s );
                }
            }

            e[l] = 0;
            e[k] = f;
            w[k] = x;
        }                       /* end of iteration loop */

        /* no convergence for this k: stop diagonalizing altogether */
        if( iters > MAX_ITERS )
            break;

        /* make the singular value non-negative, flipping the matching row of v */
        if( z < 0 )
        {
            w[k] = -z;
            if( v )
            {
                for( j = 0; j < n; j++ )
                    v[j + k * ldv] = -v[j + k * ldv];
            }
        }
    }                           /* end of diagonalization loop */

    /* sort singular values (selection sort, descending) together with the rows of v and u */
    for( i = 0; i < nm; i++ )
    {
        k = i;
        for( j = i + 1; j < nm; j++ )
            if( w[k] < w[j] )
                k = j;

        if( k != i )
        {
            /* swap i & k values */
            double t = w[k];

            w[k] = w[i];
            w[i] = t;

            if( v )
            {
                for( j = 0; j < n; j++ )
                {
                    t = v[j + ldv * k];
                    v[j + ldv * k] = v[j + ldv * i];
                    v[j + ldv * i] = t;
                }
            }

            if( u )
            {
                for( j = 0; j < m; j++ )
                {
                    t = u[j + k * ldu];
                    u[j + ldu * k] = u[j + i * ldu];
                    u[j + ldu * i] = t;
                }
            }
        }
    }

    return CV_NO_ERR;
}