/* LUfactor -- gaussian elimination with scaled partial pivoting -- Note: returns LU matrix which is A */ MAT *LUfactor(MAT *A, PERM *pivot) { unsigned int i, j, m, n; unsigned int k, k_max; int i_max; char MatrixTempBuffer[ 1000 ]; MatrixReal **A_v, *A_piv, *A_row; MatrixReal max1, temp, tiny; VEC *scale = VNULL; if ( A==(MAT *)NULL || pivot==(PERM *)NULL ){ error(E_NULL,"LUfactor"); } if ( pivot->size != A->m ) error(E_SIZES,"LUfactor"); m = A->m; n = A->n; if( SET_VEC_SIZE( A->m ) <1000 ) vec_get( &scale, (void *)MatrixTempBuffer, A->m ); else scale = v_get( A->m ); //MEM_STAT_REG(scale,TYPE_VEC); A_v = A->me; tiny = (MatrixReal)(10.0/HUGE_VAL); /* initialise pivot with identity permutation */ for ( i=0; i<m; i++ ) pivot->pe[i] = i; /* set scale parameters */ for ( i=0; i<m; i++ ) { max1 = 0.0; for ( j=0; j<n; j++ ) { temp = (MatrixReal)fabs(A_v[i][j]); max1 = mat_max(max1,temp); } scale->ve[i] = max1; } /* main loop */ k_max = mat_min(m,n)-1; for ( k=0; k<k_max; k++ ) { /* find best pivot row */ max1 = 0.0; i_max = -1; for ( i=k; i<m; i++ ) if ( fabs(scale->ve[i]) >= tiny*fabs(A_v[i][k]) ) { temp = (MatrixReal)fabs(A_v[i][k])/scale->ve[i]; if ( temp > max1 ) { max1 = temp; i_max = i; } } /* if no pivot then ignore column k... */ if ( i_max == -1 ) { /* set pivot entry A[k][k] exactly to zero, rather than just "small" */ A_v[k][k] = 0.0; continue; } /* do we pivot ? */ if ( i_max != (int)k ) /* yes we do... */ { px_transp(pivot,i_max,k); for ( j=0; j<n; j++ ) { temp = A_v[i_max][j]; A_v[i_max][j] = A_v[k][j]; A_v[k][j] = temp; } } /* row operations */ for ( i=k+1; i<m; i++ ) /* for each row do... */ { /* Note: divide by zero should never happen */ temp = A_v[i][k] = A_v[i][k]/A_v[k][k]; A_piv = &(A_v[k][k+1]); A_row = &(A_v[i][k+1]); if ( k+1 < n ) __mltadd__(A_row,A_piv,-temp,(int)(n-(k+1))); } } if( scale != (VEC *)MatrixTempBuffer ) // память выделялась, надо освободить V_FREE(scale); return A; }
BAND *bdLUfactor(BAND *bA, PERM *pivot) #endif { int i, j, k, l, n, n1, lb, ub, lub, k_end, k_lub; int i_max, shift; Real **bA_v; Real max1, temp; if ( bA==(BAND *)NULL || pivot==(PERM *)NULL ) error(E_NULL,"bdLUfactor"); lb = bA->lb; ub = bA->ub; lub = lb+ub; n = bA->mat->n; n1 = n-1; lub = lb+ub; if ( pivot->size != n ) error(E_SIZES,"bdLUfactor"); /* initialise pivot with identity permutation */ for ( i=0; i < n; i++ ) pivot->pe[i] = i; /* extend band matrix */ /* extended part is filled with zeros */ bA = bd_resize(bA,lb,min(n1,lub),n); bA_v = bA->mat->me; /* main loop */ for ( k=0; k < n1; k++ ) { k_end = max(0,lb+k-n1); k_lub = min(k+lub,n1); /* find the best pivot row */ max1 = 0.0; i_max = -1; for ( i=lb; i >= k_end; i-- ) { temp = fabs(bA_v[i][k]); if ( temp > max1 ) { max1 = temp; i_max = i; } } /* if no pivot then ignore column k... */ if ( i_max == -1 ) continue; /* do we pivot ? */ if ( i_max != lb ) /* yes we do... */ { /* save transposition using non-shifted indices */ shift = lb-i_max; px_transp(pivot,k+shift,k); for ( i=lb, j=k; j <= k_lub; i++,j++ ) { temp = bA_v[i][j]; bA_v[i][j] = bA_v[i-shift][j]; bA_v[i-shift][j] = temp; } } /* row operations */ for ( i=lb-1; i >= k_end; i-- ) { temp = bA_v[i][k] /= bA_v[lb][k]; shift = lb-i; for ( j=k+1,l=i+1; j <= k_lub; l++,j++ ) bA_v[l][j] -= temp*bA_v[l+shift][j]; } } return bA; }
/* BKPfactor -- Bunch-Kaufman-Parlett factorisation of A in-situ -- A is factored into the form P'AP = MDM' where P is a permutation matrix, M lower triangular and D is block diagonal with blocks of size 1 or 2 -- P is stored in pivot; blocks[i]==i iff D[i][i] is a block */ extern MAT *BKPfactor(MAT *A, PERM *pivot, PERM *blocks) { int i, j, k, n, onebyone, r; Real **A_me, aii, aip1, aip1i, lambda, sigma, tmp; Real det, s, t; if ( ! A || ! pivot || ! blocks ) error(E_NULL,"BKPfactor"); if ( A->m != A->n ) error(E_SQUARE,"BKPfactor"); if ( A->m != pivot->size || pivot->size != blocks->size ) error(E_SIZES,"BKPfactor"); n = A->n; A_me = A->me; px_ident(pivot); px_ident(blocks); for ( i = 0; i < n; i = onebyone ? i+1 : i+2 ) { /* printf("# Stage: %d\n",i); */ aii = fabs(m_entry(A,i,i)); lambda = 0.0; r = (i+1 < n) ? i+1 : i; for ( k = i+1; k < n; k++ ) { tmp = fabs(m_entry(A,i,k)); if ( tmp >= lambda ) { lambda = tmp; r = k; } } /* printf("# lambda = %g, r = %d\n", lambda, r); */ /* printf("# |A[%d][%d]| = %g\n",r,r,fabs(m_entry(A,r,r))); */ /* determine if 1x1 or 2x2 block, and do pivoting if needed */ if ( aii >= alpha*lambda ) { onebyone = TRUE; goto dopivot; } /* compute sigma */ sigma = 0.0; for ( k = i; k < n; k++ ) { if ( k == r ) continue; tmp = ( k > r ) ? fabs(m_entry(A,r,k)) : fabs(m_entry(A,k,r)); if ( tmp > sigma ) sigma = tmp; } if ( aii*sigma >= alpha*sqr(lambda) ) onebyone = TRUE; else if ( fabs(m_entry(A,r,r)) >= alpha*sigma ) { /* printf("# Swapping rows/cols %d and %d\n",i,r); */ interchange(A,i,r); px_transp(pivot,i,r); onebyone = TRUE; } else { /* printf("# Swapping rows/cols %d and %d\n",i+1,r); */ interchange(A,i+1,r); px_transp(pivot,i+1,r); px_transp(blocks,i,i+1); onebyone = FALSE; } /* printf("onebyone = %s\n",btos(onebyone)); */ /* printf("# Matrix so far (@checkpoint A) =\n"); */ /* m_output(A); */ /* printf("# pivot =\n"); px_output(pivot); */ /* printf("# blocks =\n"); px_output(blocks); */ dopivot: if ( onebyone ) { /* do one by one block */ if ( m_entry(A,i,i) != 0.0 ) { aii = m_entry(A,i,i); for ( j = i+1; j < n; j++ ) { tmp = m_entry(A,i,j)/aii; for ( k = j; k < n; k++ ) m_sub_val(A,j,k,tmp*m_entry(A,i,k)); m_set_val(A,i,j,tmp); } } } else /* onebyone == FALSE */ { /* do two by two block */ det = m_entry(A,i,i)*m_entry(A,i+1,i+1)-sqr(m_entry(A,i,i+1)); /* Must have det < 0 */ /* printf("# det = %g\n",det); */ aip1i = m_entry(A,i,i+1)/det; aii = m_entry(A,i,i)/det; aip1 = m_entry(A,i+1,i+1)/det; for ( j = i+2; j < n; j++ ) { s = - aip1i*m_entry(A,i+1,j) + aip1*m_entry(A,i,j); t = - aip1i*m_entry(A,i,j) + aii*m_entry(A,i+1,j); for ( k = j; k < n; k++ ) m_sub_val(A,j,k,m_entry(A,i,k)*s + m_entry(A,i+1,k)*t); m_set_val(A,i,j,s); m_set_val(A,i+1,j,t); } } /* printf("# Matrix so far (@checkpoint B) =\n"); */ /* m_output(A); */ /* printf("# pivot =\n"); px_output(pivot); */ /* printf("# blocks =\n"); px_output(blocks); */ } /* set lower triangular half */ for ( i = 0; i < A->m; i++ ) for ( j = 0; j < i; j++ ) m_set_val(A,i,j,m_entry(A,j,i)); return A; }
/* zQRCPfactor -- forms the QR factorisation of A with column pivoting -- factorisation stored in compact form as described above ( not quite standard format ) */ ZMAT *zQRCPfactor(ZMAT *A, ZVEC* diag, PERM *px) { unsigned int i, i_max, j, k, limit; STATIC ZVEC *tmp1=ZVNULL, *tmp2=ZVNULL, *w=ZVNULL; STATIC VEC *gamma=VNULL; Real beta; Real maxgamma, sum, tmp; complex ztmp; if ( ! A || ! diag || ! px ) error(E_NULL,"QRCPfactor"); limit = min(A->m,A->n); if ( diag->dim < limit || px->size != A->n ) error(E_SIZES,"QRCPfactor"); tmp1 = zv_resize(tmp1,A->m); tmp2 = zv_resize(tmp2,A->m); gamma = v_resize(gamma,A->n); w = zv_resize(w,A->n); MEM_STAT_REG(tmp1,TYPE_ZVEC); MEM_STAT_REG(tmp2,TYPE_ZVEC); MEM_STAT_REG(gamma,TYPE_VEC); MEM_STAT_REG(w, TYPE_ZVEC); /* initialise gamma and px */ for ( j=0; j<A->n; j++ ) { px->pe[j] = j; sum = 0.0; for ( i=0; i<A->m; i++ ) sum += square(A->me[i][j].re) + square(A->me[i][j].im); gamma->ve[j] = sum; } for ( k=0; k<limit; k++ ) { /* find "best" column to use */ i_max = k; maxgamma = gamma->ve[k]; for ( i=k+1; i<A->n; i++ ) /* Loop invariant:maxgamma=gamma[i_max] >=gamma[l];l=k,...,i-1 */ if ( gamma->ve[i] > maxgamma ) { maxgamma = gamma->ve[i]; i_max = i; } /* swap columns if necessary */ if ( i_max != k ) { /* swap gamma values */ tmp = gamma->ve[k]; gamma->ve[k] = gamma->ve[i_max]; gamma->ve[i_max] = tmp; /* update column permutation */ px_transp(px,k,i_max); /* swap columns of A */ for ( i=0; i<A->m; i++ ) { ztmp = A->me[i][k]; A->me[i][k] = A->me[i][i_max]; A->me[i][i_max] = ztmp; } } /* get H/holder vector for the k-th column */ zget_col(A,k,tmp1); /* hhvec(tmp1,k,&beta->ve[k],tmp1,&A->me[k][k]); */ zhhvec(tmp1,k,&beta,tmp1,&A->me[k][k]); diag->ve[k] = tmp1->ve[k]; /* apply H/holder vector to remaining columns */ _zhhtrcols(A,k,k+1,tmp1,beta,w); /* update gamma values */ for ( j=k+1; j<A->n; j++ ) gamma->ve[j] -= square(A->me[k][j].re)+square(A->me[k][j].im); } #ifdef THREADSAFE ZV_FREE(tmp1); ZV_FREE(tmp2); V_FREE(gamma); ZV_FREE(w); #endif return (A); }
SPMAT *spLUfactor(SPMAT *A, PERM *px, double alpha) #endif { int i, best_i, k, idx, len, best_len, m, n; SPROW *r, *r_piv, tmp_row; STATIC SPROW *merge = (SPROW *)NULL; Real max_val, tmp; STATIC VEC *col_vals=VNULL; if ( ! A || ! px ) error(E_NULL,"spLUfctr"); if ( alpha <= 0.0 || alpha > 1.0 ) error(E_RANGE,"alpha in spLUfctr"); if ( px->size <= A->m ) px = px_resize(px,A->m); px_ident(px); col_vals = v_resize(col_vals,A->m); MEM_STAT_REG(col_vals,TYPE_VEC); m = A->m; n = A->n; if ( ! A->flag_col ) sp_col_access(A); if ( ! A->flag_diag ) sp_diag_access(A); A->flag_col = A->flag_diag = FALSE; if ( ! merge ) { merge = sprow_get(20); MEM_STAT_REG(merge,TYPE_SPROW); } for ( k = 0; k < n; k++ ) { /* find pivot row/element for partial pivoting */ /* get first row with a non-zero entry in the k-th column */ max_val = 0.0; for ( i = k; i < m; i++ ) { r = &(A->row[i]); idx = sprow_idx(r,k); if ( idx < 0 ) tmp = 0.0; else tmp = r->elt[idx].val; if ( fabs(tmp) > max_val ) max_val = fabs(tmp); col_vals->ve[i] = tmp; } if ( max_val == 0.0 ) continue; best_len = n+1; /* only if no possibilities */ best_i = -1; for ( i = k; i < m; i++ ) { tmp = fabs(col_vals->ve[i]); if ( tmp == 0.0 ) continue; if ( tmp >= alpha*max_val ) { r = &(A->row[i]); idx = sprow_idx(r,k); len = (r->len) - idx; if ( len < best_len ) { best_len = len; best_i = i; } } } /* swap row #best_i with row #k */ MEM_COPY(&(A->row[best_i]),&tmp_row,sizeof(SPROW)); MEM_COPY(&(A->row[k]),&(A->row[best_i]),sizeof(SPROW)); MEM_COPY(&tmp_row,&(A->row[k]),sizeof(SPROW)); /* swap col_vals entries */ tmp = col_vals->ve[best_i]; col_vals->ve[best_i] = col_vals->ve[k]; col_vals->ve[k] = tmp; px_transp(px,k,best_i); r_piv = &(A->row[k]); for ( i = k+1; i < n; i++ ) { /* compute and set multiplier */ tmp = col_vals->ve[i]/col_vals->ve[k]; if ( tmp != 0.0 ) sp_set_val(A,i,k,tmp); else continue; /* perform row operations */ merge->len = 0; r = &(A->row[i]); sprow_mltadd(r,r_piv,-tmp,k+1,merge,TYPE_SPROW); idx = sprow_idx(r,k+1); if ( idx < 0 ) idx = -(idx+2); /* see if r needs expanding */ if ( r->maxlen < idx + merge->len ) sprow_xpd(r,idx+merge->len,TYPE_SPMAT); r->len = idx+merge->len; MEM_COPY((char *)(merge->elt),(char *)&(r->elt[idx]), merge->len*sizeof(row_elt)); } } #ifdef THREADSAFE sprow_free(merge); V_FREE(col_vals); #endif return A; }
MAT *LUfactor(MAT *A, PERM *pivot) #endif { unsigned int i, j, m, n; int i_max, k, k_max; Real **A_v, *A_piv, *A_row; Real max1, temp, tiny; STATIC VEC *scale = VNULL; if ( A==(MAT *)NULL || pivot==(PERM *)NULL ) error(E_NULL,"LUfactor"); if ( pivot->size != A->m ) error(E_SIZES,"LUfactor"); m = A->m; n = A->n; scale = v_resize(scale,A->m); MEM_STAT_REG(scale,TYPE_VEC); A_v = A->me; tiny = 10.0/HUGE_VAL; /* initialise pivot with identity permutation */ for ( i=0; i<m; i++ ) pivot->pe[i] = i; /* set scale parameters */ for ( i=0; i<m; i++ ) { max1 = 0.0; for ( j=0; j<n; j++ ) { temp = fabs(A_v[i][j]); max1 = max(max1,temp); } scale->ve[i] = max1; } /* main loop */ k_max = min(m,n)-1; for ( k=0; k<k_max; k++ ) { /* find best pivot row */ max1 = 0.0; i_max = -1; for ( i=k; i<m; i++ ) if ( fabs(scale->ve[i]) >= tiny*fabs(A_v[i][k]) ) { temp = fabs(A_v[i][k])/scale->ve[i]; if ( temp > max1 ) { max1 = temp; i_max = i; } } /* if no pivot then ignore column k... */ if ( i_max == -1 ) { /* set pivot entry A[k][k] exactly to zero, rather than just "small" */ A_v[k][k] = 0.0; continue; } /* do we pivot ? */ if ( i_max != k ) /* yes we do... */ { px_transp(pivot,i_max,k); for ( j=0; j<n; j++ ) { temp = A_v[i_max][j]; A_v[i_max][j] = A_v[k][j]; A_v[k][j] = temp; } } /* row operations */ for ( i=k+1; i<m; i++ ) /* for each row do... */ { /* Note: divide by zero should never happen */ temp = A_v[i][k] = A_v[i][k]/A_v[k][k]; A_piv = &(A_v[k][k+1]); A_row = &(A_v[i][k+1]); if ( k+1 < n ) __mltadd__(A_row,A_piv,-temp,(int)(n-(k+1))); /********************************************* for ( j=k+1; j<n; j++ ) A_v[i][j] -= temp*A_v[k][j]; (*A_row++) -= temp*(*A_piv++); *********************************************/ } } #ifdef THREADSAFE V_FREE(scale); #endif return A; }