/* sconv is a general purpose convolution routine using sunperf BLAS function sdot for speed. arguments: in - input vector of function to be convolved with nin - length of in filter - vector of filter coefficients to convolve into in nfilter - length of filter ioff - number of samples to skip into trace before starting convolution operation (i.e. left side of integral of first sample will start here. ) decfac - decimation factor. For use in decimation can make this > 1 and routine will only hit every decfac values. This is much more efficient than decimation after the fact. For normal convolution, set to 1. out - output vector (WARNING: this vector must be alloced large enough to hold complete output. No check is made to verify if it's length is sufficiently large to hold output vec. nout - actual number of samples in out (output) Returns: normal return is 0. -1 for illegal inputs with no output. This condition must be trapped as the routine produces null results. A positive result is returned if ioff is passed as a negative value. The result can still be used when this happens, but the time of the first sample may not be what the calling program believes. This condition should be trapped and the time of first output sample in out increased by ret_code*dt where dt is the sample interval of the in trace. Author: Gary L. Pavlis Written: July 21, 1998 */ int sconv(float *in, int nin, float *filter, int nfilter, int ioff, int decfac, float *out, int *nout) { int i,j,iend; int ret_code=0; /* First check for bad parameters and correct if possible */ if(nfilter > nin) { register_error(0,"sconv: filter length (%d) longer than input time series (%d)\n",nfilter,nin); return(-1); } if(ioff < 0) { register_error(0,"sconv: illegal offset value = %d set to 0\n", ioff); ret_code = -ioff; ioff = 0; } /* elegance of blas shows here */ for(i=ioff,j=0,iend=ioff+nfilter,*nout=0; iend<nin; i += decfac,++j,iend += decfac) { out[j] = (float)sdot(nfilter,in+i,1,filter,1); } *nout = j; return(ret_code); }
void spofa(float *a,long lda,long n,long *info) /* SPOFA FACTORS A REAL SYMMETRIC POSITIVE DEFINITE MATRIX. SPOFA IS USUALLY CALLED BY SPOCO, BUT IT CAN BE CALLED DIRECTLY WITH A SAVING IN TIME IF RCOND IS NOT NEEDED. (TIME FOR SPOCO) = (1 + 18/N)*(TIME FOR SPOFA) . ON ENTRY A REAL(LDA, N) THE SYMMETRIC MATRIX TO BE FACTORED. ONLY THE DIAGONAL AND UPPER TRIANGLE ARE USED. LDA INTEGER THE LEADING DIMENSION OF THE ARRAY A . N INTEGER THE ORDER OF THE MATRIX A . ON RETURN A AN UPPER TRIANGULAR MATRIX R SO THAT A = TRANS(R)*R WHERE TRANS(R) IS THE TRANSPOSE. THE STRICT LOWER TRIANGLE IS UNALTERED. IF INFO .NE. 0 , THE FACTORIZATION IS NOT COMPLETE. INFO INTEGER = 0 FOR NORMAL RETURN. = K SIGNALS AN ERROR CONDITION. THE LEADING MINOR OF ORDER K IS NOT POSITIVE DEFINITE. LINPACK. THIS VERSION DATED 08/14/78 . CLEVE MOLER, UNIVERSITY OF NEW MEXICO, ARGONNE NATIONAL LAB. SUBROUTINES AND FUNCTIONS BLAS SDOT FORTRAN SQRT INTERNAL VARIABLES */ { extern float sdot(long n,float *sx,long incx,float *sy,long incy); static long j,jm1,k; static float t,s; /* BEGIN BLOCK WITH ...EXITS TO 40 */ for(j=1; j<=n; j++) { *info = j; s = 0.0; jm1 = j-1; if(jm1 < 1) goto S20; for(k=0; k<jm1; k++) { t = *(a+k+(j-1)*lda)-sdot(k,(a+k*lda),1L,(a+(j-1)*lda),1L); t /= *(a+k+k*lda); *(a+k+(j-1)*lda) = t; s += (t*t); } S20: s = *(a+j-1+(j-1)*lda)-s; /* ......EXIT */ if(s <= 0.0) goto S40; *(a+j-1+(j-1)*lda) = sqrt(s); } *info = 0; S40: return; }
/* This routine computes the pure pseudoinverse from the svd returned by svdcmp (U*S*V^T) so Agi = V*S^-1*U^T. The algorithm used is a little overly tricky using an internally allocated work vector to make the routine nondestructive to the input matrices. This computes the "pure" pseudoinverse by setting the svd cutoff value based on float epsilon (from float.h). Note input U is mxn, s is an n vector, V is nxn, and the output Agi is nxm. Function returns the number of singular values actually used to compute Agi. Author: Gary L. Pavlis */ int pseudoinverse(float **U, float *s, float **V, int m, int n, float **Agi) { int i,j, k; /* counters*/ float *work; /* work space */ float smax; float sinv; double sv_cutoff; int nsv_used; #ifndef SUNPERF int one=1; #endif if((work=(float *)calloc(n,sizeof(float))) == NULL) elog_die(1,"Pseudoinverse computation: cannot alloc work array of length %d\n", n); /* first find the larges singular value, then just zero all those smaller than the cutoff determined as the ratio wrt to largest singular value */ smax = 0.0; for(i=0;i<n;++i) if(s[i] > smax) smax = s[i]; sv_cutoff = (double)smax*FLT_EPSILON; /* This is a copy operation */ for(i=0;i<m;++i) for(j=0;j<n;++j) Agi[j][i] = U[i][j]; /* this works because of C storage order, but is strange. It is the multiply by S^-1 */ for(j=0,nsv_used=0;j<n;++j) { if( (double)s[j] > sv_cutoff) { sinv = 1.0/s[j]; ++nsv_used; } else sinv = 0.0; #ifdef SUNPERF sscal(m,sinv,Agi[j],1); #else sscal_(&m,&sinv,Agi[j],&one); #endif } /* multiply by V using a column work vector*/ for(j=0;j<m;++j) { for(k=0;k<n;++k) work[k] = Agi[k][j]; for(i=0;i<n;++i) #ifdef SUNPERF Agi[i][j] = sdot(n,work,1,V[i],1); #else Agi[i][j] = sdot_(&n,work,&one,V[i],&one); #endif } free(work); return(nsv_used); }
unsigned long long int acml_time() { clock_gettime(CLOCK_MONOTONIC, &begin); ans1 = sdot(N,x,incx,y,incy); clock_gettime(CLOCK_MONOTONIC, &end); unsigned long long int time = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec; printf("%16f%16lld",ans1,time); return time; }
float inline cblas_sdot( const int n, const float* x, const int incx, const float* y, const int incy) { return sdot(n, x, incx, y, incy); }
int main(int argc, char *argv[]) { srand(time(0)); int n = atoi(argv[1]); int i; float *x; float *y; int incx = 1; int incy = 1; x = (float*)malloc(sizeof(float)*n); y = (float*)malloc(sizeof(float)*n); for (i = 0; i < n; i++) { x[i] = rand()/1.0/RAND_MAX - 0.5; y[i] = rand()/1.0/RAND_MAX - 0.5; } init_(); float ans1, ans2, ans3, ans4; unsigned long long int t1, t2, t3, t4; clock_gettime(CLOCK_MONOTONIC, &begin); ans1 = sdot(n, x, incx, y, incy); clock_gettime(CLOCK_MONOTONIC, &end); t1 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec; clock_gettime(CLOCK_MONOTONIC, &begin); ans2 = ftsdot_(&n, x, &incx, y, &incy); clock_gettime(CLOCK_MONOTONIC, &end); t2 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec; clock_gettime(CLOCK_MONOTONIC, &begin); ans3 = asm_dot(n, x, y); clock_gettime(CLOCK_MONOTONIC, &end); t3 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec; clock_gettime(CLOCK_MONOTONIC, &begin); ans4 = ft_asm_dot(n, x, y); clock_gettime(CLOCK_MONOTONIC, &end); t4 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec; printf("%16f\n", ans1); printf("%16f\n", ans2); printf("%16f\n", ans3); printf("%16f\n", ans4); printf("acm%16llu\n",t1); printf("for%16llu\n",t2); printf("asm%16llu\n",t3); printf("as2%16llu\n",t4); return 0; }
//---------------------------- MOUSE HANDLING ----------------------------------- // return sector under the point or null if no sector CIGCSector *C2DMap::IsOverSector(CPoint point) { if (igcmap == NULL) return NULL; CSize sdot(SDOTSECT,SDOTSECT); POSITION pos = igcmap->cl_sectors.GetHeadPosition(); for (int i=0;i < igcmap->cl_sectors.GetCount();i++) { CIGCSector *psector; psector = &(igcmap->cl_sectors.GetNext(pos)); // should test over dot of size SDOTSECT centered CPoint cp = psector->pos2D; cp.Offset(-SDOTSECT/2,-SDOTSECT/2); CRect rect(cp,sdot); //if ((point.x == psector->pos2D.x) && (point.y == psector->pos2D.y)) if (rect.PtInRect(point)) { return psector; } } return NULL; }
/************************************************************************* * This function implements the CG solver used during the directed diffusion **************************************************************************/ void ConjGrad2(MatrixType *A, floattype *b, floattype *x, floattype tol, floattype *workspace) { int i, k, n; floattype *p, *r, *q, *z, *M; floattype alpha, beta, rho, rho_1 = -1.0, error, bnrm2, tmp; idxtype *rowptr, *colind; floattype *values; n = A->nrows; rowptr = A->rowptr; colind = A->colind; values = A->values; /* Initial Setup */ p = workspace; r = workspace + n; q = workspace + 2*n; z = workspace + 3*n; M = workspace + 4*n; for (i=0; i<n; i++) { x[i] = 0.0; if (values[rowptr[i]] != 0.0) M[i] = 1.0/values[rowptr[i]]; else M[i] = 0.0; } /* r = b - Ax */ mvMult2(A, x, r); for (i=0; i<n; i++) r[i] = b[i]-r[i]; bnrm2 = snorm2(n, b); if (bnrm2 > 0.0) { error = snorm2(n, r) / bnrm2; if (error > tol) { /* Begin Iterations */ for (k=0; k<n; k++) { for (i=0; i<n; i++) z[i] = r[i]*M[i]; rho = sdot(n, r, z); if (k == 0) scopy(n, z, p); else { if (rho_1 != 0.0) beta = rho/rho_1; else beta = 0.0; for (i=0; i<n; i++) p[i] = z[i] + beta*p[i]; } mvMult2(A, p, q); /* q = A*p */ tmp = sdot(n, p, q); if (tmp != 0.0) alpha = rho/tmp; else alpha = 0.0; saxpy(n, alpha, p, x); /* x = x + alpha*p */ saxpy(n, -alpha, q, r); /* r = r - alpha*q */ error = snorm2(n, r) / bnrm2; if (error < tol) break; rho_1 = rho; } } } }
void sqrsl (float **x, int n, int k, float *qraux, float *y, float *qy, float *qty, float *b, float *rsd, float *xb, int job, int *info) /***************************************************************************** Use the output of sqrdc to compute coordinate transformations, projections, and least squares solutions. For k <= MIN(n,p), let xk be the matrix xk = (x[jpvt[0]], x[jpvt[1]], ..., x[jpvt[k-1]]) formed from columns jpvt[0], jpvt[1], ..., jpvt[k-1] of the original n by p matrix x that was input to sqrdc. (If no pivoting was done, xk consists of the first k columns of x in their original order.) sqrdc produces a factored orthogonal matrix Q and an upper triangular matrix R such that xk = Q * (R) (0) This information is contained in coded form in the arrays x and qraux. ****************************************************************************** Input: x matrix[p][n] containing output of sqrdc. n number of rows in the matrix xk; same as in sqrdc. k number of columns in the matrix xk; k must not be greater than MIN(n,p), where p is the same as in sqrdc. qraux array[p] containing auxiliary output from sqrdc. y array[n] to be manipulated by sqrsl. job specifies what is to be computed. job has the decimal expansion ABCDE, with the following meaning: if A != 0, compute qy. if B, C, D, or E != 0, compute qty. if C != 0, compute b. if D != 0, compute rsd. if E != 0, compute xb. Note that a request to compute b, rsd, or xb automatically triggers the computation of qty, for which an array must be provided. Output: qy array[n] containing Qy, if its computation has been requested. qty array[n] containing Q'y, if its computation has been requested. Here Q' denotes the transpose of Q. b array[k] containing solution of the least squares problem: minimize norm2(y - xk*b), if its computation has been requested. (Note that if pivoting was requested in sqrdc, the j-th component of b will be associated with column jpvt[j] of the original matrix x that was input into sqrdc.) rsd array[n] containing the least squares residual y - xk*b, if its computation has been requested. rsd is also the orthogonal projection of y onto the orthogonal complement of the column space of xk. xb array[n] containing the least squares approximation xk*b, if its computation has been requested. xb is also the orthogonal projection of y onto the column space of x. info =0 unless the computation of b has been requested and R is exactly singular. In this case, info is the index of the first zero diagonal element of R and b is left unaltered. ****************************************************************************** Notes: This function was adapted from LINPACK FORTRAN. Because two-dimensional arrays cannot be declared with variable dimensions in C, the matrix x is actually a pointer to an array of pointers to floats, as declared above and used below. Elements of x are stored as follows: x[0][0] x[1][0] x[2][0] ... x[k-1][0] x[0][1] x[1][1] x[2][1] ... x[k-1][1] x[0][2] x[1][2] x[2][2] ... x[k-1][2] . . . . . . . . . . x[0][n-1] x[1][n-1] x[2][n-1] ... x[k-1][n-1] The parameters qy, qty, b, rsd, and xb are not referenced if their computation is not requested and in this case can be replaced by NULL pointers in the calling program. To save storage, the user may in some cases use the same array for different parameters in the calling sequence. A frequently occuring example is when one wishes to compute any of b, rsd, or xb and does not need y or qty. In this case one may equivalence y, qty, and one of b, rsd, or xb, while providing separate arrays for anything else that is to be computed. Thus the calling sequence sqrsl(x,n,k,qraux,y,NULL,y,b,y,NULL,110,&info) will result in the computation of b and rsd, with rsd overwriting y. More generally, each item in the following list contains groups of permissible equivalences for a single calling sequence. 1. (y,qty,b) (rsd) (xb) (qy) 2. (y,qty,rsd) (b) (xb) (qy) 3. (y,qty,xb) (b) (rsd) (qy) 4. (y,qy) (qty,b) (rsd) (xb) 5. (y,qy) (qty,rsd) (b) (xb) 6. (y,qy) (qty,xb) (b) (rsd) In any group the value returned in the array allocated to the group corresponds to the last member of the group. ****************************************************************************** Author: Dave Hale, Colorado School of Mines, 12/29/89 *****************************************************************************/ { int i,j,ju,cb,cqy,cqty,cr,cxb; float t,temp; /* set info flag */ *info = 0; /* determine what is to be computed */ cqy = job/10000!=0; cqty = job%10000!=0; cb = (job%1000)/100!=0; cr = (job%100)/10!=0; cxb = job%10!=0; ju = MIN(k,n-1); /* special action when n=1 */ if (ju==0) { if (cqy) qy[0] = y[0]; if (cqty) qty[0] = y[0]; if (cxb) xb[0] = y[0]; if (cb) { if (x[0][0]==0.0) *info = 1; else b[0] = y[0]/x[0][0]; } if (cr) rsd[0] = 0.0; return; } /* set up to compute Qy or Q'y */ if (cqy) scopy(n,y,1,qy,1); if (cqty) scopy(n,y,1,qty,1); if (cqy) { /* compute Qy */ for (j=ju-1; j>=0; j--) { if (qraux[j]!=0.0) { temp = x[j][j]; x[j][j] = qraux[j]; t = -sdot(n-j,&x[j][j],1,&qy[j],1)/x[j][j]; saxpy(n-j,t,&x[j][j],1,&qy[j],1); x[j][j] = temp; } } } if (cqty) { /* compute Q'y */ for (j=0; j<ju; j++) { if (qraux[j]!=0.0) { temp = x[j][j]; x[j][j] = qraux[j]; t = -sdot(n-j,&x[j][j],1,&qty[j],1)/x[j][j]; saxpy(n-j,t,&x[j][j],1,&qty[j],1); x[j][j] = temp; } } } /* set up to compute b, rsd, or xb */ if (cb) scopy(k,qty,1,b,1); if (cxb) scopy(k,qty,1,xb,1); if (cr && k<n) scopy(n-k,&qty[k],1,&rsd[k],1); if (cxb && k<n) for (i=k; i<n; i++) xb[i] = 0.0; if (cr) for (i=0; i<k; i++) rsd[i] = 0.0; if (cb) { /* compute b */ for (j=k-1; j>=0; j--) { if (x[j][j]==0.0) { *info = j; break; } b[j] /= x[j][j]; if (j!=0) { t = -b[j]; saxpy(j,t,x[j],1,b,1); } } } if (cr || cxb) { /* compute rsd or xb as requested */ for (j=ju-1; j>=0; j--) { if (qraux[j]!=0.0) { temp = x[j][j]; x[j][j] = qraux[j]; if (cr) { t = -sdot(n-j,&x[j][j],1,&rsd[j],1)/ x[j][j]; saxpy(n-j,t,&x[j][j],1,&rsd[j],1); } if (cxb) { t = -sdot(n-j,&x[j][j],1,&xb[j],1)/ x[j][j]; saxpy(n-j,t,&x[j][j],1,&xb[j],1); } x[j][j] = temp; } } } }
void sqrdc (float **x, int n, int p, float *qraux, int *jpvt, float *work, int job) /***************************************************************************** Use Householder transformations to compute the QR decomposition of an n by p matrix x. Column pivoting based on the 2-norms of the reduced columns may be performed at the user's option. ****************************************************************************** Input: x matrix[p][n] to decompose (see notes below) n number of rows in the matrix x p number of columns in the matrix x jpvt array[p] controlling the pivot columns (see notes below) job =0 for no pivoting; =1 for pivoting Output: x matrix[p][n] decomposed (see notes below) qraux array[p] containing information required to recover the orthogonal part of the decomposition jpvt array[p] with jpvt[k] containing the index of the original matrix that has been interchanged into the k-th column, if pivoting is requested. Workspace: work array[p] of workspace ****************************************************************************** Notes: This function was adapted from LINPACK FORTRAN. Because two-dimensional arrays cannot be declared with variable dimensions in C, the matrix x is actually a pointer to an array of pointers to floats, as declared above and used below. Elements of x are stored as follows: x[0][0] x[1][0] x[2][0] ... x[p-1][0] x[0][1] x[1][1] x[2][1] ... x[p-1][1] x[0][2] x[1][2] x[2][2] ... x[p-1][2] . . . . . . . . . . x[0][n-1] x[1][n-1] x[2][n-1] ... x[p-1][n-1] After decomposition, x contains in its upper triangular matrix R of the QR decomposition. Below its diagonal x contains information from which the orthogonal part of the decomposition can be recovered. Note that if pivoting has been requested, the decomposition is not that of the original matrix x but that of x with its columns permuted as described by jpvt. The selection of pivot columns is controlled by jpvt as follows. The k-th column x[k] of x is placed in one of three classes according to the value of jpvt[k]. if jpvt[k] > 0, then x[k] is an initial column. if jpvt[k] == 0, then x[k] is a free column. if jpvt[k] < 0, then x[k] is a final column. Before the decomposition is computed, initial columns are moved to the beginning of the array x and final columns to the end. Both initial and final columns are frozen in place during the computation and only free columns are moved. At the k-th stage of the reduction, if x[k] is occupied by a free column it is interchanged with the free column of largest reduced norm. jpvt is not referenced if job == 0. ****************************************************************************** Author: Dave Hale, Colorado School of Mines, 12/29/89 *****************************************************************************/ { int j,jp,l,lup,maxj,pl,pu,negj,swapj; float maxnrm,t,tt,ttt,nrmxl; pl = 0; pu = -1; /* if pivoting has been requested */ if (job!=0) { /* rearrange columns according to jpvt */ for (j=0; j<p; j++) { swapj = jpvt[j]>0; negj = jpvt[j]<0; jpvt[j] = j; if (negj) jpvt[j] = -j; if (swapj) { if (j!=pl) sswap(n,x[pl],1,x[j],1); jpvt[j] = jpvt[pl]; jpvt[pl] = j; pl++; } } pu = p-1; for (j=p-1; j>=0; j--) { if (jpvt[j]<0) { jpvt[j] = -jpvt[j]; if (j!=pu) { sswap(n,x[pu],1,x[j],1); jp = jpvt[pu]; jpvt[pu] = jpvt[j]; jpvt[j] = jp; } pu--; } } } /* compute the norms of the free columns */ for (j=pl; j<=pu; j++) { qraux[j] = snrm2(n,x[j],1); work[j] = qraux[j]; } /* perform the Householder reduction of x */ lup = MIN(n,p); for (l=0; l<lup; l++) { if (l>=pl && l<pu) { /* * locate the column of largest norm and * bring it into pivot position. */ maxnrm = 0.0; maxj = l; for (j=l; j<=pu; j++) { if (qraux[j]>maxnrm) { maxnrm = qraux[j]; maxj = j; } } if (maxj!=l) { sswap(n,x[l],1,x[maxj],1); qraux[maxj] = qraux[l]; work[maxj] = work[l]; jp = jpvt[maxj]; jpvt[maxj] = jpvt[l]; jpvt[l] = jp; } } qraux[l] = 0.0; if (l!=n-1) { /* * compute the Householder transformation * for column l */ nrmxl = snrm2(n-l,&x[l][l],1); if (nrmxl!=0.0) { if (x[l][l]!=0.0) nrmxl = (x[l][l]>0.0) ? ABS(nrmxl) : -ABS(nrmxl); sscal(n-l,1.0/nrmxl,&x[l][l],1); x[l][l] += 1.0; /* * apply the transformation to the remaining * columns, updating the norms */ for (j=l+1; j<p; j++) { t = -sdot(n-l,&x[l][l],1,&x[j][l],1)/ x[l][l]; saxpy(n-l,t,&x[l][l],1,&x[j][l],1); if (j>=pl && j<=pu && qraux[j]!=0.0) { tt = ABS(x[j][l])/qraux[j]; tt = 1.0-tt*tt; tt = MAX(tt,0.0); t = tt; ttt = qraux[j]/work[j]; tt = 1.0+0.05*tt*ttt*ttt; if (tt!=1.0) { qraux[j] *= sqrt(t); } else { qraux[j] = snrm2(n-l-1, &x[j][l+1],1); work[j] = qraux[j]; } } } /* save the transformation */ qraux[l] = x[l][l]; x[l][l] = -nrmxl; } } } }
void sgesl (float **a, int n, int *ipvt, float *b, int job) /***************************************************************************** solve linear system Ax = b or A'x = b after LU factorization ****************************************************************************** Input: a matrix[n][n] that has been LU factored (see notes below) n dimension of a ipvt indices of pivot permutations (see notes below) b right-hand-side vector[n] job =0 to solve Ax = b =1 to solve A'x = b Output: b solution vector[n] ****************************************************************************** Notes: This function was adapted from LINPACK FORTRAN. Because two-dimensional arrays cannot be declared with variable dimensions in C, the matrix a is actually a pointer to an array of pointers to floats, as declared above and used below. ****************************************************************************** Author: Dave Hale, Colorado School of Mines, 10/01/89 *****************************************************************************/ { int k,l,nm1; float t; nm1 = n-1; /* if solving Ax = b */ if (job==0) { /* first solve Ly = b */ for (k=0; k<nm1; k++) { l = ipvt[k]; t = b[l]; if (l!=k) { b[l] = b[k]; b[k] = t; } saxpy(n-k-1,t,&a[k][k+1],1,&b[k+1],1); } /* now solve Ux = y */ for (k=n-1; k>=0; k--) { b[k] /= a[k][k]; t = -b[k]; saxpy(k,t,a[k],1,b,1); } /* else, if solving A'x = b */ } else { /* first solve U'y = b */ for (k=0; k<n; k++) { t = sdot(k,a[k],1,b,1); b[k] = (b[k]-t)/a[k][k]; } /* now solve L'x = y */ for (k=n-2; k>=0; k--) { b[k] += sdot(n-k-1,&a[k][k+1],1,&b[k+1],1); l = ipvt[k]; if (l!=k) { t = b[l]; b[l] = b[k]; b[k] = t; } } } }
void sgeco (float **a, int n, int *ipvt, float *rcond, float *z) /***************************************************************************** Gaussian elimination to obtain the LU factorization and condition number of a matrix. ****************************************************************************** Input: a matrix[n][n] to be factored (see notes below) n dimension of a Output: a matrix[n][n] factored (see notes below) ipvt indices of pivot permutations (see notes below) rcond reciprocal condition number (see notes below) Workspace: z array[n] ****************************************************************************** Notes: This function was adapted from LINPACK FORTRAN. Because two-dimensional arrays cannot be declared with variable dimensions in C, the matrix a is actually a pointer to an array of pointers to floats, as declared above and used below. Elements of a are stored as follows: a[0][0] a[1][0] a[2][0] ... a[n-1][0] a[0][1] a[1][1] a[2][1] ... a[n-1][1] a[0][2] a[1][2] a[2][2] ... a[n-1][2] . . . . . . . . . . a[0][n-1] a[1][n-1] a[2][n-1] ... a[n-1][n-1] Both the factored matrix a and the pivot indices ipvt are required to solve linear systems of equations via sgesl. Given the reciprocal of the condition number, rcond, and the float epsilon, FLT_EPSILON, the number of significant decimal digits, nsdd, in the solution of a linear system of equations may be estimated by: nsdd = (int)log10(rcond/FLT_EPSILON) ****************************************************************************** Author: Dave Hale, Colorado School of Mines, 10/01/89 *****************************************************************************/ { int info,j,k,kp1,l; float ek,t,wk,wkm,anorm,s,sm,ynorm; /* compute 1-norm of a */ for (j=0,anorm=0.0; j<n; j++) { t = sasum(n,a[j],1); anorm = (t>anorm)?t:anorm; } /* factor */ sgefa(a,n,ipvt,&info); /* rcond = 1/(norm(a)*(estimate of norm(inverse(a)))). * estimate = norm(z)/norm(y) where Az = y and A'y = e. * A' is the transpose of A. The components of e are * chosen to cause maximum local growth in the elements of * w where U'w = e. The vectors are frequently rescaled * to avoid overflow */ /* solve U'w = e */ ek = 1.0; for (j=0; j<n; j++) z[j] = 0.0; for (k=0; k<n; k++) { if (z[k]!=0.0) ek = (z[k]>0.0)?-ABS(ek):ABS(ek); if (ABS(ek-z[k])>ABS(a[k][k])) { s = ABS(a[k][k])/ABS(ek-z[k]); sscal(n,s,z,1); ek *= s; } wk = ek-z[k]; wkm = -ek-z[k]; s = ABS(wk); sm = ABS(wkm); if (a[k][k]==0.0) { wk = 1.0; wkm = 1.0; } else { wk = wk/a[k][k]; wkm = wkm/a[k][k]; } kp1 = k+1; if (kp1<n) { for (j=kp1; j<n; j++) { t = z[j]+wkm*a[j][k]; sm += ABS(t); z[j] += wk*a[j][k]; s += ABS(z[j]); } if (s<sm) { t = wkm-wk; wk = wkm; for (j=kp1; j<n; j++) z[j] += t*a[j][k]; } } z[k] = wk; } s = 1.0/sasum(n,z,1); sscal(n,s,z,1); /* solve L'y = w */ for (k=n-1; k>=0; k--) { if (k<n-1) z[k] += sdot(n-k-1,&a[k][k+1],1,&z[k+1],1); if (ABS(z[k])>1.0) { s = 1.0/ABS(z[k]); sscal(n,s,z,1); } l = ipvt[k]; t = z[l]; z[l] = z[k]; z[k] = t; } s = 1.0/sasum(n,z,1); sscal(n,s,z,1); ynorm = 1.0; /* solve Lv = y */ for (k=0; k<n; k++) { l = ipvt[k]; t = z[l]; z[l] = z[k]; z[k] = t; if (k<n-1) saxpy(n-k-1,t,&a[k][k+1],1,&z[k+1],1); if (ABS(z[k])>1.0) { s = 1.0/ABS(z[k]); sscal(n,s,z,1); ynorm *= s; } } s = 1.0/sasum(n,z,1); sscal(n,s,z,1); ynorm *= s; /* solve Uz = v */ for (k=n-1; k>=0; k--) { if (ABS(z[k])>ABS(a[k][k])) { s = ABS(a[k][k])/ABS(z[k]); sscal(n,s,z,1); ynorm *= s; } if (a[k][k]!=0.0) z[k] /= a[k][k]; else z[k] = 1.0; t = -z[k]; saxpy(k,t,a[k],1,z,1); } /* make znorm = 1.0 */ s = 1.0/sasum(n,z,1); sscal(n,s,z,1); ynorm *= s; if (anorm!=0.0) *rcond = ynorm/anorm; else *rcond = 0.0; }
main() { int i,n=N; printf("isamax = %d\n",isamax(n,sx,1)); printf("isamax = %d\n",isamax(n/2,sx,2)); printf("isamax = %d\n",isamax(n,sy,1)); printf("sasum = %g\n",sasum(n,sx,1)); printf("sasum = %g\n",sasum(n/2,sx,2)); printf("sasum = %g\n",sasum(n,sy,1)); printf("snrm2 = %g\n",snrm2(n,sx,1)); printf("snrm2 = %g\n",snrm2(n/2,sx,2)); printf("snrm2 = %g\n",snrm2(n,sy,1)); printf("sdot = %g\n",sdot(n,sx,1,sy,1)); printf("sdot = %g\n",sdot(n/2,sx,2,sy,2)); printf("sdot = %g\n",sdot(n/2,sx,-2,sy,2)); printf("sdot = %g\n",sdot(n,sy,1,sy,1)); printf("sscal\n"); sscal(n,2.0,sx,1); pvec(n,sx); sscal(n,0.5,sx,1); pvec(n,sx); sscal(n/2,2.0,sx,2); pvec(n,sx); sscal(n/2,0.5,sx,2); pvec(n,sx); printf("sswap\n"); sswap(n,sx,1,sy,1); pvec(n,sx); pvec(n,sy); sswap(n,sy,1,sx,1); pvec(n,sx); pvec(n,sy); sswap(n/2,sx,1,sx+n/2,-1); pvec(n,sx); sswap(n/2,sx,1,sx+n/2,-1); pvec(n,sx); sswap(n/2,sx,2,sy,2); pvec(n,sx); pvec(n,sy); sswap(n/2,sx,2,sy,2); pvec(n,sx); pvec(n,sy); printf("saxpy\n"); saxpy(n,2.0,sx,1,sy,1); pvec(n,sx); pvec(n,sy); saxpy(n,-2.0,sx,1,sy,1); pvec(n,sx); pvec(n,sy); saxpy(n/2,2.0,sx,2,sy,2); pvec(n,sx); pvec(n,sy); saxpy(n/2,-2.0,sx,2,sy,2); pvec(n,sx); pvec(n,sy); saxpy(n/2,2.0,sx,-2,sy,1); pvec(n,sx); pvec(n,sy); saxpy(n/2,-2.0,sx,-2,sy,1); pvec(n,sx); pvec(n,sy); printf("scopy\n"); scopy(n/2,sx,2,sy,2); pvec(n,sx); pvec(n,sy); scopy(n/2,sx+1,2,sy+1,2); pvec(n,sx); pvec(n,sy); scopy(n/2,sx,2,sy,1); pvec(n,sx); pvec(n,sy); scopy(n/2,sx+1,-2,sy+n/2,-1); pvec(n,sx); pvec(n,sy); }
void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job ) /******************************************************************************/ /* Purpose: SGESL solves a real general linear system A * X = B. Discussion: SGESL can solve either of the systems A * X = B or A' * X = B. The system matrix must have been factored by SGECO or SGEFA. A division by zero will occur if the input factor contains a zero on the diagonal. Technically this indicates singularity but it is often caused by improper arguments or improper setting of LDA. It will not occur if the subroutines are called correctly and if SGECO has set 0.0 < RCOND or SGEFA has set INFO == 0. Modified: 04 April 2006 Author: FORTRAN77 original by Dongarra, Moler, Bunch and Stewart. C translation by John Burkardt. Reference: Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart, LINPACK User's Guide, SIAM, (Society for Industrial and Applied Mathematics), 3600 University City Science Center, Philadelphia, PA, 19104-2688. ISBN: 0-89871-172-X Parameters: Input, float A[LDA*N], the output from SGECO or SGEFA. Input, int LDA, the leading dimension of A. Input, int N, the order of the matrix A. Input, int IPVT[N], the pivot vector from SGECO or SGEFA. Input/output, float B[N]. On input, the right hand side vector. On output, the solution vector. Input, int JOB. 0, solve A * X = B; nonzero, solve A' * X = B. */ { int k; int l; float t; /* Solve A * X = B. */ if ( job == 0 ) { for ( k = 1; k <= n-1; k++ ) { l = ipvt[k-1]; t = b[l-1]; if ( l != k ) { b[l-1] = b[k-1]; b[k-1] = t; } saxpy ( n-k, t, a+k+(k-1)*lda, 1, b+k, 1 ); } for ( k = n; 1 <= k; k-- ) { b[k-1] = b[k-1] / a[k-1+(k-1)*lda]; t = -b[k-1]; saxpy ( k-1, t, a+0+(k-1)*lda, 1, b, 1 ); } } /* Solve A' * X = B. */ else { for ( k = 1; k <= n; k++ ) { t = sdot ( k-1, a+0+(k-1)*lda, 1, b, 1 ); b[k-1] = ( b[k-1] - t ) / a[k-1+(k-1)*lda]; } for ( k = n-1; 1 <= k; k-- ) { b[k-1] = b[k-1] + sdot ( n-k, a+k+(k-1)*lda, 1, b+k, 1 ); l = ipvt[k-1]; if ( l != k ) { t = b[l-1]; b[l-1] = b[k-1]; b[k-1] = t; } } } return; }
/* Kabsch alignment */ void kabsch_alignment( std::vector<float> ref, std::vector<float> tar, t_tiltdata &data, gmx_bool bVerbose) { if (ref.size() != tar.size()) { std::cerr << "\nError! Sizes of reference coordinate matrix and simulated structure coordinate matrices do not match!" << std::endl; std::exit(1); } int ncoords = ref.size(); int natoms = ncoords/3; // Center the two selections std::vector<float> stsel1(ncoords,0), stsel2(ncoords,0), stsel2T(ncoords,0); std::vector<float> ref_com(3,0), tar_com(3,0); average_coordinate(ref, ref_com); average_coordinate(tar, tar_com); for (int i=0; i<natoms; i++) { for (int j=0; j<3; j++) { stsel1[i+j*natoms] = ref[i+j*natoms] - ref_com[j]; stsel2[i+j*natoms] = tar[i+j*natoms] - tar_com[j]; } } // Initial residual float E0 = sdot(ncoords,&stsel1[0],1,&stsel1[0],1)+sdot(ncoords,&stsel2[0],1,&stsel2[0],1) ; // dot(target_transpose,reference) std::vector<float> T1_dot_2(3*natoms,0); sgemm('T','N',3,natoms,natoms,1,&stsel2[0],natoms,&stsel1[0],natoms,1,&T1_dot_2[0],3); // SVD of the dot product std::vector<float> U(9,0), S(3,0), V(9,0), work(5*9,0); int info; sgesvd('A','A',3,3,&T1_dot_2[0],3,&S[0],&U[0],3,&V[0],3,&work[0],9*5,info); /*std::cout << "\n S: "; for (int i=0;i<3;i++) { std::cout << S[i] << " "; } std::cout << "\n U: "; for (int i=0;i<9;i++) { std::cout << U[i] << " "; }*/ float reflect = det3x3(&U[0]) * det3x3(&V[0]); if ( 1 - reflect > 1e-5) { S[2] = -S[2]; U[6] = -U[6]; U[7] = -U[7]; U[8] = -U[8]; } float rmsd = sqrt(fabs( E0 - (2.0 * (S[0]+S[1]+S[2]) ) ) /natoms); // Rotation matrix is dot(U,V) std::vector<float> M(9,0); sgemm('N','N',3,3,3,1,&U[0],3,&V[0],3,1,&M[0],3); /* M = [ 0 3 6 ] = [ 00 01 02 ] [ 1 4 7 ] [ 10 11 12 ] [ 2 5 8 ] [ 20 21 22 ] */ float trace = M[0]+M[4]+M[8]; float angle = acos((trace-1)/2)*RAD2DEG; float rx,ry,rz,ux,uy,uz; rx = atan2(M[5],M[8])*RAD2DEG; ry = atan2(-M[2],sqrt(M[5]*M[5]+M[8]*M[8]))*RAD2DEG; rz = atan2(M[1],M[0])*RAD2DEG; float zeta = sqrt( (M[5]-M[7])*(M[5]-M[7]) + (M[6]-M[2])*(M[6]-M[2]) + (M[3]-M[1])*(M[3]-M[1]) ); //std::cout << "\n" << M[5] << " - " << M[7] << " = " << M[5]-M[7]; //std::cout << "\n" << M[6] << " - " << M[2] << " = " << M[6]-M[2]; //std::cout << "\n" << M[3] << " - " << M[1] << " = " << M[3]-M[1] << std::endl; ux = (M[5]-M[7])/zeta; uy = (M[6]-M[2])/zeta; uz = (M[3]-M[1])/zeta; //std::cout << zeta << " { " << ux << " " << uy << " " << uz << " }" << sqrt(ux*ux+uy*uy+uz*uz) << std:: endl; if (bVerbose) { fprintf(stdout,"%12s%12s%12s%12s%12s%12s%12s%12s\n","Angle(deg)","rmsd(nm)","x(deg)","y(deg)","z(deg)","ux(nm)","uy(nm)","uz(nm)"); fprintf(stdout,"%12.3f%12.6f%12.4f%12.4f%12.4f%12.4f%12.4f%12.4f\n",angle,rmsd,rx,ry,rz,ux,uy,uz); } data.rotation.push_back(angle); data.rmsd.push_back(rmsd); data.x_rotation.push_back(rx); data.y_rotation.push_back(ry); data.z_rotation.push_back(rz); data.x_rotation_axis.push_back(ux); data.y_rotation_axis.push_back(uy); data.z_rotation_axis.push_back(uz); return; }