/* sconv is a general purpose convolution routine using sunperf BLAS function
sdot for speed.  

arguments:

	in - input vector of function to be convolved with
	nin  - length of in
	filter - vector of filter coefficients to convolve into in
	nfilter - length of filter
	ioff - number of samples to skip into trace before starting
		convolution operation (i.e. left side of integral of
		first sample will start here. )
	decfac - decimation factor.  For use in decimation can make
		this > 1 and routine will only hit every decfac values.
		This is much more efficient than decimation after the fact.
		For normal convolution, set to 1.
	out - output vector  (WARNING:  this vector must be alloced large
		enough to hold complete output.  No check is made to 
		verify if it's length is sufficiently large to hold output vec.
	nout - actual number of samples in out (output)

Returns:

	normal return is 0.  -1 for illegal inputs with no output.  This
condition must be trapped as the routine produces null results.
A positive result is returned if ioff is passed as a negative value.
The result can still be used when this happens, but the time of the
first sample may not be what the calling program believes.  This condition
should be trapped and the time of first output sample in out increased
by ret_code*dt where dt is the sample interval of the in trace. 
 

Author:  Gary L. Pavlis
Written:  July 21, 1998
*/
int sconv(float *in, int nin, float *filter, int nfilter,
		int ioff, int decfac,
		float *out, int *nout)
{
	int i,j,iend;
	int ret_code=0;

	/* First check for bad parameters and correct if possible */
	if(nfilter > nin)
	{
		register_error(0,"sconv:  filter length (%d) longer than input time series (%d)\n",nfilter,nin);
		return(-1);
	}
	if(ioff < 0)
	{
		register_error(0,"sconv:  illegal offset value = %d set to 0\n",
			ioff);
		ret_code = -ioff;
		ioff = 0;
	}
	/* elegance of blas shows here */
	for(i=ioff,j=0,iend=ioff+nfilter,*nout=0;
		iend<nin;
		i += decfac,++j,iend += decfac)
	{
		out[j] = (float)sdot(nfilter,in+i,1,filter,1);
	}
	*nout = j;
	return(ret_code);
}
Exemple #2
0
void spofa(float *a,long lda,long n,long *info)
/*
     SPOFA FACTORS A REAL SYMMETRIC POSITIVE DEFINITE MATRIX.
     SPOFA IS USUALLY CALLED BY SPOCO, BUT IT CAN BE CALLED
     DIRECTLY WITH A SAVING IN TIME IF  RCOND  IS NOT NEEDED.
     (TIME FOR SPOCO) = (1 + 18/N)*(TIME FOR SPOFA) .
     ON ENTRY
        A       REAL(LDA, N)
                THE SYMMETRIC MATRIX TO BE FACTORED.  ONLY THE
                DIAGONAL AND UPPER TRIANGLE ARE USED.
        LDA     INTEGER
                THE LEADING DIMENSION OF THE ARRAY  A .
        N       INTEGER
                THE ORDER OF THE MATRIX  A .
     ON RETURN
        A       AN UPPER TRIANGULAR MATRIX  R  SO THAT  A = TRANS(R)*R
                WHERE  TRANS(R)  IS THE TRANSPOSE.
                THE STRICT LOWER TRIANGLE IS UNALTERED.
                IF  INFO .NE. 0 , THE FACTORIZATION IS NOT COMPLETE.
        INFO    INTEGER
                = 0  FOR NORMAL RETURN.
                = K  SIGNALS AN ERROR CONDITION.  THE LEADING MINOR
                     OF ORDER  K  IS NOT POSITIVE DEFINITE.
     LINPACK.  THIS VERSION DATED 08/14/78 .
     CLEVE MOLER, UNIVERSITY OF NEW MEXICO, ARGONNE NATIONAL LAB.
     SUBROUTINES AND FUNCTIONS
     BLAS SDOT
     FORTRAN SQRT
     INTERNAL VARIABLES
*/
{
extern float sdot(long n,float *sx,long incx,float *sy,long incy);
static long j,jm1,k;
static float t,s;
/*
     BEGIN BLOCK WITH ...EXITS TO 40
*/
    for(j=1; j<=n; j++) {
        *info = j;
        s = 0.0;
        jm1 = j-1;
        if(jm1 < 1) goto S20;
        for(k=0; k<jm1; k++) {
            t = *(a+k+(j-1)*lda)-sdot(k,(a+k*lda),1L,(a+(j-1)*lda),1L);
            t /=  *(a+k+k*lda);
            *(a+k+(j-1)*lda) = t;
            s += (t*t);
        }
S20:
        s = *(a+j-1+(j-1)*lda)-s;
/*
     ......EXIT
*/
        if(s <= 0.0) goto S40;
        *(a+j-1+(j-1)*lda) = sqrt(s);
    }
    *info = 0;
S40:
    return;
}
/* This routine computes the pure pseudoinverse from the svd returned
by svdcmp (U*S*V^T) so Agi = V*S^-1*U^T.  The algorithm used is a
little overly tricky using an internally allocated work vector to 
make the routine nondestructive to the input matrices.  This computes
the "pure" pseudoinverse by setting the svd cutoff value based on
float epsilon (from float.h).

Note input U is mxn, s is an n vector, V is nxn, and the output
Agi is nxm.  

Function returns the number of singular values actually used to 
compute Agi.  

Author:  Gary L. Pavlis
*/
int pseudoinverse(float **U, float *s, float **V, int m, int n, float **Agi)
{
	int i,j, k;  /* counters*/
	float *work;  /* work space */
	float smax;
	float sinv;
	double sv_cutoff;
	int nsv_used;
#ifndef SUNPERF
	int one=1;
#endif


        if((work=(float *)calloc(n,sizeof(float))) == NULL)
                elog_die(1,"Pseudoinverse computation: cannot alloc work array of length %d\n",
                        n);

	/* first find the larges singular value, then just zero
	all those smaller than the cutoff determined as the ratio
	wrt to largest singular value */
	smax = 0.0;
	for(i=0;i<n;++i) 
		if(s[i] > smax) smax = s[i];
	sv_cutoff = (double)smax*FLT_EPSILON;
	/* This is a copy operation */
	for(i=0;i<m;++i)
		for(j=0;j<n;++j) Agi[j][i] = U[i][j];
	/* this works because of C storage order, but is strange.
	It is the multiply by S^-1 */
	for(j=0,nsv_used=0;j<n;++j)
	{
		if( (double)s[j] > sv_cutoff)
		{
			sinv = 1.0/s[j];
			++nsv_used;
		}
		else
			sinv = 0.0;
#ifdef SUNPERF
		sscal(m,sinv,Agi[j],1);
#else
		sscal_(&m,&sinv,Agi[j],&one);
#endif
	}
	/* multiply by V using a column work vector*/
	for(j=0;j<m;++j)
	{
		for(k=0;k<n;++k) work[k] = Agi[k][j];
		for(i=0;i<n;++i)
#ifdef SUNPERF
			Agi[i][j] = sdot(n,work,1,V[i],1);
#else
			Agi[i][j] = sdot_(&n,work,&one,V[i],&one);
#endif
	}
	free(work);
	return(nsv_used);
}
Exemple #4
0
unsigned long long int acml_time()
{ 
  clock_gettime(CLOCK_MONOTONIC, &begin);
  ans1 = sdot(N,x,incx,y,incy);
  clock_gettime(CLOCK_MONOTONIC, &end);
  unsigned long long int time = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec;
  printf("%16f%16lld",ans1,time);
  return time;
}
float inline
cblas_sdot(
  const int    n,
  const float* x,
  const int    incx,
  const float* y,
  const int    incy)
{
  return sdot(n, x, incx, y, incy);
}
Exemple #6
0
int main(int argc, char *argv[])
{
    srand(time(0));
    int n = atoi(argv[1]);
    int i;
    float *x;
    float *y;
    int incx = 1;
    int incy = 1;
    x = (float*)malloc(sizeof(float)*n);
    y = (float*)malloc(sizeof(float)*n);
    for (i = 0; i < n; i++)
    {
        x[i] = rand()/1.0/RAND_MAX - 0.5;
        y[i] = rand()/1.0/RAND_MAX - 0.5;
    }

	init_();

    float ans1, ans2, ans3, ans4;
	unsigned long long int t1, t2, t3, t4;

	clock_gettime(CLOCK_MONOTONIC, &begin);
	ans1 = sdot(n, x, incx, y, incy);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t1 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec;

	clock_gettime(CLOCK_MONOTONIC, &begin);
    ans2 = ftsdot_(&n, x, &incx, y, &incy);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t2 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec;

   	clock_gettime(CLOCK_MONOTONIC, &begin);
	ans3 = asm_dot(n, x, y);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t3 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec;

   	clock_gettime(CLOCK_MONOTONIC, &begin);
	ans4 = ft_asm_dot(n, x, y);
	clock_gettime(CLOCK_MONOTONIC, &end);
	t4 = 1000000000L*(end.tv_sec - begin.tv_sec) + end.tv_nsec - begin.tv_nsec;


	printf("%16f\n", ans1);
    printf("%16f\n", ans2);
    printf("%16f\n", ans3);
    printf("%16f\n", ans4);
	printf("acm%16llu\n",t1);
	printf("for%16llu\n",t2);
	printf("asm%16llu\n",t3);
	printf("as2%16llu\n",t4);
    return 0;
}
Exemple #7
0
//---------------------------- MOUSE HANDLING -----------------------------------
// return sector under the point or null if no sector
CIGCSector *C2DMap::IsOverSector(CPoint point)
{
	if (igcmap == NULL) return NULL;
	CSize sdot(SDOTSECT,SDOTSECT);
	POSITION pos = igcmap->cl_sectors.GetHeadPosition();
	for (int i=0;i < igcmap->cl_sectors.GetCount();i++)
	{
		CIGCSector *psector;
		psector = &(igcmap->cl_sectors.GetNext(pos));
		// should test over dot  of size SDOTSECT centered
		CPoint cp = psector->pos2D;
		cp.Offset(-SDOTSECT/2,-SDOTSECT/2);
		CRect rect(cp,sdot);

		//if ((point.x == psector->pos2D.x) && (point.y == psector->pos2D.y))
		if (rect.PtInRect(point))
		{
			return psector;
		}
	}
	return NULL;
}
Exemple #8
0
/*************************************************************************
* This function implements the CG solver used during the directed diffusion
**************************************************************************/
void ConjGrad2(MatrixType *A, floattype *b, floattype *x, floattype tol, floattype *workspace)
{
  int i, k, n;
  floattype *p, *r, *q, *z, *M;
  floattype alpha, beta, rho, rho_1 = -1.0, error, bnrm2, tmp;
  idxtype *rowptr, *colind;
  floattype *values;

  n = A->nrows;
  rowptr = A->rowptr;
  colind = A->colind;
  values = A->values;

  /* Initial Setup */
  p = workspace;
  r = workspace + n;
  q = workspace + 2*n;
  z = workspace + 3*n;
  M = workspace + 4*n;

  for (i=0; i<n; i++) {
    x[i] = 0.0;
    if (values[rowptr[i]] != 0.0)
      M[i] = 1.0/values[rowptr[i]];
    else
      M[i] = 0.0;
  }

  /* r = b - Ax */
  mvMult2(A, x, r);
  for (i=0; i<n; i++)
    r[i] = b[i]-r[i];

  bnrm2 = snorm2(n, b);
  if (bnrm2 > 0.0) {
    error = snorm2(n, r) / bnrm2;

    if (error > tol) {
      /* Begin Iterations */
      for (k=0; k<n; k++) {
        for (i=0; i<n; i++)
          z[i] = r[i]*M[i];

        rho = sdot(n, r, z);

        if (k == 0)
          scopy(n, z, p);
        else {
          if (rho_1 != 0.0)
            beta = rho/rho_1;
          else
            beta = 0.0;
          for (i=0; i<n; i++)
            p[i] = z[i] + beta*p[i];
        }

        mvMult2(A, p, q); /* q = A*p */

        tmp = sdot(n, p, q);
        if (tmp != 0.0)
          alpha = rho/tmp;
        else
          alpha = 0.0;
        saxpy(n, alpha, p, x);    /* x = x + alpha*p */
        saxpy(n, -alpha, q, r);   /* r = r - alpha*q */
        error = snorm2(n, r) / bnrm2;
        if (error < tol)
          break;

        rho_1 = rho;
      }
    }
  }
}
Exemple #9
0
void
sqrsl (float **x, int n, int k, float *qraux,
	float *y, float *qy, float *qty,
	float *b, float *rsd, float *xb, int job, int *info)
/*****************************************************************************
Use the output of sqrdc to compute coordinate transformations, projections,
and least squares solutions.  For k <= MIN(n,p), let xk be the matrix
	xk = (x[jpvt[0]], x[jpvt[1]], ..., x[jpvt[k-1]])
formed from columns jpvt[0], jpvt[1], ..., jpvt[k-1] of the original
n by p matrix x that was input to sqrdc.  (If no pivoting was done, xk
consists of the first k columns of x in their original order.)  sqrdc
produces a factored orthogonal matrix Q and an upper triangular matrix R
such that
	xk = Q * (R)
	         (0)
This information is contained in coded form in the arrays x and qraux.
******************************************************************************
Input:
x		matrix[p][n] containing output of sqrdc.
n		number of rows in the matrix xk; same as in sqrdc.
k		number of columns in the matrix xk; k must not be greater
		than MIN(n,p), where p is the same as in sqrdc.
qraux		array[p] containing auxiliary output from sqrdc.
y		array[n] to be manipulated by sqrsl.
job		specifies what is to be computed.  job has the decimal
		expansion ABCDE, with the following meaning:
		if A != 0, compute qy.
		if B, C, D, or E != 0, compute qty.
		if C != 0, compute b.
		if D != 0, compute rsd.
		if E != 0, compute xb.
		Note that a request to compute b, rsd, or xb automatically
		triggers the computation of qty, for which an array must
		be provided.

Output:
qy		array[n] containing Qy, if its computation has been
		requested.
qty		array[n] containing Q'y, if its computation has
		been requested.  Here Q' denotes the transpose of Q.
b		array[k] containing solution of the least squares problem:
			minimize norm2(y - xk*b),
		if its computation has been requested.  (Note that if
		pivoting was requested in sqrdc, the j-th component of
		b will be associated with column jpvt[j] of the original
		matrix x that was input into sqrdc.)
rsd		array[n] containing the least squares residual y - xk*b,
		if its computation has been requested.  rsd is also the
		orthogonal projection of y onto the orthogonal complement
		of the column space of xk.
xb		array[n] containing the least squares approximation xk*b,
		if its computation has been requested.  xb is also the
		orthogonal projection of y onto the column space of x.
info		=0 unless the computation of b has been requested and R
		is exactly singular.  In this case, info is the index of
		the first zero diagonal element of R and b is left
		unaltered.
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix x
is actually a pointer to an array of pointers to floats, as declared
above and used below.

Elements of x are stored as follows:
x[0][0]    x[1][0]    x[2][0]   ... x[k-1][0]
x[0][1]    x[1][1]    x[2][1]   ... x[k-1][1]
x[0][2]    x[1][2]    x[2][2]   ... x[k-1][2]
.                                       .
.             .                         .
.                        .              .
.                                       .
x[0][n-1]  x[1][n-1]  x[2][n-1] ... x[k-1][n-1]

The parameters qy, qty, b, rsd, and xb are not referenced if their
computation is not requested and in this case can be replaced by NULL
pointers in the calling program.  To save storage, the user may in
some cases use the same array for different parameters in the calling
sequence.  A frequently occuring example is when one wishes to compute
any of b, rsd, or xb and does not need y or qty.  In this case one may
equivalence y, qty, and one of b, rsd, or xb, while providing separate
arrays for anything else that is to be computed.  Thus the calling
sequence
	sqrsl(x,n,k,qraux,y,NULL,y,b,y,NULL,110,&info)
will result in the computation of b and rsd, with rsd overwriting y.
More generally, each item in the following list contains groups of
permissible equivalences for a single calling sequence.
	1. (y,qty,b) (rsd) (xb) (qy)
	2. (y,qty,rsd) (b) (xb) (qy)
	3. (y,qty,xb) (b) (rsd) (qy)
	4. (y,qy) (qty,b) (rsd) (xb)
	5. (y,qy) (qty,rsd) (b) (xb)
	6. (y,qy) (qty,xb) (b) (rsd)
In any group the value returned in the array allocated to the group
corresponds to the last member of the group.
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 12/29/89
*****************************************************************************/
{
	int i,j,ju,cb,cqy,cqty,cr,cxb;
	float t,temp;
	
	/* set info flag */
	*info = 0;
	
	/* determine what is to be computed */
	cqy = job/10000!=0;
	cqty = job%10000!=0;
	cb = (job%1000)/100!=0;
	cr = (job%100)/10!=0;
	cxb = job%10!=0;
	ju = MIN(k,n-1);
	
	/* special action when n=1 */
	if (ju==0) {
		if (cqy) qy[0] = y[0];
		if (cqty) qty[0] = y[0];
		if (cxb) xb[0] = y[0];
		if (cb) {
			if (x[0][0]==0.0)
				*info = 1;
			else
				b[0] = y[0]/x[0][0];
		}
		if (cr) rsd[0] = 0.0;
		return;
	}
	
	/* set up to compute Qy or Q'y */
	if (cqy) scopy(n,y,1,qy,1);
	if (cqty) scopy(n,y,1,qty,1);
	if (cqy) {
	
		/* compute Qy */
		for (j=ju-1; j>=0; j--) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				t = -sdot(n-j,&x[j][j],1,&qy[j],1)/x[j][j];
				saxpy(n-j,t,&x[j][j],1,&qy[j],1);
				x[j][j] = temp;
			}
		}
	}
	if (cqty) {
		
		/* compute Q'y */
		for (j=0; j<ju; j++) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				t = -sdot(n-j,&x[j][j],1,&qty[j],1)/x[j][j];
				saxpy(n-j,t,&x[j][j],1,&qty[j],1);
				x[j][j] = temp;
			}
		}
	}
	
	/* set up to compute b, rsd, or xb */
	if (cb) scopy(k,qty,1,b,1);
	if (cxb) scopy(k,qty,1,xb,1);
	if (cr && k<n) scopy(n-k,&qty[k],1,&rsd[k],1);
	if (cxb && k<n)
		for (i=k; i<n; i++)
			xb[i] = 0.0;
	if (cr)
		for (i=0; i<k; i++)
			rsd[i] = 0.0;
	if (cb) {
	
		/* compute b */
		for (j=k-1; j>=0; j--) {
			if (x[j][j]==0.0) {
				*info = j;
				break;
			}
			b[j] /= x[j][j];
			if (j!=0) {
				t = -b[j];
				saxpy(j,t,x[j],1,b,1);
			}
		}
	}
	if (cr || cxb) {
	
		/* compute rsd or xb as requested */
		for (j=ju-1; j>=0; j--) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				if (cr) {
					t = -sdot(n-j,&x[j][j],1,&rsd[j],1)/
						x[j][j];
					saxpy(n-j,t,&x[j][j],1,&rsd[j],1);
				}
				if (cxb) {
					t = -sdot(n-j,&x[j][j],1,&xb[j],1)/
						x[j][j];
					saxpy(n-j,t,&x[j][j],1,&xb[j],1);
				}
				x[j][j] = temp;
			}
		}
	}
}
Exemple #10
0
void
sqrdc (float **x, int n, int p, float *qraux, int *jpvt,
	float *work, int job)
/*****************************************************************************
Use Householder transformations to compute the QR decomposition of an n by p
matrix x.  Column pivoting based on the 2-norms of the reduced columns may be
performed at the user's option.
******************************************************************************
Input:
x		matrix[p][n] to decompose (see notes below)
n		number of rows in the matrix x
p		number of columns in the matrix x
jpvt		array[p] controlling the pivot columns (see notes below)
job		=0 for no pivoting;
		=1 for pivoting

Output:
x		matrix[p][n] decomposed (see notes below)
qraux		array[p] containing information required to recover the
		orthogonal part of the decomposition
jpvt		array[p] with jpvt[k] containing the index of the original
		matrix that has been interchanged into the k-th column, if
		pivoting is requested.

Workspace:
work		array[p] of workspace
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix x
is actually a pointer to an array of pointers to floats, as declared
above and used below.

Elements of x are stored as follows:
x[0][0]    x[1][0]    x[2][0]   ... x[p-1][0]
x[0][1]    x[1][1]    x[2][1]   ... x[p-1][1]
x[0][2]    x[1][2]    x[2][2]   ... x[p-1][2]
.                                       .
.             .                         .
.                        .              .
.                                       .
x[0][n-1]  x[1][n-1]  x[2][n-1] ... x[p-1][n-1]

After decomposition, x contains in its upper triangular matrix R of the QR
decomposition.  Below its diagonal x contains information from which the
orthogonal part of the decomposition can be recovered.  Note that if
pivoting has been requested, the decomposition is not that of the original
matrix x but that of x with its columns permuted as described by jpvt.

The selection of pivot columns is controlled by jpvt as follows.
The k-th column x[k] of x is placed in one of three classes according to
the value of jpvt[k].
	if jpvt[k] >  0, then x[k] is an initial column.
	if jpvt[k] == 0, then x[k] is a free column.
	if jpvt[k] <  0, then x[k] is a final column.
Before the decomposition is computed, initial columns are moved to the
beginning of the array x and final columns to the end.  Both initial and
final columns are frozen in place during the computation and only free
columns are moved.  At the k-th stage of the reduction, if x[k] is occupied
by a free column it is interchanged with the free column of largest reduced
norm.  jpvt is not referenced if job == 0.
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 12/29/89
*****************************************************************************/
{
	int j,jp,l,lup,maxj,pl,pu,negj,swapj;
	float maxnrm,t,tt,ttt,nrmxl;
	
	pl = 0;
	pu = -1;
	
	/* if pivoting has been requested */
	if (job!=0) {
		
		/* rearrange columns according to jpvt */
		for (j=0; j<p; j++) {
			swapj = jpvt[j]>0;
			negj = jpvt[j]<0;
			jpvt[j] = j;
			if (negj) jpvt[j] = -j;
			if (swapj) {
				if (j!=pl) sswap(n,x[pl],1,x[j],1);
				jpvt[j] = jpvt[pl];
				jpvt[pl] = j;
				pl++;
			}
		}
		pu = p-1;
		for (j=p-1; j>=0; j--) {
			if (jpvt[j]<0) {
				jpvt[j] = -jpvt[j];
				if (j!=pu) {
					sswap(n,x[pu],1,x[j],1);
					jp = jpvt[pu];
					jpvt[pu] = jpvt[j];
					jpvt[j] = jp;
				}
				pu--;
			}
		}
	}
	
	/* compute the norms of the free columns */
	for (j=pl; j<=pu; j++) {
		qraux[j] = snrm2(n,x[j],1);
		work[j] = qraux[j];
	}
	
	/* perform the Householder reduction of x */
	lup = MIN(n,p);
	for (l=0; l<lup; l++) {
		if (l>=pl && l<pu) {
			
			/* 
			 * locate the column of largest norm and
			 * bring it into pivot position.
			 */
			maxnrm = 0.0;
			maxj = l;
			for (j=l; j<=pu; j++) {
			 	if (qraux[j]>maxnrm) {
					maxnrm = qraux[j];
					maxj = j;
				}
			}
			if (maxj!=l) {
				sswap(n,x[l],1,x[maxj],1);
				qraux[maxj] = qraux[l];
				work[maxj] = work[l];
				jp = jpvt[maxj];
				jpvt[maxj] = jpvt[l];
				jpvt[l] = jp;
			}
		}
		qraux[l] = 0.0;
		if (l!=n-1) {
		
			/*
			 * compute the Householder transformation
			 * for column l
			 */
			nrmxl = snrm2(n-l,&x[l][l],1);
			if (nrmxl!=0.0) {
				if (x[l][l]!=0.0)
					nrmxl = (x[l][l]>0.0) ?
						ABS(nrmxl) :
						-ABS(nrmxl);
				sscal(n-l,1.0/nrmxl,&x[l][l],1);
				x[l][l] += 1.0;
				
				/*
				 * apply the transformation to the remaining
				 * columns, updating the norms
				 */
				 for (j=l+1; j<p; j++) {
					 t = -sdot(n-l,&x[l][l],1,&x[j][l],1)/
					 	x[l][l];
					saxpy(n-l,t,&x[l][l],1,&x[j][l],1);
					if (j>=pl && j<=pu && qraux[j]!=0.0) {
						tt = ABS(x[j][l])/qraux[j];
						tt = 1.0-tt*tt;
						tt = MAX(tt,0.0);
						t = tt;
						ttt = qraux[j]/work[j];
						tt = 1.0+0.05*tt*ttt*ttt;
						if (tt!=1.0) {
							qraux[j] *= sqrt(t);
						} else {
							qraux[j] = snrm2(n-l-1,
								&x[j][l+1],1);
							work[j] = qraux[j];
						}
					}
				}
				
				/* save the transformation */
				qraux[l] = x[l][l];
				x[l][l] = -nrmxl;
			}
		}
	}
}	
Exemple #11
0
void
sgesl (float **a, int n, int *ipvt, float *b, int job)
/*****************************************************************************
solve linear system Ax = b or A'x = b after LU factorization
******************************************************************************
Input:
a		matrix[n][n] that has been LU factored (see notes below)
n		dimension of a
ipvt		indices of pivot permutations (see notes below)
b		right-hand-side vector[n]
job		=0 to solve Ax = b
		=1 to solve A'x = b

Output:
b		solution vector[n]
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix a
is actually a pointer to an array of pointers to floats, as declared
above and used below.
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 10/01/89
*****************************************************************************/
{
	int k,l,nm1;
	float t;

	nm1 = n-1;

	/* if solving Ax = b */
	if (job==0) {

		/* first solve Ly = b */
		for (k=0; k<nm1; k++) {
			l = ipvt[k];
			t = b[l];
			if (l!=k) {
				b[l] = b[k];
				b[k] = t;
			}
			saxpy(n-k-1,t,&a[k][k+1],1,&b[k+1],1);
		}

		/* now solve Ux = y */
		for (k=n-1; k>=0; k--) {
			b[k] /= a[k][k];
			t = -b[k];
			saxpy(k,t,a[k],1,b,1);
		}

	/* else, if solving A'x = b */
	} else {

		/* first solve U'y = b */
		for (k=0; k<n; k++) {
			t = sdot(k,a[k],1,b,1);
			b[k] = (b[k]-t)/a[k][k];
		}

		/* now solve L'x = y */
		for (k=n-2; k>=0; k--) {
			b[k] += sdot(n-k-1,&a[k][k+1],1,&b[k+1],1);
			l = ipvt[k];
			if (l!=k) {
				t = b[l];
				b[l] = b[k];
				b[k] = t;
			}
		}
	}
}
Exemple #12
0
void
sgeco (float **a, int n, int *ipvt, float *rcond, float *z)
/*****************************************************************************
Gaussian elimination to obtain the LU factorization and
condition number of a matrix.
******************************************************************************
Input:
a		matrix[n][n] to be factored (see notes below)
n		dimension of a

Output:
a		matrix[n][n] factored (see notes below)
ipvt		indices of pivot permutations (see notes below)
rcond		reciprocal condition number (see notes below)

Workspace:
z		array[n]
******************************************************************************
Notes:
This function was adapted from LINPACK FORTRAN.  Because two-dimensional 
arrays cannot be declared with variable dimensions in C, the matrix a
is actually a pointer to an array of pointers to floats, as declared
above and used below.

Elements of a are stored as follows:
a[0][0]    a[1][0]    a[2][0]   ... a[n-1][0]
a[0][1]    a[1][1]    a[2][1]   ... a[n-1][1]
a[0][2]    a[1][2]    a[2][2]   ... a[n-1][2]
.                                       .
.             .                         .
.                        .              .
.                                       .
a[0][n-1]  a[1][n-1]  a[2][n-1] ... a[n-1][n-1]

Both the factored matrix a and the pivot indices ipvt are required
to solve linear systems of equations via sgesl.

Given the reciprocal of the condition number, rcond, and the float
epsilon, FLT_EPSILON, the number of significant decimal digits, nsdd,
in the solution of a linear system of equations may be estimated by:
	nsdd = (int)log10(rcond/FLT_EPSILON)
******************************************************************************
Author:  Dave Hale, Colorado School of Mines, 10/01/89
*****************************************************************************/
{
	int info,j,k,kp1,l;
	float ek,t,wk,wkm,anorm,s,sm,ynorm;

	/* compute 1-norm of a */
	for (j=0,anorm=0.0; j<n; j++) {
		t = sasum(n,a[j],1);
		anorm = (t>anorm)?t:anorm;
	}

	/* factor */
	sgefa(a,n,ipvt,&info);

	/* rcond = 1/(norm(a)*(estimate of norm(inverse(a)))).
	 * estimate = norm(z)/norm(y) where Az = y and A'y = e.
	 * A' is the transpose of A.  The components of e are
	 * chosen to cause maximum local growth in the elements of
	 * w where U'w = e.  The vectors are frequently rescaled
	 * to avoid overflow
	 */

	/* solve U'w = e */
	ek = 1.0;
	for (j=0; j<n; j++)
		z[j] = 0.0;
	for (k=0; k<n; k++) {
		if (z[k]!=0.0) ek = (z[k]>0.0)?-ABS(ek):ABS(ek);
		if (ABS(ek-z[k])>ABS(a[k][k])) {
			s = ABS(a[k][k])/ABS(ek-z[k]);
			sscal(n,s,z,1);
			ek *= s;
		}
		wk = ek-z[k];
		wkm = -ek-z[k];
		s = ABS(wk);
		sm = ABS(wkm);
		if (a[k][k]==0.0) {
			wk = 1.0;
			wkm = 1.0;
		} else {
			wk = wk/a[k][k];
			wkm = wkm/a[k][k];
		}
		kp1 = k+1;
		if (kp1<n) {
			for (j=kp1; j<n; j++) {
				t = z[j]+wkm*a[j][k];
				sm += ABS(t);
				z[j] += wk*a[j][k];
				s += ABS(z[j]);
			}
			if (s<sm) {
				t = wkm-wk;
				wk = wkm;
				for (j=kp1; j<n; j++)
					z[j] += t*a[j][k];
			}
		}
		z[k] = wk;
	}
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);

	/* solve L'y = w */
	for (k=n-1; k>=0; k--) {
		if (k<n-1) z[k] += sdot(n-k-1,&a[k][k+1],1,&z[k+1],1);
		if (ABS(z[k])>1.0) {
			s = 1.0/ABS(z[k]);
			sscal(n,s,z,1);
		}
		l = ipvt[k];
		t = z[l];
		z[l] = z[k];
		z[k] = t;
	}
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);

	ynorm = 1.0;

	/* solve Lv = y */
	for (k=0; k<n; k++) {
		l = ipvt[k];
		t = z[l];
		z[l] = z[k];
		z[k] = t;
		if (k<n-1) saxpy(n-k-1,t,&a[k][k+1],1,&z[k+1],1);
		if (ABS(z[k])>1.0) {
			s = 1.0/ABS(z[k]);
			sscal(n,s,z,1);
			ynorm *= s;
		}
	}
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);
	ynorm *= s;

	/* solve Uz = v */
	for (k=n-1; k>=0; k--) {
		if (ABS(z[k])>ABS(a[k][k])) {
			s = ABS(a[k][k])/ABS(z[k]);
			sscal(n,s,z,1);
			ynorm *= s;
		}
		if (a[k][k]!=0.0) 
			z[k] /= a[k][k];
		else
			z[k] = 1.0;
		t = -z[k];
		saxpy(k,t,a[k],1,z,1);
	}

	/* make znorm = 1.0 */
	s = 1.0/sasum(n,z,1);
	sscal(n,s,z,1);
	ynorm *= s;

	if (anorm!=0.0) 
		*rcond = ynorm/anorm;
	else
		*rcond = 0.0;
}
Exemple #13
0
main()
{
	int i,n=N;

	printf("isamax = %d\n",isamax(n,sx,1));
	printf("isamax = %d\n",isamax(n/2,sx,2));
	printf("isamax = %d\n",isamax(n,sy,1));

	printf("sasum = %g\n",sasum(n,sx,1));
	printf("sasum = %g\n",sasum(n/2,sx,2));
	printf("sasum = %g\n",sasum(n,sy,1));

	printf("snrm2 = %g\n",snrm2(n,sx,1));
	printf("snrm2 = %g\n",snrm2(n/2,sx,2));
	printf("snrm2 = %g\n",snrm2(n,sy,1));

	printf("sdot = %g\n",sdot(n,sx,1,sy,1));
	printf("sdot = %g\n",sdot(n/2,sx,2,sy,2));
	printf("sdot = %g\n",sdot(n/2,sx,-2,sy,2));
	printf("sdot = %g\n",sdot(n,sy,1,sy,1));

	printf("sscal\n");
	sscal(n,2.0,sx,1);
	pvec(n,sx);
	sscal(n,0.5,sx,1);
	pvec(n,sx);
	sscal(n/2,2.0,sx,2);
	pvec(n,sx);
	sscal(n/2,0.5,sx,2);
	pvec(n,sx);

	printf("sswap\n");
	sswap(n,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	sswap(n,sy,1,sx,1);
	pvec(n,sx); pvec(n,sy);
	sswap(n/2,sx,1,sx+n/2,-1);
	pvec(n,sx);
	sswap(n/2,sx,1,sx+n/2,-1);
	pvec(n,sx);
	sswap(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	sswap(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);

	printf("saxpy\n");
	saxpy(n,2.0,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n,-2.0,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,2.0,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,-2.0,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,2.0,sx,-2,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,-2.0,sx,-2,sy,1);
	pvec(n,sx); pvec(n,sy);

	printf("scopy\n");
	scopy(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx+1,2,sy+1,2);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx,2,sy,1);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx+1,-2,sy+n/2,-1);
	pvec(n,sx); pvec(n,sy);
}
Exemple #14
0
void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job )

/******************************************************************************/
/*
  Purpose:

    SGESL solves a real general linear system A * X = B.

  Discussion:

    SGESL can solve either of the systems A * X = B or A' * X = B.

    The system matrix must have been factored by SGECO or SGEFA.

    A division by zero will occur if the input factor contains a
    zero on the diagonal.  Technically this indicates singularity
    but it is often caused by improper arguments or improper
    setting of LDA.  It will not occur if the subroutines are
    called correctly and if SGECO has set 0.0 < RCOND
    or SGEFA has set INFO == 0.

  Modified:

    04 April 2006

  Author:

    FORTRAN77 original by Dongarra, Moler, Bunch and Stewart.
    C translation by John Burkardt.

  Reference:

    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
    LINPACK User's Guide,
    SIAM, (Society for Industrial and Applied Mathematics),
    3600 University City Science Center,
    Philadelphia, PA, 19104-2688.
    ISBN: 0-89871-172-X

  Parameters:

    Input, float A[LDA*N], the output from SGECO or SGEFA.

    Input, int LDA, the leading dimension of A.

    Input, int N, the order of the matrix A.

    Input, int IPVT[N], the pivot vector from SGECO or SGEFA.

    Input/output, float B[N].
    On input, the right hand side vector.
    On output, the solution vector.

    Input, int JOB.
    0, solve A * X = B;
    nonzero, solve A' * X = B.
*/
{
  int k;
  int l;
  float t;
/*
  Solve A * X = B.
*/
  if ( job == 0 )
  {
    for ( k = 1; k <= n-1; k++ )
    {
      l = ipvt[k-1];
      t = b[l-1];

      if ( l != k )
      {
        b[l-1] = b[k-1];
        b[k-1] = t;
      }
      saxpy ( n-k, t, a+k+(k-1)*lda, 1, b+k, 1 );
    }

    for ( k = n; 1 <= k; k-- )
    {
      b[k-1] = b[k-1] / a[k-1+(k-1)*lda];
      t = -b[k-1];
      saxpy ( k-1, t, a+0+(k-1)*lda, 1, b, 1 );
    }
  }
/*
  Solve A' * X = B.
*/
  else
  {
    for ( k = 1; k <= n; k++ )
    {
      t = sdot ( k-1, a+0+(k-1)*lda, 1, b, 1 );
      b[k-1] = ( b[k-1] - t ) / a[k-1+(k-1)*lda];
    }

    for ( k = n-1; 1 <= k; k-- )
    {
      b[k-1] = b[k-1] + sdot ( n-k, a+k+(k-1)*lda, 1, b+k, 1 );
      l = ipvt[k-1];

      if ( l != k )
      {
        t = b[l-1];
        b[l-1] = b[k-1];
        b[k-1] = t;
      }
    }
  }
  return;
}
Exemple #15
0
/* Kabsch alignment */
void kabsch_alignment( std::vector<float> ref, std::vector<float> tar, t_tiltdata &data, gmx_bool bVerbose)
{
    if (ref.size() != tar.size())
    {
        std::cerr << "\nError! Sizes of reference coordinate matrix and simulated structure coordinate matrices do not match!" << std::endl;
        std::exit(1);
    }
    int ncoords = ref.size();
    int natoms = ncoords/3;
    // Center the two selections
    std::vector<float> stsel1(ncoords,0), stsel2(ncoords,0), stsel2T(ncoords,0);
    std::vector<float> ref_com(3,0), tar_com(3,0);
    average_coordinate(ref, ref_com);
    average_coordinate(tar, tar_com);
    for (int i=0; i<natoms; i++)
    {
        for (int j=0; j<3; j++)
        {
            stsel1[i+j*natoms] = ref[i+j*natoms] - ref_com[j];
            stsel2[i+j*natoms] = tar[i+j*natoms] - tar_com[j];
        }
    }
    // Initial residual
    float E0 = sdot(ncoords,&stsel1[0],1,&stsel1[0],1)+sdot(ncoords,&stsel2[0],1,&stsel2[0],1) ;
    // dot(target_transpose,reference)
    std::vector<float> T1_dot_2(3*natoms,0);
    sgemm('T','N',3,natoms,natoms,1,&stsel2[0],natoms,&stsel1[0],natoms,1,&T1_dot_2[0],3);
    // SVD of the dot product
    std::vector<float> U(9,0), S(3,0), V(9,0), work(5*9,0);
    int info;
    sgesvd('A','A',3,3,&T1_dot_2[0],3,&S[0],&U[0],3,&V[0],3,&work[0],9*5,info);
    /*std::cout << "\n S: ";
    for (int i=0;i<3;i++)
    {
        std::cout << S[i] << " ";
    }
    std::cout << "\n U: ";
    for (int i=0;i<9;i++)
    {
        std::cout << U[i] << " ";
    }*/
    float reflect = det3x3(&U[0]) * det3x3(&V[0]);
    if ( 1 - reflect > 1e-5)
    {
        S[2] = -S[2];
        U[6] = -U[6];
        U[7] = -U[7];
        U[8] = -U[8];
    }
    float rmsd = sqrt(fabs(
                           E0
                           - (2.0 *
                              (S[0]+S[1]+S[2])
                              )
                           )
                      /natoms);
    // Rotation matrix is dot(U,V)
    std::vector<float> M(9,0);
    sgemm('N','N',3,3,3,1,&U[0],3,&V[0],3,1,&M[0],3);
    /*
     M = [ 0 3 6 ] = [ 00 01 02 ]
         [ 1 4 7 ]   [ 10 11 12 ]
         [ 2 5 8 ]   [ 20 21 22 ]
     */
    float trace = M[0]+M[4]+M[8];
    float angle = acos((trace-1)/2)*RAD2DEG;
    float rx,ry,rz,ux,uy,uz;
    rx = atan2(M[5],M[8])*RAD2DEG;
    ry = atan2(-M[2],sqrt(M[5]*M[5]+M[8]*M[8]))*RAD2DEG;
    rz = atan2(M[1],M[0])*RAD2DEG;
    float zeta = sqrt(
                        (M[5]-M[7])*(M[5]-M[7])
                      + (M[6]-M[2])*(M[6]-M[2])
                      + (M[3]-M[1])*(M[3]-M[1])
                      );
    //std::cout << "\n" << M[5] << " - " << M[7] << " = " << M[5]-M[7];
    //std::cout << "\n" << M[6] << " - " << M[2] << " = " << M[6]-M[2];
    //std::cout << "\n" << M[3] << " - " << M[1] << " = " << M[3]-M[1] << std::endl;
    ux = (M[5]-M[7])/zeta;
    uy = (M[6]-M[2])/zeta;
    uz = (M[3]-M[1])/zeta;
    //std::cout << zeta << " { " << ux << " " << uy << " " << uz << " }" << sqrt(ux*ux+uy*uy+uz*uz) << std:: endl;
    if (bVerbose)
    {
        fprintf(stdout,"%12s%12s%12s%12s%12s%12s%12s%12s\n","Angle(deg)","rmsd(nm)","x(deg)","y(deg)","z(deg)","ux(nm)","uy(nm)","uz(nm)");
        fprintf(stdout,"%12.3f%12.6f%12.4f%12.4f%12.4f%12.4f%12.4f%12.4f\n",angle,rmsd,rx,ry,rz,ux,uy,uz);
    }
    data.rotation.push_back(angle);
    data.rmsd.push_back(rmsd);
    data.x_rotation.push_back(rx);
    data.y_rotation.push_back(ry);
    data.z_rotation.push_back(rz);
    data.x_rotation_axis.push_back(ux);
    data.y_rotation_axis.push_back(uy);
    data.z_rotation_axis.push_back(uz);

    return;
}