void Update_lbfgs (int n, /* I num unknowns */ int t, /* I num vectors to store */ real *s_vec, /* I x_new - x_old */ real *y_vec, /* I g_new - g_old */ real *Bs_vec, /* IO scratch space */ int *NUPDT, /* IO num updates made to B/H */ real *CMPS, /* IO storage for t s_vecs */ real *CMPY) /* IO storage for t y_vecs */ /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * This routine updates the limited memory BFGS approximations for * the Hessian B and its inverse H. The update uses s_vec and * y_vec, whose product should be negative to preserve the positive * definiteness of the BFGS approximations. If s'y is not a descent * direction, then y is "damped", an idea due to Powell. * Then y_vec becomes: * y_damped = alpha*y + (1-alpha)*Bs * * 0.8*s'Bs * where alpha = ---------- * s'Bs - s'y * * If 0 <= s'y <= 1.0E-8 y'y then the update is skipped to prevent * division by a small number. * * The L-BFGS structures are currently FORTRAN subroutines that * require the variables NUPDT, CMPS, and CMPY. These are used as * work space storage and should not be altered between FORTRAN calls. *********************************************************************/ { real s_dot_y, sBs, y_dot_y, alpha; /*-- Damp the estimate if necessary. */ s_dot_y = ddot (n, s_vec, 1, y_vec, 1); multbv_ (&n, &t, s_vec, Bs_vec, NUPDT, CMPS, CMPY); sBs = ddot (n, s_vec, 1, Bs_vec, 1); if (s_dot_y < (0.2 * sBs)) { fprintf (bfgs_fp, "--- damping L-BFGS update\n"); alpha = 0.8 * sBs / (sBs - s_dot_y); dscal (n, alpha, y_vec, 1); daxpy (n, (1.0 - alpha), Bs_vec, 1, y_vec, 1); s_dot_y = ddot (n, s_vec, 1, y_vec, 1); } /*-- Decide whether to skip the update. */ y_dot_y = ddot (n, y_vec, 1, y_vec, 1); if ((s_dot_y >= 0.0) && (s_dot_y <= (sqrt(MCHEPS) * y_dot_y))) { fprintf (bfgs_fp, "--- skipping L-BFGS update\n"); return; } /*-- Make the updates. */ updtbh_ (&n, &t, s_vec, y_vec, NUPDT, CMPS, CMPY); return; }
/* * Computes residuals. * * hrx = -A'*y - G'*z; rx = hrx - c.*tau; hresx = norm(rx,2); * hry = A*x; ry = hry - b.*tau; hresy = norm(ry,2); * hrz = s + G*x; rz = hrz - h.*tau; hresz = norm(rz,2); * rt = kappa + c'*x + b'*y + h'*z; */ void computeResiduals(pwork *w) { /* rx = -A'*y - G'*z - c.*tau */ if( w->p > 0 ) { sparseMtVm(w->A, w->y, w->rx, 1, 0); sparseMtVm(w->G, w->z, w->rx, 0, 0); } else { sparseMtVm(w->G, w->z, w->rx, 1, 0); } w->hresx = norm2(w->rx, w->n); vsubscale(w->n, w->tau, w->c, w->rx); /* ry = A*x - b.*tau */ if( w->p > 0 ){ sparseMV(w->A, w->x, w->ry, 1, 1); w->hresy = norm2(w->ry, w->p); vsubscale(w->p, w->tau, w->b, w->ry); } else { w->hresy = 0; w->ry = NULL; } /* rz = s + G*x - h.*tau */ sparseMV(w->G, w->x, w->rz, 1, 1); vadd(w->m, w->s, w->rz); w->hresz = norm2(w->rz, w->m); vsubscale(w->m, w->tau, w->h, w->rz); /* rt = kappa + c'*x + b'*y + h'*z; */ w->cx = ddot(w->n, w->c, w->x); w->by = w->p > 0 ? ddot(w->p, w->b, w->y) : 0.0; w->hz = ddot(w->m, w->h, w->z); w->rt = w->kap + w->cx + w->by + w->hz; }
double *cgsolve(int k) { int i, first_i, last_i; int n = k * k; int maxiters = 1000 > 5*k ? 1000 : k; // partition data if (n % size) { first_i = (n / size + 1) * rank; last_i = (rank != size-1 ? first_i+n/size+1 : n); } else { first_i = n / size * rank; last_i = n / size * (rank + 1); } double *b_vec = (double *)malloc(n * sizeof(double)); double *r_vec = (double *)malloc(n * sizeof(double)); double *d_vec = (double *)malloc(n * sizeof(double)); double *A_vec = (double *)malloc(n * sizeof(double)); double *x_vec = (double *)malloc(n * sizeof(double)); for (i=0; i<n; i++) { double tmp = cs240_getB(i, n); b_vec[i] = tmp; r_vec[i] = tmp; d_vec[i] = tmp; x_vec[i] = 0; } double normb = sqrt(ddot(b_vec+first_i, b_vec+first_i, last_i-first_i)); double rtr = ddot(r_vec+first_i, r_vec+first_i, last_i-first_i); double relres = 1; i = 0; while (relres > 1e-6 && i++ < maxiters) { /*while (i++ < 1) {*/ matvec(A_vec, d_vec, k); double alpha = rtr / ddot(d_vec+first_i, A_vec+first_i, last_i-first_i); daxpy(x_vec, d_vec, 1, alpha, n); daxpy(r_vec, A_vec, 1, -1*alpha, n); double rtrold = rtr; rtr = ddot(r_vec+first_i, r_vec+first_i, last_i-first_i); double beta = rtr / rtrold; daxpy(d_vec, r_vec, beta, 1, n); relres = sqrt(rtr) / normb; } return x_vec; }
void loglike (int N, int W, int D, int T, double alpha, double beta, int *w, int *d, int **Nwt, int **Ndt, int *Nt, int *Nd) // { int i, j, t; double llike; static int init = 0; static double **prob_w_given_t; static double **prob_t_given_d; static double *Nd_; double Nt_; if (init==0) { init = 1; prob_w_given_t = dmat(W,T); prob_t_given_d = dmat(D,T); Nd_ = dvec(D); for (j = 0; j < D; j++) Nd_[j] = Nd[j] + T*alpha; } for (t = 0; t < T; t++) { Nt_ = Nt[t] + W*beta; for (i = 0; i < W; i++) prob_w_given_t[i][t] = (Nwt[i][t]+beta) / Nt_; for (j = 0; j < D; j++) prob_t_given_d[j][t] = (Ndt[j][t]+alpha)/ Nd_[j]; } llike = 0; for (i = 0; i < N; i++) llike += log(ddot(T, prob_w_given_t[w[i]], prob_t_given_d[d[i]])); printf(">>> llike = %.6e ", llike); printf("pplex = %.4f\n", exp(-llike/N)); }
static void task_ddot_xy_work_blocking( void * arg , TPI_ThreadPool pool ) { int p_size , p_rank ; if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) { struct TaskXY * const t = (struct TaskXY *) arg ; const unsigned block_size = t->block ; const unsigned block_start = block_size * p_rank ; const unsigned block_stride = block_size * p_size ; const double * const x_end = t->x_beg + t->number ; const double * x = t->x_beg + block_start ; const double * y = t->x_beg + block_start ; double local = 0.0 ; for ( ; x < x_end ; x += block_stride , y += block_stride ) { const unsigned n = x_end - x ; local += ddot( ( block_size < n ? block_size : n ) , x , y ); } t->xy_sum[ p_rank ] = local ; } }
static int DvechmatDot(void* AA, double x[], int nn, int n, double *v){ dvechmat* A=(dvechmat*)AA; ffinteger ione=1,nnn=nn; double dd,*val=A->AA->val; dd=ddot(&nnn,val,&ione,x,&ione); *v=2*dd*A->alpha; return 0; }
double KNITRO_EXPORT KTR_ddot (const int n, const double * const x, const int incx, const double * const y, const int incy) { return( ddot (n, x, incx, y, incy) ); }
void dgemv (double * A, double * x, double * y, int nrows, int ncols) { // Calcula o produto de uma matriz por um vetor while (nrows -- > 0) { * y ++ = ddot(A, x, ncols); A += ncols; } return ; }
static int DvechmatDot(void* AA, double x[], int nn, int n, double *v){ //printf("File %s line %d DvechmatDot with address %d\n",__FILE__, __LINE__,&DvechmatDot); dvechmat* A=(dvechmat*)AA; ffinteger ione=1,nnn=nn; double dd,*val=A->AA->val; dd=ddot(&nnn,val,&ione,x,&ione); *v=2*dd*A->alpha; return 0; }
double dangle( LWDVector a, LWDVector b ) { LWDVector na, nb; dcopyv( na, a ); dnormalize( na ); dcopyv( nb, b ); dnormalize( nb ); return acos( ddot( na, nb )); }
double inline cblas_ddot( const int n, const double* x, const int incx, const double* y, const int incy) { return ddot(n, x, incx, y, incy); }
/* ************************************************************ TIME-CRITICAL PROCEDURE -- r=realssqr(x,n) Computes r=sum(x_i^2) using BLAS. ************************************************************ */ double realssqr(const double *x, const mwIndex n) { mwIndex one=1; #ifdef PC return ddot(&n,x,&one,x,&one); #endif #ifdef UNIX return ddot_(&n,x,&one,x,&one); #endif }
double innerproduct(const Vector x, const Vector y) { double res=ddot(&x->len, x->data, &x->stride, y->data, &y->stride); #ifdef HAVE_MPI if (x->comm_size > 1) { double r2=res; MPI_Allreduce(&r2, &res, 1, MPI_DOUBLE, MPI_SUM, *x->comm); } #endif return res; }
double dotproduct(Vector u, Vector v) { double locres; locres = ddot(&u->len, u->data, &u->stride, v->data, &v->stride); return locres; #ifdef HAVE_MPI if (u->comm_size > 1) { MPI_Allreduce(&locres, &res, 1, MPI_DOUBLE, MPI_SUM, *u->comm); return res; } #endif }
/* compute a part of X*Y */ void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] ) { double *Xt, *Y, *Z, *I, *J, *v, LL; ptrdiff_t m, n, r, L, p, ir, jr, k; ptrdiff_t inc = 1; if (nargin != 5 || nargout > 1) mexErrMsgTxt ("Usage: v = partXY (Xt, Y, I, J, L)") ; /* ---------------------------------------------------------------- */ /* inputs */ /* ---------------------------------------------------------------- */ Xt = mxGetPr( pargin [0] ); Y = mxGetPr( pargin [1] ); I = mxGetPr( pargin [2] ); J = mxGetPr( pargin [3] ); LL = mxGetScalar( pargin [4] ); L = (ptrdiff_t) LL; m = mxGetN( pargin [0] ); n = mxGetN( pargin [1] ); r = mxGetM( pargin [0] ); if ( r != mxGetM( pargin [1] )) mexErrMsgTxt ("rows of Xt must be equal to rows of Y") ; if ( r > m || r > n ) mexErrMsgTxt ("rank must be r <= min(m,n)") ; /* ---------------------------------------------------------------- */ /* output */ /* ---------------------------------------------------------------- */ pargout [0] = mxCreateDoubleMatrix(1, L, mxREAL); v = mxGetPr( pargout [0] ); /* C array indices start from 0 */ for (p = 0; p < L; p++) { ir = ( I[p] - 1 ) * r; jr = ( J[p] - 1 ) * r; /* v[p] = 0.0; for (k = 0; k < r; k++) v[p] += Xt[ ir + k ] * Y[ jr + k ];*/ v[p] = ddot(&r, Xt+ir, &inc, Y+jr, &inc); } return; }
/* compute a part of X*Y */ void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] ) { if (nargin != 5 || nargout > 1) mexErrMsgTxt ("Usage: v = partXY (Xt, Y, I, J, L)") ; /* ---------------------------------------------------------------- */ /* inputs */ /* ---------------------------------------------------------------- */ double *Xt = mxGetPr( pargin [0] ); double *Y = mxGetPr( pargin [1] ); double *I = mxGetPr( pargin [2] ); double *J = mxGetPr( pargin [3] ); double LL = mxGetScalar( pargin [4] ); ptrdiff_t L = (ptrdiff_t) LL; ptrdiff_t m = mxGetN( pargin [0] ); ptrdiff_t n = mxGetN( pargin [1] ); ptrdiff_t r = mxGetM( pargin [0] ); if ( r != mxGetM( pargin [1] )) mexErrMsgTxt ("rows of Xt must be equal to rows of Y") ; if ( r > m || r > n ) mexErrMsgTxt ("rank must be r <= min(m,n)") ; /* ---------------------------------------------------------------- */ /* output */ /* ---------------------------------------------------------------- */ pargout [0] = mxCreateDoubleMatrix(1, L, mxREAL); double *v = mxGetPr( pargout [0] ); ptrdiff_t inc = 1; /* C array indices start from 0 */ for (ptrdiff_t p = 0; p < L; p++) { ptrdiff_t ir = ( I[p] - 1 ) * r; ptrdiff_t jr = ( J[p] - 1 ) * r; v[p] = ddot(&r, Xt+ir, &inc, Y+jr, &inc); } return; }
template <class V1, class V2> double dot_impl( const V1 &v1, const V2 &v2) { assert(v1.size() == v2.size()); if(v1.stride() > 0 && v2.stride() > 0){ return ddot(v1.size(), v1.data(), v1.stride(), v2.data(), v2.stride()); }else{ double ans = 0; for(int i = 0; i < v1.size(); ++i){ ans += v1[i] * v2[i]; } return ans; } }
int main( int argc, char* argv[] ) { int p, rank, slice, size = 8; double r; double* v; double* w; double a, b; MPI_Init( &argc, &argv ); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &rank); slice = size/p; v = (double *)malloc( (slice) * sizeof(double) ); w = (double *)malloc( (slice) * sizeof(double) ); // Initialize v & w int i; for (i = 0; i < slice; i++) { v[i] = 1.0; w[i] = 2.0; } // Initialize alpha & beta a = 3, b = 2; r = ddot(v,w,slice); daxpy(v,w,slice,a,b); // if(rank==0) { printf( "ddot result:\nr = %3.2f\n\n", r); printf("daxpy ( v = %3.2f*v + %3.2f*w ) result:\n", a, b); } for (i = 0; i < slice; i++) printf("%3.2f\n", v[i]); free(v); free(w); MPI_Finalize(); return 0; }
NLuint nlSolve_CG_precond() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLdouble* r = NL_NEW_ARRAY(NLdouble, N) ; NLdouble* d = NL_NEW_ARRAY(NLdouble, N) ; NLdouble* h = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *Ad = h; NLuint its=0; NLdouble rh, alpha, beta; NLdouble b_square = ddot(N,b,1,b,1); NLdouble err=eps*eps*b_square; NLint i; NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble accu =0.0; NLdouble curr_err; nlCurrentContext->matrix_vector_prod(x,r); daxpy(N,-1.,b,1,r,1); nlCurrentContext->precond_vector_prod(r,d); dcopy(N,d,1,h,1); rh=ddot(N,r,1,h,1); curr_err = ddot(N,r,1,r,1); while ( curr_err >err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ; } nlCurrentContext->matrix_vector_prod(d,Ad); alpha=rh/ddot(N,d,1,Ad,1); daxpy(N,-alpha,d,1,x,1); daxpy(N,-alpha,Ad,1,r,1); nlCurrentContext->precond_vector_prod(r,h); beta=1./rh; rh=ddot(N,r,1,h,1); beta*=rh; dscal(N,beta,d,1); daxpy(N,1.,h,1,d,1); ++its; // calcul de l'erreur courante curr_err = ddot(N,r,1,r,1); } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(d) ; NL_DELETE_ARRAY(h) ; return its; }
/*--------------------------------------------------------- fem_sc_prod - to compute a scalar product of two global vectors ---------------------------------------------------------*/ double fem_sc_prod( /* retruns: scalar product of Vector1 and Vector2 */ int Solver_id, /* in: solver data structure to be used */ int Level_id, /* in: level number */ int Nrdof, /* in: number of vector components */ double* Vector1, /* in: local part of global Vector */ double* Vector2 /* in: local part of global Vector */ ) { const int IONE=1; /*++++++++++++++++ executable statements ++++++++++++++++*/ //return(ddr_sol_sc_prod(Solver_id, Level_id, Nrdof, Vector1, Vector2)); return(ddot(&Nrdof, Vector1, &IONE, Vector2, &IONE)); }
INT NS_DIM_PREFIX dematmul (MULTIGRID *mg, INT fl, INT tl, INT mode, EVECDATA_DESC *x, const EMATDATA_DESC *M, const EVECDATA_DESC *y) { INT i,j,n,ret,level; DOUBLE a; if (x->n!=M->n || M->n!=y->n) return NUM_ERROR; n=M->n; ret=dmatmul(mg,fl,tl,mode,x->vd,M->mm,y->vd); if (ret!=NUM_OK) return ret; for (i=0; i<n; i++) { ret=daxpy(mg,fl,tl,mode,x->vd,EVDD_E(y,tl,i),M->me[i]); if (ret!=NUM_OK) return ret; ret=ddot(mg,fl,tl,mode,y->vd,M->em[i],&a); if (ret!=NUM_OK) return ret;EVDD_E(x,tl,i)=a; for (level=fl; level<=tl; level++) for (j=0; j<n; j++) EVDD_E(x,tl,i)+=EMDD_EE(M,level,i*n+j)*EVDD_E(y,tl,j); } return NUM_OK; }
NLuint nlSolve_CG() { NLdouble* b = nlCurrentContext->b ; NLdouble* x = nlCurrentContext->x ; NLdouble eps = nlCurrentContext->threshold ; NLuint max_iter = nlCurrentContext->max_iterations ; NLint N = nlCurrentContext->n ; NLdouble *g = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; NLdouble *p = NL_NEW_ARRAY(NLdouble, N) ; NLuint its=0; NLint i; NLdouble t, tau, sig, rho, gam; NLdouble b_square=ddot(N,b,1,b,1); NLdouble err=eps*eps*b_square; NLdouble accu =0.0; NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n); NLdouble curr_err; nlCurrentContext->matrix_vector_prod(x,g); daxpy(N,-1.,b,1,g,1); dscal(N,-1.,g,1); dcopy(N,g,1,r,1); curr_err = ddot(N,g,1,g,1); while ( curr_err >err && its < max_iter) { if(!(its % 100)) { printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ; } nlCurrentContext->matrix_vector_prod(r,p); rho=ddot(N,p,1,p,1); sig=ddot(N,r,1,p,1); tau=ddot(N,g,1,r,1); t=tau/sig; daxpy(N,t,r,1,x,1); daxpy(N,-t,p,1,g,1); gam=(t*t*rho-tau)/tau; dscal(N,gam,r,1); daxpy(N,1.,g,1,r,1); ++its; curr_err = ddot(N,g,1,g,1); } nlCurrentContext->matrix_vector_prod(x,Ax); for(i = 0 ; i < N ; ++i) accu+=(Ax[i]-b[i])*(Ax[i]-b[i]); printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square)); NL_DELETE_ARRAY(Ax); NL_DELETE_ARRAY(g) ; NL_DELETE_ARRAY(r) ; NL_DELETE_ARRAY(p) ; return its; }
/* This function compute a projection of a vector b onto the null space of a matrix using orthogonal matrices computed from an svd of an original matrix using the sunperf routine dgesvd. Arguments: U - matrix of singular vectors used to compute the projector. The projector is computed as I - UU^T Note: vectors are assumed stored in columns ala FORTRAN m - number of rows in U n - number of columns of U to use in forming the projector. (the output is a copy of the input if n>= m) b - vector to be projected (length m) bp - vector to hold projection (length m) Normal return is 0. Nonzero return indicates and error has been posted with elog_log. Author: Gary Pavlis */ int null_project(double *U,int m, int n, double *b, double *bp) { int i; double val; dcopy(m,b,1,bp,1); if(n>=m) { elog_log(0,"null_project passed illegal U matrix of size %d by %d\nProjection request discarded\n", m,n); return(1); } for(i=0;i<n;i++) { val = ddot(m,(U+i*m),1,b,1); daxpy(m,-val,(U+i*m),1,bp,1); } return(0); }
// Compute the expected topic factor prediction by integrating over topics double integrateFactorVectors(double *cVec, double *dVec, double *logThetaPtrU, double *logThetaPtrM, int numTopicFacs, int numTopicFacsTimesNumUsers, int numTopicFacsTimesNumItems, int KU, int KM) { int blasStride = 1; double y = 0; for (int ku = 0; ku < KU; ku++) { for (int km = 0; km < KM; km++) { double pu = exp(logThetaPtrU[ku]); double pm = exp(logThetaPtrM[km]); y += pu*pm*ddot(&numTopicFacs, cVec + numTopicFacsTimesNumUsers*km, &blasStride, dVec + numTopicFacsTimesNumItems*ku, &blasStride); } } return y; }
static void task_ddot_xy_work( void * arg , TPI_ThreadPool pool ) { int p_size , p_rank ; if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) { struct TaskXY * const t = (struct TaskXY *) arg ; const unsigned n_total = t->number ; const unsigned n_begin = ( n_total * ( p_rank ) ) / p_size ; const unsigned n_end = ( n_total * ( p_rank + 1 ) ) / p_size ; const unsigned n_local = ( n_end - n_begin ); const double * x = t->x_beg + n_begin ; const double * y = t->y_beg + n_begin ; t->xy_sum[ p_rank ] = ddot( n_local , x , y ); } }
void globaldotproduct( long long *localn, // length of local vector segments double *localx, // local segment of vector x double *localy, // local segment of vector y double *dotval, // the value of the dotproduct of the full global x and y long long *localrank, // rank of local process, in range 0 to nprocs-1 long long *nprocs, // total number of processes in dotcomm MPI_Comm *dotcomm // pointer to mpi communicator ) { long long incx = 1ll; long long incy = 1ll; long long gather_iteration = 0ll; double localsum; if( ((int)*localn) != 0) { localsum = ddot(localn, localx, &incx, localy, &incy); } MPI_Allreduce(&localsum, dotval, 1, MPI_DOUBLE, MPI_SUM, *dotcomm); }
int pseudo_inv_solver(double *U, double *Vt, double *s, int m, int n, double *b, double maxcon, double *x) { double sv_cutoff; double *work; int nsvused; int i, j; work = (double *)calloc(n,sizeof(double)); if(work == NULL) elog_die(0,"pseudoinverse solver cannot alloc array of %d doubles\n", n); /*dgesvd returns singular values in descending order so finding the largest is trivial. We use this to establish the sv cutuff */ sv_cutoff = s[0]/maxcon; /* multiply by S-1 * UT */ nsvused = 0; for(j=0;j<n;++j) { if(s[j]<sv_cutoff) break; work[j] = ddot(m,(U+j*m),1,b,1); work[j] /= s[j]; ++nsvused; } for(i=0;i<n;++i) x[i]=0.0; /* This is the right form because of Vt */ for(j=0;j<nsvused;++j) { daxpy(n,work[j],(Vt+j),n,x,1); } free(work); return(nsvused); }
/* * Updates statistics. */ void updateStatistics(pwork* w) { pfloat nry, nrz; stats* info = w->info; /* mu = (s'*z + kap*tau) / (D+1) where s'*z is the duality gap */ info->gap = ddot(w->m, w->s, w->z); info->mu = (info->gap + w->kap*w->tau) / (w->D + 1); info->kapovert = w->kap / w->tau; info->pcost = w->cx / w->tau; info->dcost = -(w->hz + w->by) / w->tau; /* relative duality gap */ if( info->pcost < 0 ){ info->relgap = info->gap / (-info->pcost); } else if( info->dcost > 0 ){ info->relgap = info->gap / info->dcost; } else info->relgap = NAN; /* residuals */ nry = w->p > 0 ? norm2(w->ry, w->p)/w->resy0 : 0.0; nrz = norm2(w->rz, w->m)/w->resz0; info->pres = MAX(nry, nrz) / w->tau; info->dres = norm2(w->rx, w->n)/w->resx0 / w->tau; /* infeasibility measures * * CVXOPT uses the following: * info->pinfres = w->hz + w->by < 0 ? w->hresx / w->resx0 / (-w->hz - w->by) : NAN; * info->dinfres = w->cx < 0 ? MAX(w->hresy/w->resy0, w->hresz/w->resz0) / (-w->cx) : NAN; */ info->pinfres = w->hz + w->by < 0 ? w->hresx/w->resx0 : NAN; info->dinfres = w->cx < 0 ? MAX(w->hresy/w->resy0, w->hresz/w->resz0) : NAN; #if PRINTLEVEL > 2 PRINTTEXT("TAU=%6.4e KAP=%6.4e PINFRES=%6.4e DINFRES=%6.4e\n",w->tau,w->kap,info->pinfres, info->dinfres ); #endif }
void dgemm (double * A, double * B, double * C, int nrowsA, int nrowsB, int ncolsC) { // Calcula o produto de duas matrizes double * ptB, * tB = (double *) malloc(nrowsB * ncolsC * sizeof(double)); if (tB == NULL) { printf("Falha ao alocar a memória de trabalho necessária"); exit(1); } ptB = tB; for (int i = 0 ; i < nrowsB ; ++ i) { for (int j = 0 ; j < ncolsC ; ++ j) { * ptB ++ = * (B + j * ncolsC + i); } } while (nrowsA -- > 0) { ptB = tB; for (int i = 0; i < nrowsB; ++ i) { * C ++ = ddot(A, ptB, ncolsC); ptB += ncolsC; } A += nrowsB; } }
END_TEST START_TEST( using_MKL_DOT ) { PetscErrorCode ierr; int n = 5, incx = 1, incy = 1; double *x, *y, res; PetscMalloc(n*sizeof(double), &x); PetscMalloc(n*sizeof(double), &y); for( int i = 0; i < n; i++) { x[i] = i+3; y[i] = i+6; } res = ddot( &n, x, &incx, y, &incy); fail_unless( res == 210. , "res is %f", res); PetscFree(x); PetscFree(y); }