Exemple #1
0
void Update_lbfgs
   (int   n,               /* I  num unknowns              */
    int   t,               /* I  num vectors to store      */
    real  *s_vec,          /* I  x_new - x_old             */
    real  *y_vec,          /* I  g_new - g_old             */
    real  *Bs_vec,         /* IO scratch space             */
    int   *NUPDT,          /* IO num updates made to B/H   */
    real  *CMPS,           /* IO storage for t s_vecs      */
    real  *CMPY)           /* IO storage for t y_vecs      */
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*   This routine updates the limited memory BFGS approximations for
*   the Hessian B and its inverse H.  The update uses s_vec and
*   y_vec, whose product should be negative to preserve the positive
*   definiteness of the BFGS approximations.  If s'y is not a descent
*   direction, then y is "damped", an idea due to Powell.
*   Then y_vec becomes:
*        y_damped = alpha*y + (1-alpha)*Bs
*
*                                      0.8*s'Bs
*                       where alpha = ----------
*                                     s'Bs - s'y
*
*   If 0 <= s'y <= 1.0E-8 y'y then the update is skipped to prevent
*   division by a small number.
*
*   The L-BFGS structures are currently FORTRAN subroutines that
*   require the variables NUPDT, CMPS, and CMPY.  These are used as
*   work space storage and should not be altered between FORTRAN calls.
*********************************************************************/
{
  real  s_dot_y, sBs, y_dot_y, alpha;


/*-- Damp the estimate if necessary. */

  s_dot_y = ddot (n, s_vec, 1, y_vec, 1);
  multbv_ (&n, &t, s_vec, Bs_vec, NUPDT, CMPS, CMPY);
  sBs = ddot (n, s_vec, 1, Bs_vec, 1);

  if (s_dot_y < (0.2 * sBs)) {
    fprintf (bfgs_fp, "--- damping L-BFGS update\n");
    alpha = 0.8 * sBs / (sBs - s_dot_y);
    dscal (n, alpha, y_vec, 1);
    daxpy (n, (1.0 - alpha), Bs_vec, 1, y_vec, 1);
    s_dot_y = ddot (n, s_vec, 1, y_vec, 1);
  }

/*-- Decide whether to skip the update. */

  y_dot_y = ddot (n, y_vec, 1, y_vec, 1);
  if ((s_dot_y >= 0.0) && (s_dot_y <= (sqrt(MCHEPS) * y_dot_y))) {
    fprintf (bfgs_fp, "--- skipping L-BFGS update\n");
    return;
  }

/*-- Make the updates. */
  updtbh_ (&n, &t, s_vec, y_vec, NUPDT, CMPS, CMPY);

  return;
}
Exemple #2
0
/*
 * Computes residuals.
 *
 * hrx = -A'*y - G'*z;  rx = hrx - c.*tau;  hresx = norm(rx,2);
 * hry = A*x;           ry = hry - b.*tau;  hresy = norm(ry,2);
 * hrz = s + G*x;       rz = hrz - h.*tau;  hresz = norm(rz,2);
 * rt = kappa + c'*x + b'*y + h'*z;
 */ 
void computeResiduals(pwork *w)
{
	/* rx = -A'*y - G'*z - c.*tau */
	if( w->p > 0 ) {
        sparseMtVm(w->A, w->y, w->rx, 1, 0);
        sparseMtVm(w->G, w->z, w->rx, 0, 0);
    } else {
        sparseMtVm(w->G, w->z, w->rx, 1, 0);
    }
	w->hresx = norm2(w->rx, w->n);
	vsubscale(w->n, w->tau, w->c, w->rx);
		
	/* ry = A*x - b.*tau */
	if( w->p > 0 ){
        sparseMV(w->A, w->x, w->ry, 1, 1);
        w->hresy = norm2(w->ry, w->p);
        vsubscale(w->p, w->tau, w->b, w->ry);
    } else {
        w->hresy = 0;
        w->ry = NULL;
	}
    
	/* rz = s + G*x - h.*tau */
	sparseMV(w->G, w->x, w->rz, 1, 1);
	vadd(w->m, w->s, w->rz);
	w->hresz = norm2(w->rz, w->m);
	vsubscale(w->m, w->tau, w->h, w->rz);

	/* rt = kappa + c'*x + b'*y + h'*z; */
	w->cx = ddot(w->n, w->c, w->x);
	w->by = w->p > 0 ? ddot(w->p, w->b, w->y) : 0.0;
	w->hz = ddot(w->m, w->h, w->z);
	w->rt = w->kap + w->cx + w->by + w->hz;    
}
Exemple #3
0
double *cgsolve(int k)
{
	int i, first_i, last_i;
	int n = k * k;
	int maxiters = 1000 > 5*k ? 1000 : k;

	// partition data
	if (n % size) {
		first_i = (n / size + 1) * rank;
		last_i = (rank != size-1 ? first_i+n/size+1 : n);
	} else {
		first_i = n / size * rank;
		last_i = n / size * (rank + 1);
	}

	double *b_vec = (double *)malloc(n * sizeof(double));
	double *r_vec = (double *)malloc(n * sizeof(double));
	double *d_vec = (double *)malloc(n * sizeof(double));
	double *A_vec = (double *)malloc(n * sizeof(double));
	double *x_vec = (double *)malloc(n * sizeof(double));

	for (i=0; i<n; i++) {
		double tmp = cs240_getB(i, n);
		b_vec[i] = tmp;
		r_vec[i] = tmp;
		d_vec[i] = tmp;
		x_vec[i] = 0;
	}


	double normb = sqrt(ddot(b_vec+first_i, b_vec+first_i, last_i-first_i));
	double rtr = ddot(r_vec+first_i, r_vec+first_i, last_i-first_i);
	double relres = 1;

	i = 0;
	while (relres > 1e-6 && i++ < maxiters) {
	/*while (i++ < 1) {*/

		matvec(A_vec, d_vec, k);
		double alpha = rtr / ddot(d_vec+first_i, A_vec+first_i, last_i-first_i);
		daxpy(x_vec, d_vec, 1, alpha, n);
		daxpy(r_vec, A_vec, 1, -1*alpha, n);
		double rtrold = rtr;
		rtr = ddot(r_vec+first_i, r_vec+first_i, last_i-first_i);
		double beta = rtr / rtrold;
		daxpy(d_vec, r_vec, beta, 1, n);
		relres = sqrt(rtr) / normb;
	}
	return x_vec;
}
Exemple #4
0
void loglike (int N, int W, int D, int T, double alpha, double beta, int *w, int *d, int **Nwt, int **Ndt, int *Nt, int *Nd) //
{
	int    i, j, t;
	double llike;
	static int init = 0;
	static double **prob_w_given_t;
	static double **prob_t_given_d;
	static double *Nd_;
	double Nt_;

	if (init==0) {
		init = 1;
		prob_w_given_t = dmat(W,T);
		prob_t_given_d = dmat(D,T);
		Nd_ = dvec(D);
		for (j = 0; j < D; j++) Nd_[j] = Nd[j] + T*alpha;
	}

	for (t = 0; t < T; t++) {
		Nt_ = Nt[t] + W*beta;
		for (i = 0; i < W; i++) prob_w_given_t[i][t] = (Nwt[i][t]+beta) / Nt_;
		for (j = 0; j < D; j++) prob_t_given_d[j][t] = (Ndt[j][t]+alpha)/ Nd_[j];
	}

	llike = 0;
	for (i = 0; i < N; i++)
		llike += log(ddot(T, prob_w_given_t[w[i]], prob_t_given_d[d[i]]));

	printf(">>> llike = %.6e    ", llike);
	printf("pplex = %.4f\n", exp(-llike/N));
}
static void task_ddot_xy_work_blocking( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskXY * const t = (struct TaskXY *) arg ;

        const unsigned block_size   = t->block ;
        const unsigned block_start  = block_size * p_rank ;
        const unsigned block_stride = block_size * p_size ;

        const double * const x_end = t->x_beg + t->number ;
        const double * x = t->x_beg + block_start ;
        const double * y = t->x_beg + block_start ;

        double local = 0.0 ;

        for ( ; x < x_end ; x += block_stride , y += block_stride ) {
            const unsigned n = x_end - x ;
            local += ddot( ( block_size < n ? block_size : n ) , x , y );
        }

        t->xy_sum[ p_rank ] = local ;
    }
}
Exemple #6
0
static int DvechmatDot(void* AA, double x[], int nn, int n, double *v){
  dvechmat* A=(dvechmat*)AA;
  ffinteger ione=1,nnn=nn;
  double dd,*val=A->AA->val;
  dd=ddot(&nnn,val,&ione,x,&ione);
  *v=2*dd*A->alpha;
  return 0;
}
Exemple #7
0
double  KNITRO_EXPORT  KTR_ddot  (const int             n,
                                  const double * const  x,
                                  const int             incx,
                                  const double * const  y,
                                  const int             incy)
{
    return( ddot (n, x, incx, y, incy) );
}
Exemple #8
0
void dgemv (double * A, double * x, double * y, int nrows, int ncols) {
// Calcula o produto de uma matriz por um vetor
	while (nrows -- > 0) {
		* y ++ = ddot(A, x, ncols);
		A += ncols;
		}
	return ;
	}
Exemple #9
0
static int DvechmatDot(void* AA, double x[], int nn, int n, double *v){
  //printf("File %s line %d DvechmatDot with address %d\n",__FILE__, __LINE__,&DvechmatDot);
  dvechmat* A=(dvechmat*)AA;
  ffinteger ione=1,nnn=nn;
  double dd,*val=A->AA->val;
  dd=ddot(&nnn,val,&ione,x,&ione);
  *v=2*dd*A->alpha;
  return 0;
}
Exemple #10
0
double dangle( LWDVector a, LWDVector b )
{
   LWDVector na, nb;

   dcopyv( na, a );
   dnormalize( na );
   dcopyv( nb, b );
   dnormalize( nb );
   return acos( ddot( na, nb ));
}
double inline
cblas_ddot(
  const int     n,
  const double* x,
  const int     incx,
  const double* y,
  const int     incy)
{
  return ddot(n, x, incx, y, incy);
}
Exemple #12
0
/* ************************************************************
   TIME-CRITICAL PROCEDURE -- r=realssqr(x,n)
   Computes r=sum(x_i^2) using BLAS.
   ************************************************************ */
double realssqr(const double *x, const mwIndex n)
{
    mwIndex one=1;
    #ifdef PC
    return ddot(&n,x,&one,x,&one);
    #endif

    #ifdef UNIX
    return ddot_(&n,x,&one,x,&one);
    #endif    
}
Exemple #13
0
double innerproduct(const Vector x, const Vector y)
{
  double res=ddot(&x->len, x->data, &x->stride, y->data, &y->stride);
#ifdef HAVE_MPI
  if (x->comm_size > 1) {
    double r2=res;
    MPI_Allreduce(&r2, &res, 1, MPI_DOUBLE, MPI_SUM, *x->comm);
  }
#endif

  return res;
}
Exemple #14
0
double dotproduct(Vector u, Vector v)
{
  double locres;
  locres = ddot(&u->len, u->data, &u->stride, v->data, &v->stride);
  return locres;

#ifdef HAVE_MPI
  if (u->comm_size > 1) {
    MPI_Allreduce(&locres, &res, 1, MPI_DOUBLE, MPI_SUM, *u->comm);
    return res;
  }
#endif
}
Exemple #15
0
/* compute a part of X*Y */
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin  [ ]
)
{
    double *Xt, *Y, *Z, *I, *J, *v, LL;
    ptrdiff_t m, n, r, L, p, ir, jr, k;
    ptrdiff_t inc = 1;

    if (nargin != 5 || nargout > 1)
        mexErrMsgTxt ("Usage: v = partXY (Xt, Y, I, J, L)") ;

    /* ---------------------------------------------------------------- */
    /* inputs */
    /* ---------------------------------------------------------------- */
    
    Xt = mxGetPr( pargin [0] );
    Y  = mxGetPr( pargin [1] );
    I  = mxGetPr( pargin [2] );
    J  = mxGetPr( pargin [3] );
    LL = mxGetScalar( pargin [4] ); L = (ptrdiff_t) LL;
    m  = mxGetN( pargin [0] );
    n  = mxGetN( pargin [1] );
    r  = mxGetM( pargin [0] ); 
    if ( r != mxGetM( pargin [1] ))
        mexErrMsgTxt ("rows of Xt must be equal to rows of Y") ;
    if ( r > m || r > n )
        mexErrMsgTxt ("rank must be r <= min(m,n)") ;
    
    /* ---------------------------------------------------------------- */
    /* output */
    /* ---------------------------------------------------------------- */

    pargout [0] = mxCreateDoubleMatrix(1, L, mxREAL);
    v = mxGetPr( pargout [0] );
    
    /* C array indices start from 0 */
    for (p = 0; p < L; p++) {
        ir = ( I[p] - 1 ) * r;
        jr = ( J[p] - 1 ) * r;
        /* v[p] = 0.0;
        for (k = 0; k < r; k++)
            v[p] += Xt[ ir + k ] * Y[ jr + k ];*/
        v[p] = ddot(&r, Xt+ir, &inc, Y+jr, &inc);
    }
    
    return;
}
Exemple #16
0
/* compute a part of X*Y */
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin  [ ]
)
{
    if (nargin != 5 || nargout > 1)
        mexErrMsgTxt ("Usage: v = partXY (Xt, Y, I, J, L)") ;

    /* ---------------------------------------------------------------- */
    /* inputs */
    /* ---------------------------------------------------------------- */
    
    double *Xt = mxGetPr( pargin [0] );
    double *Y  = mxGetPr( pargin [1] );
    double *I  = mxGetPr( pargin [2] );
    double *J  = mxGetPr( pargin [3] );
    double  LL = mxGetScalar( pargin [4] ); 
    
    ptrdiff_t L = (ptrdiff_t) LL;
    ptrdiff_t m = mxGetN( pargin [0] );
    ptrdiff_t n = mxGetN( pargin [1] );
    ptrdiff_t r = mxGetM( pargin [0] ); 
    
    if ( r != mxGetM( pargin [1] ))
        mexErrMsgTxt ("rows of Xt must be equal to rows of Y") ;
    if ( r > m || r > n )
        mexErrMsgTxt ("rank must be r <= min(m,n)") ;
    
    /* ---------------------------------------------------------------- */
    /* output */
    /* ---------------------------------------------------------------- */

    pargout [0] = mxCreateDoubleMatrix(1, L, mxREAL);
    double *v = mxGetPr( pargout [0] );
    ptrdiff_t inc = 1;
    
    /* C array indices start from 0 */
    for (ptrdiff_t p = 0; p < L; p++) 
    {
        ptrdiff_t ir = ( I[p] - 1 ) * r;
        ptrdiff_t jr = ( J[p] - 1 ) * r;
        v[p] = ddot(&r, Xt+ir, &inc, Y+jr, &inc);
    }
    
    return;
}
Exemple #17
0
 template <class V1, class V2> double dot_impl(
     const V1 &v1, const V2 &v2) {
   assert(v1.size() == v2.size());
   if(v1.stride() > 0 && v2.stride() > 0){
     return ddot(v1.size(),
                 v1.data(), v1.stride(),
                 v2.data(), v2.stride());
   }else{
     double ans = 0;
     for(int i = 0; i < v1.size(); ++i){
       ans += v1[i] * v2[i];
     }
     return ans;
   }
 }
int main( int argc, char* argv[] )
{
    int p, rank, slice, size = 8;
    double r;
    double* v;
    double* w;
    double a, b;

    MPI_Init( &argc, &argv );
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    slice = size/p;

    v = (double *)malloc( (slice) * sizeof(double) );
    w = (double *)malloc( (slice) * sizeof(double) );


    // Initialize v & w
    int i;
    for (i = 0; i < slice; i++)
    {
        v[i] = 1.0;
        w[i] = 2.0;
    }

    // Initialize alpha & beta
    a = 3, b = 2;

    r = ddot(v,w,slice);
    daxpy(v,w,slice,a,b);

    //
    if(rank==0)
    {
        printf( "ddot result:\nr = %3.2f\n\n", r);
        printf("daxpy ( v = %3.2f*v + %3.2f*w ) result:\n", a, b);
    }

    for (i = 0; i < slice; i++)
        printf("%3.2f\n", v[i]);

    free(v);
    free(w);

    MPI_Finalize();

    return 0;
}
NLuint nlSolve_CG_precond()  {
    NLdouble* b        = nlCurrentContext->b ;
    NLdouble* x        = nlCurrentContext->x ;
    NLdouble  eps      = nlCurrentContext->threshold ;
    NLuint    max_iter = nlCurrentContext->max_iterations ;
    NLint     N        = nlCurrentContext->n ;

    NLdouble* r = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble* d = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble* h = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *Ad = h;
    NLuint its=0;
    NLdouble rh, alpha, beta;
    NLdouble b_square = ddot(N,b,1,b,1);
    NLdouble err=eps*eps*b_square;
    NLint i;
    NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n);
    NLdouble accu =0.0;
    NLdouble curr_err;
    

    nlCurrentContext->matrix_vector_prod(x,r);
    daxpy(N,-1.,b,1,r,1);
    nlCurrentContext->precond_vector_prod(r,d);
    dcopy(N,d,1,h,1);
    rh=ddot(N,r,1,h,1);
    curr_err = ddot(N,r,1,r,1);
    while ( curr_err >err && its < max_iter) {
	if(!(its % 100)) {
	   printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ;
	}
        nlCurrentContext->matrix_vector_prod(d,Ad);
        alpha=rh/ddot(N,d,1,Ad,1);
        daxpy(N,-alpha,d,1,x,1);
        daxpy(N,-alpha,Ad,1,r,1);
        nlCurrentContext->precond_vector_prod(r,h);
        beta=1./rh; rh=ddot(N,r,1,h,1); beta*=rh;
        dscal(N,beta,d,1);
        daxpy(N,1.,h,1,d,1);
        ++its;
        // calcul de l'erreur courante
        curr_err = ddot(N,r,1,r,1);

    }
    nlCurrentContext->matrix_vector_prod(x,Ax);
    for(i = 0 ; i < N ; ++i)
        accu+=(Ax[i]-b[i])*(Ax[i]-b[i]);
    printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square));
    NL_DELETE_ARRAY(Ax);
    NL_DELETE_ARRAY(r) ;
    NL_DELETE_ARRAY(d) ;
    NL_DELETE_ARRAY(h) ;
    
    return its;
}
/*---------------------------------------------------------
  fem_sc_prod - to compute a scalar product of two global vectors
---------------------------------------------------------*/
double fem_sc_prod( /* retruns: scalar product of Vector1 and Vector2 */
    int Solver_id,        /* in: solver data structure to be used */
    int Level_id,         /* in: level number */
    int Nrdof,           /* in: number of vector components */
    double* Vector1,     /* in: local part of global Vector */
    double* Vector2      /* in: local part of global Vector */
)
{

    const int IONE=1;

    /*++++++++++++++++ executable statements ++++++++++++++++*/

    //return(ddr_sol_sc_prod(Solver_id, Level_id, Nrdof, Vector1, Vector2));

    return(ddot(&Nrdof, Vector1, &IONE, Vector2, &IONE));
}
Exemple #21
0
INT NS_DIM_PREFIX dematmul (MULTIGRID *mg, INT fl, INT tl, INT mode, EVECDATA_DESC *x, const EMATDATA_DESC *M, const EVECDATA_DESC *y)
{
  INT i,j,n,ret,level;
  DOUBLE a;

  if (x->n!=M->n || M->n!=y->n) return NUM_ERROR;
  n=M->n;
  ret=dmatmul(mg,fl,tl,mode,x->vd,M->mm,y->vd); if (ret!=NUM_OK) return ret;
  for (i=0; i<n; i++)
  {
    ret=daxpy(mg,fl,tl,mode,x->vd,EVDD_E(y,tl,i),M->me[i]); if (ret!=NUM_OK) return ret;
    ret=ddot(mg,fl,tl,mode,y->vd,M->em[i],&a); if (ret!=NUM_OK) return ret;EVDD_E(x,tl,i)=a;
    for (level=fl; level<=tl; level++)
      for (j=0; j<n; j++)
        EVDD_E(x,tl,i)+=EMDD_EE(M,level,i*n+j)*EVDD_E(y,tl,j);
  }
  return NUM_OK;
}
NLuint nlSolve_CG() {
    NLdouble* b        = nlCurrentContext->b ;
    NLdouble* x        = nlCurrentContext->x ;
    NLdouble  eps      = nlCurrentContext->threshold ;
    NLuint    max_iter = nlCurrentContext->max_iterations ;
    NLint     N        = nlCurrentContext->n ;

    NLdouble *g = NL_NEW_ARRAY(NLdouble, N) ;
    NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; 
    NLdouble *p = NL_NEW_ARRAY(NLdouble, N) ;
    NLuint its=0;
    NLint i;
    NLdouble t, tau, sig, rho, gam;
    NLdouble b_square=ddot(N,b,1,b,1);
    NLdouble err=eps*eps*b_square;
    NLdouble accu =0.0;
    NLdouble * Ax=NL_NEW_ARRAY(NLdouble,nlCurrentContext->n);
    NLdouble curr_err;
    
    nlCurrentContext->matrix_vector_prod(x,g);
    daxpy(N,-1.,b,1,g,1);
    dscal(N,-1.,g,1);
    dcopy(N,g,1,r,1);
    curr_err = ddot(N,g,1,g,1);
    while ( curr_err >err && its < max_iter) {
	    if(!(its % 100)) {
	        printf ( "%d : %.10e -- %.10e\n", its, curr_err, err ) ;
	    }
        nlCurrentContext->matrix_vector_prod(r,p);
        rho=ddot(N,p,1,p,1);
        sig=ddot(N,r,1,p,1);
        tau=ddot(N,g,1,r,1);
        t=tau/sig;
        daxpy(N,t,r,1,x,1);
        daxpy(N,-t,p,1,g,1);
        gam=(t*t*rho-tau)/tau;
        dscal(N,gam,r,1);
        daxpy(N,1.,g,1,r,1);
        ++its;
        curr_err = ddot(N,g,1,g,1); 
    }
    nlCurrentContext->matrix_vector_prod(x,Ax);
    for(i = 0 ; i < N ; ++i)
        accu+=(Ax[i]-b[i])*(Ax[i]-b[i]);
    printf("in OpenNL : ||Ax-b||/||b|| = %e\n",sqrt(accu)/sqrt(b_square));
    NL_DELETE_ARRAY(Ax);
    NL_DELETE_ARRAY(g) ;
    NL_DELETE_ARRAY(r) ;
    NL_DELETE_ARRAY(p) ;
    return its;
} 
/* This function compute a projection of a vector b onto the 
null space of a matrix using orthogonal matrices computed from
an svd of an original matrix using the sunperf routine 
dgesvd.  

Arguments:
	U - matrix of singular vectors used to compute the projector.
		The projector is computed as I - UU^T 
		Note:  vectors are assumed stored in columns ala FORTRAN
	m - number of rows in U
	n - number of columns of U to use in forming the projector.
		(the output is a copy of the input if n>= m)
	b - vector to be projected (length m)
	bp - vector to hold projection  (length m)

Normal return is 0.   Nonzero return indicates and error has been
posted with elog_log.  

Author:  Gary Pavlis
*/
int null_project(double *U,int m, int n, double *b, double *bp)
{
	int i;
	double val;

	dcopy(m,b,1,bp,1);
	if(n>=m)
	{
		elog_log(0,"null_project passed illegal U matrix of size %d by %d\nProjection request discarded\n",
			m,n);
		return(1);
	}
	for(i=0;i<n;i++)
	{
		val = ddot(m,(U+i*m),1,b,1);
		daxpy(m,-val,(U+i*m),1,bp,1);
	}
	return(0);
}
Exemple #24
0
// Compute the expected topic factor prediction by integrating over topics
double integrateFactorVectors(double *cVec, double *dVec, 
			      double *logThetaPtrU, double *logThetaPtrM, 
			      int numTopicFacs, int numTopicFacsTimesNumUsers,
			      int numTopicFacsTimesNumItems, int KU, int KM) {

   int blasStride = 1;
   double y = 0;   
   for (int ku = 0; ku < KU; ku++) {
      for (int km = 0; km < KM; km++) {	 
	 double pu = exp(logThetaPtrU[ku]);
	 double pm = exp(logThetaPtrM[km]);	 
	 y += pu*pm*ddot(&numTopicFacs, cVec + numTopicFacsTimesNumUsers*km, 
			 &blasStride, dVec + numTopicFacsTimesNumItems*ku, 
			 &blasStride);
      }
   }
   
   return y;
}
static void task_ddot_xy_work( void * arg , TPI_ThreadPool pool )
{
    int p_size , p_rank ;

    if ( ! TPI_Rank( pool , & p_rank , & p_size ) ) {

        struct TaskXY * const t = (struct TaskXY *) arg ;

        const unsigned n_total = t->number ;
        const unsigned n_begin = ( n_total * ( p_rank     ) ) / p_size ;
        const unsigned n_end   = ( n_total * ( p_rank + 1 ) ) / p_size ;
        const unsigned n_local = ( n_end - n_begin );

        const double * x = t->x_beg + n_begin ;
        const double * y = t->y_beg + n_begin ;

        t->xy_sum[ p_rank ] = ddot( n_local , x , y );
    }
}
void globaldotproduct(
    long long *localn,        // length of local vector segments
    double *localx,           // local segment of vector x
    double *localy,           // local segment of vector y
    double *dotval,           // the value of the dotproduct of the full global x and y
    long long *localrank,     // rank of local process, in range 0 to nprocs-1
    long long *nprocs,        // total number of processes in dotcomm
    MPI_Comm *dotcomm         // pointer to mpi communicator
)
{
    long long incx = 1ll;
    long long incy = 1ll;
    long long gather_iteration = 0ll;
    double localsum;
    if( ((int)*localn) != 0)
    {
        localsum = ddot(localn, localx, &incx, localy, &incy);
    }
    MPI_Allreduce(&localsum, dotval, 1, MPI_DOUBLE, MPI_SUM, *dotcomm);
}
int pseudo_inv_solver(double *U, 
	double *Vt,
	double *s,
	int m, 
	int n, 
	double *b, 
	double maxcon, 
	double *x)
{
	double sv_cutoff;
	double *work;
	int nsvused;
	int i, j;

	work = (double *)calloc(n,sizeof(double));
	if(work == NULL) elog_die(0,"pseudoinverse solver cannot alloc array of %d doubles\n",
		n);

	/*dgesvd returns singular values in descending order so
	finding the largest is trivial.  We use this to establish
	the sv cutuff */
	sv_cutoff = s[0]/maxcon;  

	/* multiply by S-1 * UT */
	nsvused = 0;
	for(j=0;j<n;++j)
	{
		if(s[j]<sv_cutoff) break;
		work[j] = ddot(m,(U+j*m),1,b,1);
		work[j] /= s[j];
		++nsvused;
	}
	for(i=0;i<n;++i) x[i]=0.0;
	/* This is the right form because of Vt */
	for(j=0;j<nsvused;++j)
	{
		daxpy(n,work[j],(Vt+j),n,x,1);
	}
	free(work);
	return(nsvused);
}
Exemple #28
0
/* 
 * Updates statistics.
 */
void updateStatistics(pwork* w)
{
	pfloat nry, nrz;
	
	stats* info = w->info;
	
	/* mu = (s'*z + kap*tau) / (D+1) where s'*z is the duality gap */
	info->gap = ddot(w->m, w->s, w->z);
	info->mu = (info->gap + w->kap*w->tau) / (w->D + 1);

	info->kapovert = w->kap / w->tau;
	info->pcost = w->cx / w->tau;
	info->dcost = -(w->hz + w->by) / w->tau;

	/* relative duality gap */
	if( info->pcost < 0 ){ info->relgap = info->gap / (-info->pcost); }
	else if( info->dcost > 0 ){ info->relgap = info->gap / info->dcost; }
	else info->relgap = NAN;

	/* residuals */
    nry = w->p > 0 ? norm2(w->ry, w->p)/w->resy0 : 0.0;
    nrz = norm2(w->rz, w->m)/w->resz0;
	info->pres = MAX(nry, nrz) / w->tau;
	info->dres = norm2(w->rx, w->n)/w->resx0 / w->tau;
    
	/* infeasibility measures
     *
	 * CVXOPT uses the following:
     * info->pinfres = w->hz + w->by < 0 ? w->hresx / w->resx0 / (-w->hz - w->by) : NAN;
     * info->dinfres = w->cx < 0 ? MAX(w->hresy/w->resy0, w->hresz/w->resz0) / (-w->cx) : NAN;
     */
    info->pinfres = w->hz + w->by < 0 ? w->hresx/w->resx0 : NAN;
    info->dinfres = w->cx < 0 ? MAX(w->hresy/w->resy0, w->hresz/w->resz0) : NAN;
	
    
#if PRINTLEVEL > 2
    PRINTTEXT("TAU=%6.4e  KAP=%6.4e  PINFRES=%6.4e  DINFRES=%6.4e\n",w->tau,w->kap,info->pinfres, info->dinfres );
#endif
    
}
Exemple #29
0
void dgemm (double * A, double * B, double * C, int nrowsA, int nrowsB, int ncolsC) {
// Calcula o produto de duas matrizes
	double * ptB, * tB = (double *) malloc(nrowsB * ncolsC * sizeof(double));
	if (tB == NULL) {
		printf("Falha ao alocar a memória de trabalho necessária");
		exit(1);			
		}
	ptB = tB;
	for (int i = 0 ; i < nrowsB ; ++ i) {
		for (int j = 0 ; j < ncolsC ; ++ j) {
			* ptB ++ = * (B + j * ncolsC + i);
			}
		}
	while (nrowsA -- > 0) {
		ptB = tB;
		for (int i = 0; i < nrowsB; ++ i) {
			* C ++ = ddot(A, ptB, ncolsC);
			ptB += ncolsC;
			}
		A += nrowsB;
		}
	}
Exemple #30
0
END_TEST

START_TEST( using_MKL_DOT )
{
  PetscErrorCode ierr;
  
  int n = 5, incx = 1, incy = 1;
  double *x, *y, res;
  PetscMalloc(n*sizeof(double), &x);
  PetscMalloc(n*sizeof(double), &y);
  
  for( int i = 0; i < n; i++)
  {
    x[i] = i+3;
    y[i] = i+6;
  }
  
  res = ddot( &n, x, &incx, y, &incy);
  
  fail_unless( res == 210. , "res is %f", res);
  
  PetscFree(x);
  PetscFree(y);
}