void CONJUGATE_GRADIENT_UPDATE(int N, double *q, double *prev_q_update, double *prev_q_first_stage, int INFO[]) { static int inc = 1; //determine beta double cg_beta = 1.0; if (INFO[11] == 1) { if (INFO[2] == 0) { memcpy(prev_q_first_stage, q, sizeof(double)*N); memcpy(prev_q_update, q, sizeof(double)*N); return; } else { cg_beta = DDOT(&N, q, &inc, q, &inc); cg_beta /= std::fabs( cg_beta - DDOT(&N, q, &inc, prev_q_first_stage, &inc)); memcpy(prev_q_first_stage, q, sizeof(double)*N); } } else { if (INFO[2] == 0) { memcpy(prev_q_update, q, sizeof(double)*N); return; } } //determine new q const double minus_one = -1.0; if (cg_beta != 1.0) DSCAL(&N, &cg_beta, prev_q_update, &inc); DAXPY(&N, &minus_one, prev_q_update, &inc, q, &inc); double quad_a = DDOT(&N, q, &inc, q, &inc); double quad_b = DDOT(&N, q, &inc, prev_q_update, &inc); double cg_lambda = -quad_b / quad_a; if (cg_lambda > 1) cg_lambda = 1; else if (cg_lambda < 0) cg_lambda = 0; static double one = 1.0; DSCAL(&N, &cg_lambda, q, &inc); DAXPY(&N, &one, prev_q_update, &inc, q, &inc); memcpy(prev_q_update, q, sizeof(double)*N); }
void F77_NAME(daxpy)(const int *n, const double *alpha, const double *dx, const int *incx, double *dy, const int *incy) { DAXPY(n, alpha, dx, incx, dy, incy); }
/* Interface to FORTRAN routine DAXPY. */ void IpBlasDaxpy(Index size, Number alpha, const Number *x, Index incX, Number *y, Index incY) { ipfint N=size, INCX=incX, INCY=incY; DAXPY(&N, &alpha, x, &INCX, y, &INCY); }
void SpinAdapted::MatrixDiagonalScale(double d, const Matrix& a, double* b) { //assert (a.Nrows () == a.Ncols () && a.Nrows () == b.Ncols ()); #ifdef BLAS int n = a.Nrows (); DAXPY (n, d, a.Store (), n+1, b, 1); #else //b += d * a; Should add the non-blas analogue #endif }
void SpinAdapted::MatrixScaleAdd (double d, const Matrix& a, Matrix& b) { assert (a.Nrows () == b.Nrows () && a.Ncols () == b.Ncols ()); #ifdef BLAS int n = a.Nrows () * a.Ncols (); assert (n == (b.Nrows () * b.Ncols ())); DAXPY (n, d, a.Store (), 1, b.Store (), 1); #else b += d * a; #endif }
void HLBFGS_UPDATE_Second_Step(int N, int M, double *q, double *s, double *y, double *rho, double *alpha, int bound, int cur_pos, int iter) { if (M <= 0) { return; } int start; double tmp; static int inc = 1; for (int i = 0; i <= bound; i++) { start = iter<=M? i:(cur_pos+1+i)%M; tmp = alpha[i]-rho[start]*DDOT(&N, &y[start*N], &inc, q, &inc); DAXPY(&N, &tmp, &s[start*N], &inc, q, &inc); } }
void HLBFGS_UPDATE_First_Step(int N, int M, double *q, double *s, double *y, double *rho, double *alpha, int bound, int cur_pos, int iter) { if (M <= 0) { return; } int start; double tmp; static int inc = 1; for (int i = bound; i >= 0; i--) { start = iter<=M? cur_pos-bound+i:(cur_pos-(bound-i)+M)%M; alpha[i] = rho[start] * DDOT(&N, q, &inc, &s[start*N], &inc); tmp = -alpha[i]; DAXPY(&N, &tmp, &y[start*N], &inc, q, &inc); } }
void SpinAdapted::MatrixTensorProduct (const Matrix& a_ref, char conjA, Real scaleA, const Matrix& b_ref, char conjB, Real scaleB, Matrix& c, int rowstride, int colstride, bool allocate) { #ifndef BLAS Matrix A; Matrix B; #endif Matrix& a = const_cast<Matrix&>(a_ref); // for BLAS calls Matrix& b = const_cast<Matrix&>(b_ref); int arows = a.Nrows(); int acols = a.Ncols(); // some specialisations #ifdef FAST_MTP // if ((brows == 1) && (bcols == 1)) { double b00 = *b.Store(); if (conjA == 'n') { double* cptr = c.Store()+ rowstride*c.Ncols() + colstride; for (int i=0; i< a.Nrows();i++) DAXPY(a.Ncols(), scaleA * scaleB * b00, a.Store()+i*a.Ncols(), 1, cptr + i*c.Ncols(), 1); return; } else { double* aptr = a.Store(); double* cptr = c.Store() + rowstride*c.Ncols() + colstride; for (int col = 0; col < acols; ++col) { DAXPY(arows, scaleA * scaleB * b00, aptr, acols, cptr, 1); ++aptr; cptr += c.Ncols();//arows; } return; } } // else // abort(); #else try { if (conjA == 'n' && conjB == 'n') { if (allocate) { c.ReSize (a.Nrows () * b.Nrows (), a.Ncols () * b.Ncols ()); Clear (c); } //assert ((c.Nrows () == (a.Nrows () * b.Nrows ())) && (c.Ncols () == (a.Ncols () * b.Ncols ()))); #ifdef BLAS int aRows = a.Nrows (); int aCols = a.Ncols (); int bRows = b.Nrows (); int bCols = b.Ncols (); for (int i = 0; i < aRows; ++i) for (int j = 0; j < aCols; ++j) { Real scale = scaleA * scaleB * a (i+1,j+1); for (int k = 0; k < bRows; ++k) GAXPY (bCols, scale, &b (k+1,1), 1, &c (i * bRows + k+1 +rowstride,j * bCols+1+colstride), 1); } return; #else A = a; B = b; #endif } else if (conjA == 't' && conjB == 'n') { if (allocate) { c.ReSize (a.Ncols () * b.Nrows (), a.Nrows () * b.Ncols ()); Clear (c); } //assert ((c.Nrows () == (a.Ncols () * b.Nrows ())) && (c.Ncols () == (a.Nrows () * b.Ncols ()))); #ifdef BLAS int aRows = a.Ncols (); int aCols = a.Nrows (); int bRows = b.Nrows (); int bCols = b.Ncols (); for (int i = 0; i < aRows; ++i) for (int j = 0; j < aCols; ++j) { Real scale = scaleA * scaleB * a (j+1,i+1); for (int k = 0; k < bRows; ++k) GAXPY (bCols, scale, &b (k+1,1), 1, &c (i * bRows + k+1+rowstride,j * bCols+1+colstride), 1); } return; #else A = a.t (); B = b; #endif } else if (conjA == 'n' && conjB == 't') { if (allocate) { c.ReSize (a.Nrows () * b.Ncols (), a.Ncols () * b.Nrows ()); Clear (c); } //assert ((c.Nrows () == (a.Nrows () * b.Ncols ())) && (c.Ncols () == (a.Ncols () * b.Nrows ()))); #ifdef BLAS int aRows = a.Nrows (); int aCols = a.Ncols (); int bRows = b.Ncols (); int bCols = b.Nrows (); for (int i = 0; i < aRows; ++i) for (int j = 0; j < aCols; ++j) { Real scale = scaleA * scaleB * a (i+1,j+1); for (int k = 0; k < bRows; ++k) GAXPY (bCols, scale, &b (1,k+1), bRows, &c (i * bRows + k+1+rowstride,j * bCols+1+colstride), 1); } return; #else A = a; B = b.t (); #endif } else if (conjA == 't' && conjB == 't') { if (allocate) { c.ReSize (a.Ncols () * b.Ncols (), a.Nrows () * b.Nrows ()); Clear (c); } //assert ((c.Nrows () == (a.Ncols () * b.Ncols ())) && (c.Ncols () == (a.Nrows () * b.Nrows ()))); #ifdef BLAS int aRows = a.Ncols (); int aCols = a.Nrows (); int bRows = b.Ncols (); int bCols = b.Nrows (); for (int i = 0; i < aRows; ++i) for (int j = 0; j < aCols; ++j) { Real scale = scaleA * scaleB * a (j+1,i+1); for (int k = 0; k < bRows; ++k) GAXPY (bCols, scaleA * scaleB * a (j+1,i+1), &b (1,k+1), bRows, &c (i * bRows + k+1+rowstride,j * bCols+1+colstride), 1); } return; #else A = a.t (); B = b.t (); #endif } else abort (); #ifndef BLAS for (int i = 1; i <= A.Nrows (); ++i) for (int j = 1; j <= A.Ncols (); ++j) c.SubMatrix ((i - 1) * B.Nrows () + 1, i * B.Nrows (), (j - 1) * B.Ncols () + 1, j * B.Ncols ()) += (scaleA * scaleB) * A (i,j) * B; #endif } catch (Exception) { pout << Exception::what () << endl; abort (); } #endif }
void denseREKBLAS (MAT * A, double *x, const double *b, double TOL){ int m = A->m, n = A->n, k, i_k, j_k; double val, val2, *z, *rowProb, *colProb; unsigned int *rowSampl, *colSampl; z = (double *) malloc (m * sizeof (double)); rowProb = (double *) malloc (m * sizeof (double)); colProb = (double *) malloc (n * sizeof (double)); rowSampl = (unsigned int *) malloc (BLOCKSIZE * sizeof (unsigned int)); colSampl = (unsigned int *) malloc (BLOCKSIZE * sizeof (unsigned int)); // Copy A to A_transpose MAT *Atransp = createTransp (A); // Compute the sampling probabilities and the samples computeColNorms (A, colProb); // Compute the column probabilities computeColNorms (Atransp, rowProb); // Compute the columns probabilities of Atransp (n x m matrix) // Init alias sampler ALIAS *asRow = createAliasSampler (rowProb, m); ALIAS *asCol = createAliasSampler (colProb, n); // Sample indices mySampler (rowSampl, BLOCKSIZE, asRow); mySampler (colSampl, BLOCKSIZE, asCol); memcpy (z, b, m * sizeof (double)); // Initialize z: z = y; for (k = 0; k < MAXITERS; k++){ if ((k + 1) % BLOCKSIZE == 0 && residError (A, x, b, z) < TOL && residError(Atransp, z, b, b) < TOL){ printf ("-->REK_Dense stopped at %d <--\n", (int)k); break; } else if ((k + 1) % BLOCKSIZE == 0){ // Sample indices mySampler (rowSampl, BLOCKSIZE, asRow); mySampler (colSampl, BLOCKSIZE, asCol); } i_k = rowSampl[k % BLOCKSIZE]; j_k = colSampl[k % BLOCKSIZE]; val = DDOT (m, z, 1, &(A->val)[j_k * m], 1); val /= colProb[j_k]; val = -val; DAXPY (m, val, &(A->val)[j_k * m], 1, z, 1); val2 = DDOT (n, x, 1, &(Atransp->val)[i_k * n], 1); val2 = b[i_k] - z[i_k] - val2; val2 /= rowProb[i_k]; DAXPY (n, val2, &(Atransp->val)[i_k * n], 1, x, 1); } freeSampler (asRow); freeSampler (asCol); freeMAT (Atransp); free (z); free (rowProb); free (colProb); free (rowSampl); free (colSampl); };
void DCHDC(double *a, INT *plda, INT *pp, double *work, INT jpvt[], INT *pjob, INT *info) /*double a[lda][1], work[1];*/ { INT pu, pl, j, k, l, maxl, jtemp; INT inc = 1; double temp; double maxdia; double *ak, *apl, *akk, *aj, *apu, *all, *amaxl; INT swapk, negk, length; INT lda = *plda, p = *pp, job = *pjob; /* ***first executable statement dchdc */ pl = 0; pu = -1; *info = p; if (job != 0) { /* pivoting has been requested. rearrange the */ /* the elements according to jpvt. */ ak = a; apl = a + pl*lda; for(k = 0;k < p ;k++) { akk = ak + k; swapk = jpvt[k] > 0; negk = jpvt[k] < 0; jpvt[k] = k+1; if (negk) { jpvt[k] = -jpvt[k]; } if (swapk) { if (k != pl) { DSWAP(&pl, ak, &inc, apl, &inc); temp = *akk; *akk = apl[pl]; apl[pl] = temp; aj = apl + lda; for(j = pl+1;j < p ;j++) { if (j < k) { temp = aj[pl]; aj[pl] = ak[j]; ak[j] = temp; } else if (j != k) { temp = aj[k]; aj[k] = aj[pl]; aj[pl] = temp; } aj += lda; } /*for(j = pl+1;j < p ;j++)*/ jpvt[k] = jpvt[pl]; jpvt[pl] = k + 1; } /*if (k != pl) */ pl++; apl += lda; } /*if (swapk) */ ak += lda; } /*for(k = 0;k < p ;k++)*/ pu = p - 1; apu = ak = a + (p-1)*lda; for(k = p-1;k>=pl;k--) { akk = ak + k; if (jpvt[k] < 0) { jpvt[k] = -jpvt[k]; if (pu != k) { DSWAP(&k, ak, &inc, apu, &inc); temp = *akk; *akk = apu[pu]; apu[pu] = temp; aj = ak + lda; for(j = k+1;j < p ;j++) { if (j < pu) { temp = aj[k]; aj[k] = apu[j]; apu[j] = temp; } else if (j != pu) { temp = aj[k]; aj[k] = aj[pu]; aj[pu] = temp; } aj += lda; } /*for(j = k+1;j < p ;j++)*/ jtemp = jpvt[k]; jpvt[k] = jpvt[pu]; jpvt[pu] = jtemp; } /*if (pu != k) */ pu--; apu -= lda; } /*if (jpvt[k] < 0) */ ak -= lda; } /*for(k = p-1;k>=pl;k--)*/ } /*if (job != 0)*/ ak = a; for(k = 0;k < p ;k++) { /* reduction loop. */ akk = ak + k; maxdia = *akk; maxl = k; /* determine the pivot element. */ if (k >= pl && k < pu) { all = akk + lda + 1; for(l = k+1;l <= pu ;l++) { if (*all > maxdia) { maxdia = *all; maxl = l; } all += lda + 1; } } /* quit if the pivot element is not positive. */ if (maxdia <= 0.0e0) { *info = k; break; } if (k != maxl) { amaxl = a + maxl*lda; /* start the pivoting and update jpvt. */ DSWAP(&k, ak, &inc, amaxl, &inc); amaxl[maxl] = *akk; *akk = maxdia; jtemp = jpvt[maxl]; jpvt[maxl] = jpvt[k]; jpvt[k] = jtemp; } /*if (k != maxl) */ /* reduction step. pivoting is contained across the rows. */ work[k] = sqrt(*akk); *akk = work[k]; aj = ak + lda; amaxl = a + maxl*lda; for(j = k+1;j < p ;j++) { if (k != maxl) { temp = aj[k]; if (j < maxl) { aj[k] = amaxl[j]; amaxl[j] = temp; } else if (j != maxl) { aj[k] = aj[maxl]; aj[maxl] = temp; } } /*if (k != maxl)*/ aj[k] /= work[k]; work[j] = aj[k]; temp = -aj[k]; length = j - k; DAXPY(&length, &temp, work + k + 1, &inc, aj + k + 1, &inc); aj += lda; } /*for(j = k+1;j < p ;j++)*/ incAndTest((p-k+3)*(p-k)/2,errorExit); ak += lda; }/*for(k = 0;k < p ;k++)*/ /* fall through*/ errorExit: ; } /*dchdc()*/
static int rmvnorm_rng (lua_State *L) { nl_RNG *r = getrng(L); nl_Matrix *m = nl_checkmatrix(L, 1); nl_Matrix *S = nl_checkmatrix(L, 2); nl_Matrix *u; int i, n = m->size; lua_Number *em, *ev, *eu; /* check args */ checkrvector(L, m, 1); luaL_argcheck(L, !S->iscomplex, 2, "real matrix expected"); if (S->ndims == 1) { luaL_argcheck(L, S->size == n, 2, "arguments are not conformable"); for (i = 0, ev = S->data; i < n; i++, ev += S->stride) luaL_argcheck(L, *ev > 0, 2, "variance is not positive"); } else luaL_argcheck(L, S->ndims == 2 && S->dim[0] == n && S->dim[1] == n, 2, "arguments are not conformable"); /* setup destination */ lua_settop(L, 3); if (lua_isnil(L, 3)) u = nl_pushmatrix(L, 0, 1, &n, 1, n, lua_newuserdata(L, n * sizeof(lua_Number))); else { u = nl_checkmatrix(L, 3); checkrvector(L, u, 3); luaL_argcheck(L, u->size == n, 3, "arguments are not conformable"); } /* sample */ if (S->ndims == 1) { em = m->data; ev = S->data; eu = u->data; for (i = 0; i < n; i++) { *eu = gennor(r, *em, *ev); em += m->stride; ev += S->stride; eu += u->stride; } } else { char uplo = 'L', trans = 'N', diag = 'N'; lua_Number one = 1.0; /* u ~ N(0, I_n) */ eu = u->data; for (i = 0; i < n; i++, eu += u->stride) *eu = gennor(r, 0, 1); /* u = S * u */ if (S->stride != 1 /* non-unitary stride? */ || (S->section != NULL /* non-block section? */ && (S->section[0].step != 1 || S->section[1].step != 1))) { nl_Buffer *buf = nl_getbuffer(L, n * n); /* copy S to buffer */ for (i = 0; i < S->size; i++) buf->data.bnum[i] = S->data[nl_mshift(S, i)]; DTRMV(&uplo, &trans, &diag, &n, buf->data.bnum, &n, u->data, &u->stride, 1, 1, 1); nl_freebuffer(buf); } else { int ld = S->section ? S->section[0].ld : S->dim[0]; DTRMV(&uplo, &trans, &diag, &n, S->data, &ld, u->data, &u->stride, 1, 1, 1); } /* u = u + m */ DAXPY(&n, &one, m->data, &m->stride, u->data, &u->stride); } return 1; }