/*
 * Sparse inverse subset (MEX gateway).
 *
 *   Z = sinv(A)      A is sparse and square; its lower triangle is factorized
 *                    here with CHOLMOD (packed LDL') before the inverse
 *                    entries are computed.
 *   Z = sinv(LD, 1)  LD is taken to be an already computed factor in the same
 *                    layout (lower triangular, D stored on the diagonal); any
 *                    call with more than one input takes this path.
 *
 * pargout[0] receives a sparse symmetric matrix holding the entries of the
 * inverse on the pattern of the factor (after undoing the fill-reducing
 * permutation when the factorization was done here).
 *
 * Errors (mexErrMsgTxt): wrong number of inputs, non-sparse or non-square
 * input, failed factorization, complex factor, factor without a full diagonal.
 *
 * Relies on helpers defined outside this block: sputil_config,
 * sputil_get_sparse, cumsum2, max, min, dsymv, dgemv.
 */
void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] )
{
    double dummy = 0, beta [2], *C, *Ct, *fil, *Zt, *zt, *zz, done = 1.0, dzero = 0.0 ;
    cholmod_sparse Amatrix, *A, *Lsparse ;
    cholmod_factor *L = NULL ;
    cholmod_common Common, *cm = NULL ;
    Int *It2, *Jt2 ;
    mwIndex l, k2, h, k, i, j, ik, lfi, *I, *J, *Jt, *It, *w, *w2, *r ;
    mwSize nnz, m, n ;
    mwSignedIndex one = 1, lfi_si ;
    mxArray *Am, *Bm ;
    char *uplo = "L", *trans = "N" ;

    /* ---------------------------------------------------------------------- */
    /* check inputs (before touching pargin [0] or starting CHOLMOD) */
    /* ---------------------------------------------------------------------- */

    /* NOTE(review): the usage text documents at most two inputs while the
     * historical upper bound is three; the bound is kept for compatibility. */
    if (nargin < 1 || nargin > 3)
    {
        mexErrMsgTxt ("usage: Z = sinv(A), or Z = sinv(LD, 1)") ;
    }
    if (!mxIsSparse (pargin [0]))
    {
        mexErrMsgTxt ("A must be sparse") ;
    }
    m = mxGetM (pargin [0]) ;
    n = m ;
    if (n != mxGetN (pargin [0]))
    {
        mexErrMsgTxt ("A must be square") ;
    }

    /* An empty matrix has an empty inverse.  Returning here also keeps the
     * unsigned index arithmetic below (m-1, m-2) from wrapping around. */
    if (m == 0)
    {
        pargout [0] = mxCreateSparse (0, 0, 0, mxREAL) ;
        return ;
    }

    /* ---------------------------------------------------------------------- */
    /* obtain the factor in MATLAB compressed-column arrays (I, J, C) */
    /* ---------------------------------------------------------------------- */

    if (nargin == 1)
    {
        /* Only one input: compute the Cholesky factorization first. */

        /* start CHOLMOD and set parameters */
        cm = &Common ;
        cholmod_l_start (cm) ;
        sputil_config (SPUMONI, cm) ;

        /* convert to packed LDL' when done */
        cm->final_asis = FALSE ;
        cm->final_super = FALSE ;
        cm->final_ll = FALSE ;
        cm->final_pack = TRUE ;
        cm->final_monotonic = TRUE ;

        /* since numerically zero entries are NOT dropped from the symbolic
         * pattern, we DO need to drop entries that result from supernodal
         * amalgamation. */
        cm->final_resymbol = TRUE ;

        cm->quick_return_if_not_posdef = (nargout < 2) ;

        /* This will disable the supernodal LL', which will be slow. */
        /* cm->supernodal = CHOLMOD_SIMPLICIAL ; */

        /* get sparse matrix A, use tril(A) */
        A = sputil_get_sparse (pargin [0], &Amatrix, &dummy, -1) ;
        A->stype = -1 ;     /* use lower part of A */
        beta [0] = 0 ;
        beta [1] = 0 ;

        /* analyze and factorize */
        L = cholmod_l_analyze (A, cm) ;
        cholmod_l_factorize_p (A, beta, NULL, 0, L, cm) ;
        if (cm->status != CHOLMOD_OK)
        {
            mexErrMsgTxt ("matrix is not positive definite") ;
        }

        /* convert L to a sparse matrix */
        Lsparse = cholmod_l_factor_to_sparse (L, cm) ;
        if (Lsparse->xtype == CHOLMOD_COMPLEX)
        {
            mexErrMsgTxt ("matrix is complex") ;
        }

        /* copy the factor into a MATLAB sparse matrix */
        It2 = Lsparse->i ;
        Jt2 = Lsparse->p ;
        Ct = Lsparse->x ;
        /* Use the number of stored entries (last column pointer), not nzmax:
         * the code below treats C [nnz-1] as the last stored entry. */
        nnz = (mwSize) Jt2 [n] ;
        Am = mxCreateSparse (m, m, nnz, mxREAL) ;
        I = mxGetIr (Am) ;
        J = mxGetJc (Am) ;
        C = mxGetPr (Am) ;
        for (j = 0 ; j < n+1 ; j++)
        {
            J [j] = (mwIndex) Jt2 [j] ;
        }
        for (i = 0 ; i < nnz ; i++)
        {
            I [i] = (mwIndex) It2 [i] ;
            C [i] = Ct [i] ;
        }
        cholmod_l_free_sparse (&Lsparse, cm) ;
    }
    else
    {
        /* The factorization is given as an input; copy it into workspace so
         * that the caller's matrix is not modified. */
        It = mxGetIr (pargin [0]) ;
        Jt = mxGetJc (pargin [0]) ;
        Ct = mxGetPr (pargin [0]) ;
        /* mxGetNzmax may exceed the number of stored entries, so the count is
         * taken from the column pointers instead. */
        nnz = (mwSize) Jt [n] ;
        if (nnz < m)
        {
            /* the algorithm reads the first entry of every column as its
             * diagonal, so every column needs at least one stored entry */
            mexErrMsgTxt ("LD must have a nonzero diagonal") ;
        }
        Am = mxCreateSparse (m, m, nnz, mxREAL) ;
        I = mxGetIr (Am) ;
        J = mxGetJc (Am) ;
        C = mxGetPr (Am) ;
        for (j = 0 ; j < n+1 ; j++)
        {
            J [j] = Jt [j] ;
        }
        for (i = 0 ; i < nnz ; i++)
        {
            I [i] = It [i] ;
            C [i] = Ct [i] ;
        }
    }

    /* ---------------------------------------------------------------------- */
    /* evaluate the sparse inverse in place, from the last column backwards */
    /* ---------------------------------------------------------------------- */

    /* the last column holds only its diagonal: Z(m,m) = 1/D(m,m) */
    C [nnz-1] = 1.0 / C [J [m-1]] ;

    fil = mxCalloc ((mwSize) 1, sizeof (double)) ;
    zt = mxCalloc ((mwSize) 1, sizeof (double)) ;
    Zt = mxCalloc ((mwSize) 1, sizeof (double)) ;
    zz = mxCalloc ((mwSize) 1, sizeof (double)) ;

    /* j is unsigned: the loop stops when j wraps around past zero */
    for (j = m-2 ; j != (mwIndex) -1 ; j--)
    {
        lfi = J [j+1] - (J [j] + 1) ;       /* off-diagonal entries in column j */

        if (J [j+1] > (J [j] + 1))
        {
            /* take the j'th lower triangular column of the Cholesky */
            fil = mxRealloc (fil, (mwSize) lfi * sizeof (double)) ;
            for (i = 0 ; i < lfi ; i++)
            {
                fil [i] = C [J [j] + i + 1] ;
            }

            /* memory for the sparse inverse elements to be evaluated */
            zt = mxRealloc (zt, (mwSize) lfi * sizeof (double)) ;
            /* memory for the needed sparse inverse elements */
            Zt = mxRealloc (Zt, (mwSize) lfi * (mwSize) lfi * sizeof (double)) ;

            /* Set the lower triangle of Zt: column k2 of Zt collects, from
             * column ik of the already computed inverse, the rows that also
             * appear in column j of the factor. */
            k2 = 0 ;
            for (k = J [j] + 1 ; k < J [j+1] ; k++)
            {
                ik = I [k] ;
                h = k2 ;
                /* scan column ik only (l < J [ik+1]) and stop once all lfi
                 * rows are found; scanning one entry further would read past
                 * the column, and past the array for the last column */
                for (l = J [ik] ; l < J [ik+1] && h < lfi ; l++)
                {
                    if (I [l] == I [J [j] + h + 1])
                    {
                        Zt [h + lfi*k2] = C [l] ;
                        h++ ;
                    }
                }
                k2++ ;
            }

            /* evaluate zt = Zt*fil (Zt symmetric, lower triangle stored) */
            lfi_si = (mwSignedIndex) lfi ;
            dsymv (uplo, &lfi_si, &done, Zt, &lfi_si, fil, &one, &dzero, zt, &one) ;

            /* Set the evaluated sparse inverse elements, -zt, into C */
            k = lfi - 1 ;
            for (i = J [j+1] - 1 ; i != J [j] ; i--)
            {
                C [i] = -zt [k] ;
                k-- ;
            }

            /* evaluate the j'th diagonal of sparse inverse: 1/D(j,j) + fil'*zt */
            dgemv (trans, &one, &lfi_si, &done, fil, &one, zt, &one, &dzero, zz, &one) ;
            C [J [j]] = 1.0 / C [J [j]] + zz [0] ;
        }
        else
        {
            /* column j has only its diagonal */
            C [J [j]] = 1.0 / C [J [j]] ;
        }
    }

    /* Free the temporary variables */
    mxFree (fil) ;
    mxFree (zt) ;
    mxFree (Zt) ;
    mxFree (zz) ;

    /* ---------------------------------------------------------------------- */
    /* Permute the elements according to r(q) = 1:n */
    /* Done only if the Cholesky was evaluated here */
    /* ---------------------------------------------------------------------- */

    if (nargin == 1)
    {
        Bm = mxCreateSparse (m, m, nnz, mxREAL) ;
        It = mxGetIr (Bm) ;
        Jt = mxGetJc (Bm) ;
        Ct = mxGetPr (Bm) ;

        r = (mwIndex *) L->Perm ;               /* fill reducing ordering */
        w = mxCalloc (m, sizeof (mwIndex)) ;    /* column counts of Am */

        /* count entries in each column of Bm */
        for (j = 0 ; j < m ; j++)
        {
            k = r ? r [j] : j ;         /* column j of Bm is column k of Am */
            for (l = J [j] ; l < J [j+1] ; l++)
            {
                i = I [l] ;
                ik = r ? r [i] : i ;    /* row i of Bm is row ik of Am */
                w [max (ik, k)]++ ;
            }
        }
        cumsum2 (Jt, w, m) ;

        /* scatter the entries into the upper triangle of Bm */
        for (j = 0 ; j < m ; j++)
        {
            k = r ? r [j] : j ;
            for (l = J [j] ; l < J [j+1] ; l++)
            {
                i = I [l] ;
                ik = r ? r [i] : i ;
                It [k2 = w [max (ik, k)]++] = min (ik, k) ;
                Ct [k2] = C [l] ;
            }
        }
        mxFree (w) ;

        /* Transpose the permuted (upper triangular) matrix Bm into Am
         * (this way we get sorted columns) */
        w = mxCalloc (m, sizeof (mwIndex)) ;
        for (i = 0 ; i < Jt [m] ; i++)
        {
            w [It [i]]++ ;              /* row counts of Bm */
        }
        cumsum2 (J, w, m) ;             /* row pointers */
        for (j = 0 ; j < m ; j++)
        {
            for (i = Jt [j] ; i < Jt [j+1] ; i++)
            {
                I [l = w [It [i]]++] = j ;
                C [l] = Ct [i] ;
            }
        }
        mxFree (w) ;
        mxDestroyArray (Bm) ;
    }

    /* ---------------------------------------------------------------------- */
    /* Fill the upper triangle of the sparse inverse */
    /* ---------------------------------------------------------------------- */

    w = mxCalloc (m, sizeof (mwIndex)) ;        /* workspace */
    w2 = mxCalloc (m, sizeof (mwIndex)) ;       /* workspace */
    for (k = 0 ; k < J [m] ; k++)
    {
        w [I [k]]++ ;                   /* row counts of the lower triangular */
    }
    for (k = 0 ; k < m ; k++)
    {
        /* column counts of the sparse inverse (diagonal counted once) */
        w2 [k] = w [k] + J [k+1] - J [k] - 1 ;
    }

    nnz = (mwSize) 2 * nnz - m ;                /* The number of nonzeros in Z */
    pargout [0] = mxCreateSparse (m, m, nnz, mxREAL) ;
    It = mxGetIr (pargout [0]) ;
    Jt = mxGetJc (pargout [0]) ;
    Ct = mxGetPr (pargout [0]) ;

    cumsum2 (Jt, w2, m) ;                       /* column starting points */
    for (j = 0 ; j < m ; j++)
    {
        /* fill the upper triangular */
        for (k = J [j] ; k < J [j+1] ; k++)
        {
            It [l = w2 [I [k]]++] = j ;         /* place C(i,j) as entry Ct(j,i) */
            if (Ct) Ct [l] = C [k] ;
        }
    }
    for (j = 0 ; j < m ; j++)
    {
        /* fill the lower triangular (diagonal was placed above) */
        for (k = J [j] + 1 ; k < J [j+1] ; k++)
        {
            It [l = w2 [j]++] = I [k] ;
            if (Ct) Ct [l] = C [k] ;
        }
    }
    mxFree (w2) ;
    mxFree (w) ;

    /* ---------------------------------------------------------------------- */
    /* free workspace and the CHOLMOD L, except for what is copied to MATLAB */
    /* ---------------------------------------------------------------------- */

    if (nargin == 1)
    {
        cholmod_l_free_factor (&L, cm) ;
        cholmod_l_finish (cm) ;
        cholmod_l_print_common (" ", cm) ;
    }
    mxDestroyArray (Am) ;
}
// MEX gateway: two successive rank-one corrections of the p-by-p matrix in
// prhs[0], where p is its row count.
//
// Inputs read by this function:
//   prhs[0]  Bbar            (read through its upper triangle in stage 1)
//   prhs[1]  a               (first element is used)
//   prhs[2]  b               (first element is used)
//   prhs[3]  delta_tilda_k
//   prhs[4]  delta_k
//   prhs[5], prhs[6]         scratch vectors, overwritten; only read when the
//                            file is compiled without SAFE
//
// Output: plhs[0] is the updated matrix. Without SUPERSAFE this is the very
// same array as prhs[0], i.e. the caller's input is edited in place.
//
// Uses one, zero and inc, which are defined outside this function.
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
#ifdef SUPERSAFE
    mxArray *result = mxDuplicateArray(prhs[0]);   // Safe
#else
    mxArray *result = (mxArray *) prhs[0];         // Fast and Dangerous
#endif

    // Read Input
    mwSignedIndex p = mxGetM(prhs[0]);
    double *B = mxGetPr(result);
    double a = *(mxGetPr(prhs[1]));
    double b = *(mxGetPr(prhs[2]));
    double *dTilde = mxGetPr(prhs[3]);
    double *dK = mxGetPr(prhs[4]);

    // Work vectors of length p: either allocated here or supplied by the caller.
#ifdef SAFE
    double *u = mxCalloc(p, sizeof(double));
    double *v = mxCalloc(p, sizeof(double));
#else
    double *u = mxGetPr(prhs[5]);
    double *v = mxGetPr(prhs[6]);
#endif

    double denom;
    double scale;

    /* Stage 1: B is symmetric, so the symmetric product can be used. */

    // u = B * dTilde
    dsymv("U", &p, &one, B, &p, dTilde, &inc, &zero, u, &inc);
    // denom = a + b * (dK' * u)
    denom = a + b * ddot(&p, dK, &inc, u, &inc);
    // v = B' * dK, which equals B * dK while B is symmetric
    dsymv("U", &p, &one, B, &p, dK, &inc, &zero, v, &inc);
    // B = B + (-b/denom) * u * v'
    scale = -b / denom;
    dger(&p, &p, &scale, u, &inc, v, &inc, B, &p);

    /* Stage 2: B is no longer symmetric, so the general product is needed. */

    // u = B * dK
    dgemv("N", &p, &p, &one, B, &p, dK, &inc, &zero, u, &inc);
    // denom = a + b * (dTilde' * u)
    denom = a + b * ddot(&p, dTilde, &inc, u, &inc);
    // v = B' * dTilde
    dgemv("T", &p, &p, &one, B, &p, dTilde, &inc, &zero, v, &inc);
    // B = B + (-b/denom) * u * v'
    scale = -b / denom;
    dger(&p, &p, &scale, u, &inc, v, &inc, B, &p);

    // Hand the (possibly in-place edited) matrix back to the caller.
    plhs[0] = result;

#ifdef SAFE
    mxFree(u);
    mxFree(v);
#endif
}
// Sample factor vectors // Function written from perspective of sampling user factor vectors with cross-topics // Switch roles of user-item inputs to sample item factor vectors void sampleTopicFactorVectors(uint32_t* items, double* resids, const mxArray* exampsByUser, int KU, int KM, int numUsers, int numItems, double invSigmaSqd, ptrdiff_t numTopicFacs, double* LambdaU, double* muU, double* c, double* d, uint32_t* zU, uint32_t* zM){ // Array of random number generators gsl_rng** rngs = getRngArray(); // Extract internals of jagged arrays uint32_t** userExamps; mwSize* userLens; unpackJagged(exampsByUser, &userExamps, &userLens, numUsers); ptrdiff_t numTopicFacsSqd = numTopicFacs*numTopicFacs; ptrdiff_t numTopicFacsTimesNumItems = numTopicFacs*numItems; ptrdiff_t numTopicFacsTimesNumUsers = numTopicFacs*numUsers; // BLAS constants char uplo[] = "U"; char trans[] = "N"; char diag[] = "N"; ptrdiff_t oneInt = 1; double oneDbl = 1; double zeroDbl = 0; // Compute muBase = LambdaU*muU double* muBase = mxMalloc(numTopicFacs*sizeof(*muBase)); dsymv(uplo, &numTopicFacs, &oneDbl, LambdaU, &numTopicFacs, muU, &oneInt, &zeroDbl, muBase, &oneInt); // Allocate memory for new mean and precision parameters double** muNew[MAX_NUM_THREADS]; double** LambdaNew[MAX_NUM_THREADS]; for(int thread = 0; thread < MAX_NUM_THREADS; thread++){ muNew[thread] = mxMalloc(KM*sizeof(**muNew)); LambdaNew[thread] = mxMalloc(KM*sizeof(**LambdaNew)); for(int i = 0; i < KM; i++){ muNew[thread][i] = mxMalloc(numTopicFacs*sizeof(***muNew)); LambdaNew[thread][i] = mxMalloc(numTopicFacsSqd*sizeof(***LambdaNew)); } } #pragma omp parallel for for(int u = 0; u < numUsers; u++){ int thread = omp_get_thread_num(); for(int i = 0; i < KM; i++){ // Initialize new mean to muBase dcopy(&numTopicFacs, muBase, &oneInt, muNew[thread][i], &oneInt); // Initialize new precision to LambdaU dcopy(&numTopicFacsSqd, LambdaU, &oneInt, LambdaNew[thread][i], &oneInt); } // Iterate over user's examples mxArray* exampsArray = 
mxGetCell(exampsByUser, u); mwSize len = mxGetN(exampsArray); uint32_t* examps = (uint32_t*) mxGetData(exampsArray); for(int j = 0; j < len; j++){ uint32_t e = examps[j]-1; int m = items[e]-1; int userTop = zU[e]-1; int itemTop = zM[e]-1; // Item vector for this rated item double* dVec = d + m*numTopicFacs + userTop*numTopicFacsTimesNumItems; // Compute posterior sufficient statistics for factor vector // Add resid * dVec/sigmaSqd to muNew double resid = resids[e]; resid *= invSigmaSqd; daxpy(&numTopicFacs, &resid, dVec, &oneInt, muNew[thread][itemTop], &oneInt); // Add (dVec * dVec^t)/sigmaSqd to LambdaNew // Exploit symmetric structure of LambdaNew dsyr(uplo, &numTopicFacs, &invSigmaSqd, dVec, &oneInt, LambdaNew[thread][itemTop], &numTopicFacs); } for(int i = 0; i < KM; i++){ // Compute upper Cholesky factor of LambdaNew ptrdiff_t info; dpotrf(uplo, &numTopicFacs, LambdaNew[thread][i], &numTopicFacs, &info); // Solve for (LambdaNew)^-1*muNew using Cholesky factor dpotrs(uplo, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, muNew[thread][i], &numTopicFacs, &info); // Sample vector of N(0,1) variables gsl_rng* rng = rngs[thread]; double* cVec = c + u*numTopicFacs + i*numTopicFacsTimesNumUsers; for(int f = 0; f < numTopicFacs; f++) cVec[f] = gsl_ran_gaussian(rng, 1); // Solve for (chol(LambdaNew,'U'))^-1*N(0,1) dtrtrs(uplo, trans, diag, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, cVec, &numTopicFacs, &info); // Add muNew to aVec daxpy(&numTopicFacs, &oneDbl, muNew[thread][i], &oneInt, cVec, &oneInt); } } // Clean up mxFree(userExamps); mxFree(userLens); mxFree(muBase); for(int thread = 0; thread < MAX_NUM_THREADS; thread++){ for(int i = 0; i < KM; i++){ mxFree(muNew[thread][i]); mxFree(LambdaNew[thread][i]); } mxFree(muNew[thread]); mxFree(LambdaNew[thread]); } }
/// Forwards every argument, unchanged and in the same order, to dsymv.
///
/// All parameters are pointers because they are handed straight through to
/// the underlying routine; nothing is validated or copied here.
/// Presumably dsymv is the BLAS symmetric matrix-vector product
/// (Y := ALPHA*A*X + BETA*Y) - confirm against the declaration this file
/// links to.
void toast::lapack::symv(char * UPLO, int * N, double * ALPHA, double * A,
                         int * LDA, double * X, int * INCX, double * BETA,
                         double * Y, int * INCY)
{
    dsymv(UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY);
}