Example #1
0
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, const mxArray *prhs[])
{
    // MEX gateway: draws one categorical sample per entry of prhs[1].
    //
    // Inputs
    //   prhs[0]  real double matrix p (K x numDists); each column is one
    //            probability vector over K categories
    //   prhs[1]  uint32 column vector cols (vecLen x 1) of 1-based column
    //            indices into p, one per sample to draw
    // Output
    //   plhs[0]  uint32 vecLen x 1 vector; entry e is the 1-based category
    //            drawn from column cols[e] of p
    //
    // All validation happens before the parallel region because the MEX
    // error functions must not be called from worker threads.
    if(nrhs < 2)
        mexErrMsgIdAndTxt("sampleDiscrete:invalidInput",
                          "Two inputs required: probability matrix and uint32 column indices.");
    if(!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]))
        mexErrMsgIdAndTxt("sampleDiscrete:invalidInput",
                          "First input must be a real double matrix.");
    if(!mxIsUint32(prhs[1]))
        mexErrMsgIdAndTxt("sampleDiscrete:invalidInput",
                          "Second input must be a uint32 vector.");

    omp_set_num_threads(MAX_NUM_THREADS);

    // Array of static random number generators
    gsl_rng** rngs = getRngArray();

    // Extract input information
    const double* p = mxGetPr(prhs[0]); // K x numDists
    const mwSize K = mxGetM(prhs[0]); // Number of categories
    const mwSize numDists = mxGetN(prhs[0]);
    const uint32_t* cols = (const uint32_t*)mxGetData(prhs[1]); // vecLen x 1
    const mwSize vecLen = mxGetM(prhs[1]);

    if(vecLen > 0 && K < 1)
        mexErrMsgIdAndTxt("sampleDiscrete:invalidInput",
                          "Probability matrix must have at least one row.");

    // Reject column indices that would read outside p
    for(mwSize e = 0; e < vecLen; e++) {
        if(cols[e] < 1 || cols[e] > numDists)
            mexErrMsgIdAndTxt("sampleDiscrete:invalidInput",
                              "Column index out of range of the probability matrix.");
    }

    // Prepare output
    // New array auto-initialized to zeros
    plhs[0] = mxCreateNumericMatrix(vecLen, 1, mxUINT32_CLASS, mxREAL);
    uint32_t* vec = (uint32_t*)mxGetData(plhs[0]);

    #pragma omp parallel for
    for(mwSize e = 0; e < vecLen; e++) {
        // Find correct probability vector; the offset is computed in mwSize
        // so it cannot wrap in 32-bit arithmetic for large matrices
        const double* pVec = p + K*((mwSize)cols[e]-1);

        // Generate U(0,1) random number
        const double rnd = gsl_rng_uniform(rngs[omp_get_thread_num()]);

        // Compare rnd to cumulative probability sum to sample topic;
        // the last category absorbs any remaining probability mass
        double cumsum = pVec[0];
        uint32_t i = 1;
        for(; (i < K) && (rnd > cumsum); i++)
            cumsum += pVec[i];
        vec[e] = i;
    }
}
// Sample offsets
// Function written from perspective of sample c offsets
// Switch roles of user-item inputs to sample d offsets
//
// For every user u and every topic i < KM, accumulates the residuals of the
// user's examples assigned to topic i (minus the matching d offset when
// KU > 0) together with the per-topic example count, then overwrites
// c[u*KM + i] with a Gaussian draw whose variance is
// 1/(invSigmaSqd0 + count*invSigmaSqd) and whose mean is
// (c0*invSigmaSqd0 + sum*invSigmaSqd)*variance.
//
// users is not read by this function. items, zU and zM hold 1-based indices,
// as do the example ids unpacked from exampsByUser.
void sampleOffsets(uint32_t* users, uint32_t* items,  const mxArray* exampsByUser,
                   int KU, int KM, int numUsers, double invSigmaSqd,
                   double invSigmaSqd0, double c0, double* c, double* d,
                   uint32_t* zU, uint32_t* zM, double* resids){

   // Array of static random number generators
   gsl_rng** rngs = getRngArray();

   // Extract internals of jagged arrays
   uint32_t** userExamps;
   mwSize* userLens;
   unpackJagged(exampsByUser, &userExamps, &userLens, numUsers);

   // Prior term for offsets
   const double ratio = c0*invSigmaSqd0;

   // Allocate one scratch buffer of KM topic counts per thread.
   // The element type is int, hence sizeof(**counts), not sizeof(*counts).
   int* counts[MAX_NUM_THREADS];
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++)
      counts[thread] = mxMalloc(KM*sizeof(**counts));

#pragma omp parallel for
   for(int u = 0; u < numUsers; u++){
      const int thread = omp_get_thread_num();
      int* threadCounts = counts[thread];

      // Initialize c offsets to 0 (offset computed in ptrdiff_t so that
      // u*KM cannot overflow int)
      double* cPtr = c + (ptrdiff_t)u*KM;
      fillArrayD(cPtr, KM, 0);

      // Initialize topic counts to 0
      fillArrayI(threadCounts, KM, 0);

      // Iterate over user's examples computing sufficient stats
      const mwSize len = userLens[u];
      const uint32_t* examps = userExamps[u];
      for(mwSize j = 0; j < len; j++){
         uint32_t e = examps[j]-1;
         int i = zM[e]-1;

         if(KU > 0)
            cPtr[i] += (resids[e] - d[(items[e]-1)*KU + (zU[e]-1)]);
         else
            cPtr[i] += resids[e];
         threadCounts[i]++;
      }

      // Sample new offset values using sufficient stats
      for(int i = 0; i < KM; i++){
         double variance = 1.0/(invSigmaSqd0 + threadCounts[i]*invSigmaSqd);
         cPtr[i] = (ratio + cPtr[i]*invSigmaSqd)*variance +
            gsl_ran_gaussian(rngs[thread], sqrt(variance));
      }
   }
   // Clean up
   mxFree(userExamps);
   mxFree(userLens);
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++)
      mxFree(counts[thread]);
}
Example #3
0
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, const mxArray *prhs[])
{
   // MEX gateway: seeds every generator returned by getRngArray().
   //
   // Input
   //   prhs[0]  real double scalar; rounded to the nearest integer to form
   //            the base seed
   // No outputs are produced. Generator r is seeded with 2*(r+seed)+1, so
   // each of the MAX_NUM_THREADS generators gets a distinct odd seed.
   if(nrhs < 1 || !mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) ||
      mxGetNumberOfElements(prhs[0]) < 1)
      mexErrMsgIdAndTxt("seedMexRand:invalidInput",
                        "One real double scalar seed is required.");

   // Extract input information
   const double rawSeed = *mxGetPr(prhs[0]);

   // Converting a negative, NaN or too-large double to an unsigned integer
   // is undefined behaviour, so reject such values up front
   // (the negated comparison also catches NaN)
   if(!(rawSeed >= 0) || !(rawSeed + .5 < (double)(~0UL)))
      mexErrMsgIdAndTxt("seedMexRand:invalidInput",
                        "Seed must be a non-negative value representable as unsigned long.");

   const unsigned long seed = rawSeed + .5;

   mexPrintf("Running seedMexRand with seed = %lu\n", seed);
   gsl_rng** rngs = getRngArray();
   for(unsigned long r = 0; r < MAX_NUM_THREADS; r++){
      gsl_rng_set(rngs[r], 2*(r+seed)+1);
   }
}
Example #4
0
// Sample topic parameters
// Function written from perspective of sampling user topics parameters
// Switch roles of user-item inputs to sample item topics parameters
//
// For every user u, builds Dirichlet parameters alpha/KU + (number of the
// user's examples assigned to each topic), draws a Dirichlet sample with
// gsl_ran_dirichlet and stores its elementwise log in
// logthetaU[u*KU .. u*KU+KU-1].
//
// exampsByUser is a cell array whose u-th cell holds the uint32, 1-based
// example ids of user u; zU holds the 1-based topic of every example.
void sampleTopicParams(const mxArray* exampsByUser, int KU, int numUsers,
                       double alpha, double* logthetaU, uint32_t* zU){

   // Array of static random number generators
   gsl_rng** rngs = getRngArray();

   // Prior term for Dirichlet
   const double ratio = alpha/KU;

   // Pull the raw data pointer and length out of every cell before going
   // parallel: the MATLAB API is not documented as thread-safe, so it is
   // only called from this serial section
   uint32_t** userExamps = mxMalloc(numUsers*sizeof(*userExamps));
   mwSize* userLens = mxMalloc(numUsers*sizeof(*userLens));
   for(int u = 0; u < numUsers; u++){
      const mxArray* exampsArray = mxGetCell(exampsByUser, u);
      userLens[u] = mxGetN(exampsArray);
      userExamps[u] = (uint32_t*) mxGetData(exampsArray);
   }

   // Allocate memory for storing topic counts (one buffer per thread)
   double* counts[MAX_NUM_THREADS];
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++)
      counts[thread] = mxMalloc(KU*sizeof(**counts));

#pragma omp parallel for
   for(int u = 0; u < numUsers; u++){
      const int thread = omp_get_thread_num();
      double* threadCounts = counts[thread];

      // Initialize to prior term
      for(int i = 0; i < KU; i++)
         threadCounts[i] = ratio;

      // Iterate over user's examples computing sufficient stats
      const mwSize len = userLens[u];
      const uint32_t* examps = userExamps[u];
      for(mwSize j = 0; j < len; j++)
         threadCounts[zU[examps[j]-1]-1]++;

      // Sample new topic parameters (offset computed in ptrdiff_t so that
      // u*KU cannot overflow int)
      double* logthetaPtr = logthetaU + (ptrdiff_t)u*KU;
      gsl_ran_dirichlet(rngs[thread], KU, threadCounts, logthetaPtr);
      // Take logs
      for(int i = 0; i < KU; i++)
         logthetaPtr[i] = log(logthetaPtr[i]);
   }
   // Clean up
   mxFree(userExamps);
   mxFree(userLens);
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++)
      mxFree(counts[thread]);
}
Example #5
0
// Sample topics
// Function written from perspective of sampling user topics
// Switch roles of user-item inputs to sample item topics
//
// For each example e, scores every candidate topic i < KU as
//    logthetaU[u*KU + i] - (resid - c term - d[item*KU + i])^2 / twoSigmaSqd
// and stores in zU[e] whatever sampleDiscreteLogProb returns for those
// scores (it is also handed the largest score). The c term is only
// subtracted when KM > 0.
void sampleTopics(uint32_t* users, uint32_t* items, ptrdiff_t KU, ptrdiff_t KM,
                  double twoSigmaSqd, double* logthetaU, double* c,
                  double* d, uint32_t* zU, uint32_t* zM, double* resids,
                  mwSize numExamples){
   // Per-thread random number generators
   gsl_rng** rngs = getRngArray();

   // One scratch buffer of KU log probabilities per thread
   double* logProb[MAX_NUM_THREADS];
   for(ptrdiff_t t = 0; t < MAX_NUM_THREADS; t++)
      logProb[t] = mxMalloc(KU*sizeof(*logProb[t]));

#pragma omp parallel for
   for(mwSize e = 0; e < numExamples; e++){
      const ptrdiff_t tid = omp_get_thread_num();
      double* scores = logProb[tid];

      // Locate this example's user row and item row
      const ptrdiff_t user = users[e]-1;
      const double* thetaRow = logthetaU + user*KU;
      const double* itemRow = d + (items[e]-1)*KU;

      // Residual with the other side's offset removed, when there is one
      double base = resids[e];
      if(KM > 0)
         base -= c[user*KM + (zM[e]-1)];

      // Fill in the scores while tracking the largest one
      double best = -INFINITY;
      for(ptrdiff_t k = 0; k < KU; k++){
         const double diff = base - itemRow[k];
         const double score = thetaRow[k] - diff*diff/twoSigmaSqd;
         scores[k] = score;
         if(score > best)
            best = score;
      }

      zU[e] = sampleDiscreteLogProb(rngs[tid], scores, KU, best);
   }

   // Release the scratch buffers
   for(ptrdiff_t t = 0; t < MAX_NUM_THREADS; t++)
      mxFree(logProb[t]);
}
Example #6
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){
  // MEX gateway: resamples the dish assigned to every table of every user
  // (params[0] true) or of every item (params[0] false).
  //
  // Inputs
  //   prhs[0]  data struct  (fields read: numUsers/numItems, exampsByUser/exampsByItem)
  //   prhs[1]  model struct (fields read: d0, c0, sigmaSqd, sigmaSqd0, invsigmaSqd,
  //            invsigmaSqd0, betaM/betaU, gammaM/gammaU)
  //   prhs[2]  samp struct  (fields read: resids, c, d, mC, nC, mD, nD, muC, muD, kM, kU
  //            and the per-user/per-item cell arrays kuM/tuM/nuM or kjU/tjU/njU)
  //   prhs[3]  logical vector; [0] selects user vs. item sampling, [1] selects whether
  //            muD or d is subtracted from each residual
  // Outputs
  //   plhs[0]  cell array with the updated 1-based dish index of every table
  //   plhs[1]  updated mC, plhs[2] updated nC, plhs[3] updated muC, plhs[4] updated kM
  //
  // Fatal problems are reported with mexErrMsgIdAndTxt, which returns control to MATLAB
  // and releases mxMalloc'd memory; calling exit() here would terminate MATLAB itself.
  // NOTE(review): the presence and class of the individual struct fields is still not
  // validated; a missing field makes mxGetField return NULL.
  if (nrhs < 4){ mexErrMsgIdAndTxt("sampleDishes:invalidInput", "Expected four inputs: data, model, samp and a logical parameter vector\n"); }
  if (nlhs > 5){ mexErrMsgIdAndTxt("sampleDishes:invalidOutput", "At most five outputs are produced\n"); }
  if (!mxIsStruct(prhs[0]) || !mxIsStruct(prhs[1]) || !mxIsStruct(prhs[2])){ mexErrMsgIdAndTxt("sampleDishes:invalidInput", "data, model and samp must be structs\n"); }
  if (!mxIsLogical(prhs[3]) || mxGetNumberOfElements(prhs[3]) < 2){ mexErrMsgIdAndTxt("sampleDishes:invalidInput", "Fourth input must be a logical vector with at least two elements\n"); }
  //setting up inputs
  //first, set up the structs; they are only read here, so no deep copy is made
  const mxArray* data = prhs[0];
  const mxArray* model = prhs[1];
  const mxArray* samp = prhs[2];
  const mxLogical* params = mxGetLogicals(prhs[3]);
  int numExamples = 0;
  //then, set up the cell array pointers
  mxArray* exampsByUserItem = NULL;
  mxArray* sampkuM = NULL;
  mxArray* samptuM = NULL;
  mxArray* sampnuM = NULL;
  //then, the double data matrices (as vectors)
  double* c = NULL;
  double* d = NULL;
  double* muC = NULL;
  double* muD = NULL;
  double* mC = NULL;
  double* nC = NULL;
  double* nD = NULL;
  //the scalar values
  double c0, betaM, gammaM;
  c0 = betaM = gammaM = 0;
  int length_mC, length_nC, length_kM, length_muC;
  length_mC = length_nC = length_kM = length_muC = 0;
  //the uint32_t numerical matrices
  uint32_t* kM = NULL;
  uint32_t* kU = NULL;
  const double* resids = mxGetPr(mxGetField(samp, 0, "resids"));
  const double d0 = (*mxGetPr(mxGetField(model, 0, "d0")));
  const double sigmaSqd = (*mxGetPr(mxGetField(model, 0, "sigmaSqd")));
  const double sigmaSqd0 = (*mxGetPr(mxGetField(model, 0, "sigmaSqd0")));
  const double invsigmaSqd = (*mxGetPr(mxGetField(model, 0, "invsigmaSqd")));
  const double invsigmaSqd0 = (*mxGetPr(mxGetField(model, 0, "invsigmaSqd0")));
  if (params[0]){ //isItemTopic == true
    mexPrintf("Sampling dishes for users\n");
    numExamples = (*mxGetPr(mxGetField(data, 0, "numUsers"))); //dereference to get value of numUsers
    exampsByUserItem = mxGetField(data, 0, "exampsByUser");
    c = mxGetPr(mxGetField(samp, 0, "c"));
    d = mxGetPr(mxGetField(samp, 0, "d"));
    sampkuM = mxGetField(samp, 0, "kuM");
    samptuM = mxGetField(samp, 0, "tuM");
    sampnuM = mxGetField(samp, 0, "nuM");
    //mC, nC, kM and muC are modified below, so they are copied into mxMalloc'd buffers
    length_mC = mxGetN(mxGetField(samp, 0, "mC"));
    mC = (double*)mxMalloc((length_mC)*sizeof(double));
    memcpy(mC, mxGetPr(mxGetField(samp, 0, "mC")), length_mC*sizeof(double));
    length_nC = mxGetN(mxGetField(samp, 0, "nC"));
    nC = (double*)mxMalloc((length_nC)*sizeof(double));
    memcpy(nC, mxGetPr(mxGetField(samp, 0, "nC")), length_nC*sizeof(double));
    nD = mxGetPr(mxGetField(samp, 0, "nD")); //we don't modify nD, so it's OK to just have it be a pointer
    length_kM = mxGetN(mxGetField(samp, 0, "kM"));
    kM = (uint32_t*)mxMalloc((length_kM)*sizeof(uint32_t));
    memcpy(kM, (uint32_t*)mxGetData(mxGetField(samp, 0, "kM")), length_kM*sizeof(uint32_t));
    kU = (uint32_t*)mxGetData(mxGetField(samp, 0, "kU")); //not being modified
    length_muC = mxGetN(mxGetField(samp, 0, "muC"));
    muC = (double*)mxMalloc((length_muC)*sizeof(double));
    memcpy(muC, mxGetPr(mxGetField(samp, 0, "muC")), length_muC*sizeof(double));
    muD = mxGetPr(mxGetField(samp, 0, "muD"));
    c0 = (*mxGetPr(mxGetField(model, 0, "c0")));
    betaM = (*mxGetPr(mxGetField(model, 0, "betaM")));
    gammaM = (*mxGetPr(mxGetField(model, 0, "gammaM")));
  }
  else{ //isItemTopic == false; same as above with the user and item roles swapped
    mexPrintf("Sampling dishes for items\n");
    numExamples = (*mxGetPr(mxGetField(data, 0, "numItems")));
    exampsByUserItem = mxGetField(data, 0, "exampsByItem");
    c = mxGetPr(mxGetField(samp, 0, "d"));
    d = mxGetPr(mxGetField(samp, 0, "c"));
    sampkuM = mxGetField(samp, 0, "kjU");
    samptuM = mxGetField(samp, 0, "tjU");
    sampnuM = mxGetField(samp, 0, "njU");
    length_mC = mxGetN(mxGetField(samp, 0, "mD"));
    mC = (double*)mxMalloc((length_mC)*sizeof(double));
    memcpy(mC, mxGetPr(mxGetField(samp, 0, "mD")), length_mC*sizeof(double));
    length_nC = mxGetN(mxGetField(samp, 0, "nD"));
    nC = (double*)mxMalloc((length_nC)*sizeof(double));
    memcpy(nC, mxGetPr(mxGetField(samp, 0, "nD")), length_nC*sizeof(double));
    nD = mxGetPr(mxGetField(samp, 0, "nC"));
    length_kM = mxGetN(mxGetField(samp, 0, "kU"));
    kM = (uint32_t*)mxMalloc((length_kM)*sizeof(uint32_t));
    memcpy(kM, (uint32_t*)mxGetData(mxGetField(samp, 0, "kU")), length_kM*sizeof(uint32_t));
    kU = (uint32_t*)mxGetData(mxGetField(samp, 0, "kM"));
    length_muC = mxGetN(mxGetField(samp, 0, "muD"));
    muC = (double*)mxMalloc((length_muC)*sizeof(double));
    memcpy(muC, mxGetPr(mxGetField(samp, 0, "muD")), length_muC*sizeof(double));
    muD = mxGetPr(mxGetField(samp, 0, "muC"));
    c0 = (*mxGetPr(mxGetField(model, 0, "d0")));
    betaM = (*mxGetPr(mxGetField(model, 0, "betaU")));
    gammaM = (*mxGetPr(mxGetField(model, 0, "gammaU")));
  }
  mexPrintf("Initialized values successfully\n");
  //outputs
  mxArray* sampkuM_out = mxCreateCellMatrix(numExamples, 1);
  omp_set_num_threads(MAX_NUM_THREADS);
  gsl_rng** rngs = getRngArray(); //RNG part
  int thread = omp_get_thread_num(); //called outside any parallel region
  const gsl_rng* rng = rngs[thread];
//#pragma omp parallel for
  for (int uu = 0; uu < numExamples; uu++){
    mwSize kuM_size = mxGetN(mxGetCell(sampkuM, uu));
    mwSize tuM_size = mxGetN(mxGetCell(samptuM, uu));
    mwSize TuM = mxGetN(mxGetCell(sampnuM, uu)); //initialize TuM when going to new user; TuM = nuM_size
    //kuM is indexed by table below and handed to MATLAB as a 1 x TuM array, so it must hold TuM entries
    if (kuM_size < TuM){ mexErrMsgIdAndTxt("sampleDishes:sizeMismatch", "Error: kuM for user %d has %d entries but there are %d tables\n", uu, (int)kuM_size, (int)TuM); }
    uint32_t* kuM = (uint32_t*)mxMalloc((kuM_size)*sizeof(uint32_t)); //needs to be modified, so allocating on heap
    memcpy(kuM, (uint32_t*)mxGetData(mxGetCell(sampkuM, uu)), kuM_size*sizeof(uint32_t)); //copying onto heap
    uint32_t* tuM = (uint32_t*)mxMalloc(tuM_size*sizeof(uint32_t));
    memcpy(tuM, (uint32_t*)mxGetData(mxGetCell(samptuM, uu)), tuM_size*sizeof(uint32_t));
    uint32_t* examps = (uint32_t*)mxGetData(mxGetCell(exampsByUserItem, uu));
    mwSize numExamples_uu = mxGetN(mxGetCell(exampsByUserItem, uu)); //size of examps
    //tuM_cell[t] lists the examples seated at table t; tuM_cell_size[t] is its length
    //(heap-allocated instead of a variable-length array, whose size came straight from the input)
    uint32_t** tuM_cell = (uint32_t**)mxMalloc(TuM*sizeof(uint32_t*));
    uint32_t* tuM_cell_size = (uint32_t*)mxMalloc(TuM*sizeof(uint32_t));
    for (mwSize i = 0; i < TuM; i++ ){
      tuM_cell[i] = NULL;
      tuM_cell_size[i] = 0;
    }
    if (tuM_size != numExamples_uu){ mexErrMsgIdAndTxt("sampleDishes:sizeMismatch", "Error: size of tuM matrix for user %d is %d; it should equal numExamples, which is %d\n", uu, (int)tuM_size, (int)numExamples_uu); }
    for (mwSize ee_i = 0; ee_i < numExamples_uu; ee_i++){ //numExamples_uu == tuM_size; assembling all the examples for this user sorted by table in a cell array
      //abort instead of indexing tuM_cell out of bounds
      if (tuM[ee_i] < 1 || tuM[ee_i] > TuM){ mexErrMsgIdAndTxt("sampleDishes:badTable", "Error: table number for user %d, example %d, is outside the valid range 1..%d\n", uu, (int)ee_i, (int)TuM); }
      uint32_t* tuM_cell_table = tuM_cell[tuM[ee_i]-1];
      if (tuM_cell_table == NULL){ //i.e., the entry is empty thus far
        tuM_cell_table = (uint32_t*)mxMalloc(sizeof(uint32_t));
        tuM_cell_table[0] = examps[ee_i];
        tuM_cell[tuM[ee_i]-1] = tuM_cell_table;
        tuM_cell_size[tuM[ee_i]-1] = 1;
      }
      else {
        int orig_size = tuM_cell_size[tuM[ee_i]-1];
        uint32_t* tuM_cell_expanded = (uint32_t*)mxRealloc(tuM_cell_table, (orig_size+1)*sizeof(uint32_t));
        if (tuM_cell_expanded){
          tuM_cell_expanded[orig_size] = examps[ee_i];
          tuM_cell[tuM[ee_i]-1] = tuM_cell_expanded;
          tuM_cell_size[tuM[ee_i]-1] += 1;
        }
        else { mexPrintf("Could not enlarge tuM cell array for user %d, example %d\n", uu, (int)ee_i); }
      }
    }
    //sample a new dish for each table
    for (mwSize tt = 0; tt < TuM; tt++ ){
      uint32_t* examp_tuM = tuM_cell[tt];
      int table_size = tuM_cell_size[tt];
      if (table_size > 0){
        int old_k = kuM[tt] - 1;
        //update global dish sufficient stats immediately
        mC[old_k] -= 1;
        if (mC[old_k] < 0){ mC[old_k] = 0; }
        for (mwSize ee_i = 0; ee_i < table_size; ee_i++){ //loop through all examples assigned to table
          uint32_t ee = examp_tuM[ee_i] - 1;
          double residC = (params[1]) ? resids[ee] - muD[kU[ee]-1] : resids[ee] - d[kU[ee]-1];
          muC[old_k] = updateCRFMu(muC, nC, residC, old_k, false, model); //remove current rating from global sufficient stats
          //NOTE! this is a hack: clamp muC to [-30, 30]
          if (fabs(muC[old_k]) > 30){ int sign = (muC[old_k] > 0) - (muC[old_k] < 0); muC[old_k] = sign*30; }
          nC[old_k] -= 1;
        }
        int new_k = sampleDishFull(examp_tuM, resids, mC, muC, muD, kU, nC, nD, betaM, c0, sigmaSqd, invsigmaSqd, invsigmaSqd0, length_mC, table_size, rng);
        //skipping if condition that checks if length(new_k) > 1
        int empty_dish = linearSearchDouble(nC, 0, length_nC); //see what table we selected
        if (empty_dish > -1){ new_k = empty_dish; } //if empty_dish is found, then set new_k to it
        kuM[tt] = new_k + 1; //+1 to be consistent with matlab notations
        if (new_k + 1 > length_mC){ //length_mC tells us number of active dishes right now
          double* mC_expanded = (double*)mxRealloc(mC, (length_mC+1)*sizeof(double));
          if (mC_expanded) { mC = mC_expanded; }
          length_mC++;
          mC[new_k] = 0; //new dish
        }
        mC[new_k] += 1;
        if (new_k + 1 > length_nC){
          double* muC_expanded = (double*)mxRealloc(muC, (length_muC+1)*sizeof(double));
          if (muC_expanded) { muC = muC_expanded; }
          double* nC_expanded = (double*)mxRealloc(nC, (length_nC+1)*sizeof(double));
          if (nC_expanded) { nC = nC_expanded; }
          length_nC++;
          length_muC++;
          muC[new_k] = d0 * sigmaSqd / sigmaSqd0;
          nC[new_k] = 0;
        }
        for (mwSize ee_i = 0; ee_i < table_size; ee_i++){
          uint32_t ee = examp_tuM[ee_i] - 1;
          kM[ee] = new_k+1;
          double residC = (params[1]) ? resids[ee] - muD[kU[ee]-1] : resids[ee] - d[kU[ee]-1];
          muC[new_k] = updateCRFMu(muC, nC, residC, new_k, true, model);
          //NOTE: this is a hack! clamp muC to [-30, 30]
          if (fabs(muC[new_k]) > 30){ int sign = (muC[new_k] > 0) - (muC[new_k] < 0); muC[new_k] = sign*30; }
          nC[new_k] += 1;
        }
      }
    } //close loop that goes through the tables for a given user
    //release this user's temporaries so memory does not pile up across users
    for (mwSize i = 0; i < TuM; i++ ){ mxFree(tuM_cell[i]); }
    mxFree(tuM_cell);
    mxFree(tuM_cell_size);
    mxFree(tuM);
    mxArray* kuM_out;
    kuM_out = mxCreateNumericMatrix(1, TuM, mxUINT32_CLASS, mxREAL);
    if (kuM_out){
      //kuM is uint32 data, so it is attached with mxSetData; the buffer created by
      //mxCreateNumericMatrix is freed first because mxSetData does not release it
      mxFree(mxGetData(kuM_out));
      mxSetData(kuM_out, kuM);
      mxSetCell(sampkuM_out, uu, kuM_out);
    }
  } //close loop over all users
  //set up outputs for remaining variables
  mxArray* mC_out = mxCreateDoubleMatrix(1, length_mC, mxREAL);
  mxArray* nC_out = mxCreateDoubleMatrix(1, length_nC, mxREAL);
  mxArray* muC_out = mxCreateDoubleMatrix(1, length_muC, mxREAL);
  mxArray* kM_out = mxCreateNumericMatrix(1, length_kM, mxUINT32_CLASS, mxREAL);
  //hand the mxMalloc'd working buffers to the output arrays, freeing the buffers they replace
  mxFree(mxGetPr(mC_out));
  mxSetPr(mC_out, mC);
  mxFree(mxGetPr(nC_out));
  mxSetPr(nC_out, nC);
  mxFree(mxGetPr(muC_out));
  mxSetPr(muC_out, muC);
  mxFree(mxGetData(kM_out));
  mxSetData(kM_out, kM);
  //only write the output slots the caller actually requested (plhs[0] is always available)
  plhs[0] = sampkuM_out;
  if (nlhs > 1){ plhs[1] = mC_out; }
  if (nlhs > 2){ plhs[2] = nC_out; }
  if (nlhs > 3){ plhs[3] = muC_out; }
  if (nlhs > 4){ plhs[4] = kM_out; }
  mexPrintf("Finished sampling dishes\n");
}
// Sample factor vectors
// Function written from perspective of sampling user factor vectors with cross-topics
// Switch roles of user-item inputs to sample item factor vectors
//
// For every user u and every item topic i < KM, forms the Gaussian posterior
// of the factor vector from the prior (precision LambdaU, mean muU) and the
// user's examples assigned to topic i, then writes one draw from it to
// c[u*numTopicFacs + i*numTopicFacs*numUsers ...].
//
// items, zU and zM hold 1-based indices, as do the example ids unpacked from
// exampsByUser. KU is not read by this function.
// NOTE(review): the LAPACK `info` status codes are not checked, so a failed
// factorization goes unnoticed.
void sampleTopicFactorVectors(uint32_t* items, double* resids, const mxArray* exampsByUser,
                              int KU, int KM, int numUsers, int numItems, double invSigmaSqd,
                              ptrdiff_t numTopicFacs, double* LambdaU, double* muU, double* c, double* d,
                              uint32_t* zU, uint32_t* zM){
   // Array of random number generators
   gsl_rng** rngs = getRngArray();

   // Extract internals of jagged arrays so that the parallel loop below
   // never has to call into the MATLAB API
   uint32_t** userExamps;
   mwSize* userLens;
   unpackJagged(exampsByUser, &userExamps, &userLens, numUsers);

   ptrdiff_t numTopicFacsSqd = numTopicFacs*numTopicFacs;
   ptrdiff_t numTopicFacsTimesNumItems = numTopicFacs*numItems;
   ptrdiff_t numTopicFacsTimesNumUsers = numTopicFacs*numUsers;

   // BLAS constants
   char uplo[] = "U";
   char trans[] = "N";
   char diag[] = "N";
   ptrdiff_t oneInt = 1;
   double oneDbl = 1;
   double zeroDbl = 0;

   // Compute muBase = LambdaU*muU
   double* muBase = mxMalloc(numTopicFacs*sizeof(*muBase));
   dsymv(uplo, &numTopicFacs, &oneDbl, LambdaU, &numTopicFacs, muU, &oneInt, &zeroDbl, muBase, &oneInt);

   // Allocate memory for new mean and precision parameters:
   // one vector and one matrix per (thread, item topic) pair
   double** muNew[MAX_NUM_THREADS];
   double** LambdaNew[MAX_NUM_THREADS];
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++){
      muNew[thread] = mxMalloc(KM*sizeof(**muNew));
      LambdaNew[thread] = mxMalloc(KM*sizeof(**LambdaNew));
      for(int i = 0; i < KM; i++){
         muNew[thread][i] = mxMalloc(numTopicFacs*sizeof(***muNew));
         LambdaNew[thread][i] = mxMalloc(numTopicFacsSqd*sizeof(***LambdaNew));
      }
   }

#pragma omp parallel for
   for(int u = 0; u < numUsers; u++){
      int thread = omp_get_thread_num();
      for(int i = 0; i < KM; i++){
         // Initialize new mean to muBase
         dcopy(&numTopicFacs, muBase, &oneInt, muNew[thread][i], &oneInt);
         // Initialize new precision to LambdaU
         dcopy(&numTopicFacsSqd, LambdaU, &oneInt, LambdaNew[thread][i], &oneInt);
      }

      // Iterate over user's examples
      const mwSize len = userLens[u];
      const uint32_t* examps = userExamps[u];
      for(mwSize j = 0; j < len; j++){
         uint32_t e = examps[j]-1;
         int m = items[e]-1;
         int userTop = zU[e]-1;
         int itemTop = zM[e]-1;

         // Item vector for this rated item
         double* dVec = d + m*numTopicFacs + userTop*numTopicFacsTimesNumItems;

         // Compute posterior sufficient statistics for factor vector
         // Add resid * dVec/sigmaSqd to muNew
         double resid = resids[e];
         resid *= invSigmaSqd;
         daxpy(&numTopicFacs, &resid, dVec, &oneInt, muNew[thread][itemTop], &oneInt);

         // Add (dVec * dVec^t)/sigmaSqd to LambdaNew
         // Exploit symmetric structure of LambdaNew
         dsyr(uplo, &numTopicFacs, &invSigmaSqd, dVec, &oneInt, LambdaNew[thread][itemTop],
              &numTopicFacs);
      }

      for(int i = 0; i < KM; i++){
         // Compute upper Cholesky factor of LambdaNew
         ptrdiff_t info;
         dpotrf(uplo, &numTopicFacs, LambdaNew[thread][i], &numTopicFacs, &info);

         // Solve for (LambdaNew)^-1*muNew using Cholesky factor
         dpotrs(uplo, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, muNew[thread][i],
                &numTopicFacs, &info);

         // Sample vector of N(0,1) variables
         gsl_rng* rng = rngs[thread];
         double* cVec = c + u*numTopicFacs + i*numTopicFacsTimesNumUsers;
         for(int f = 0; f < numTopicFacs; f++)
            cVec[f] = gsl_ran_gaussian(rng, 1);

         // Solve for (chol(LambdaNew,'U'))^-1*N(0,1)
         dtrtrs(uplo, trans, diag, &numTopicFacs, &oneInt, LambdaNew[thread][i],
                &numTopicFacs, cVec, &numTopicFacs, &info);

         // Add muNew to cVec
         daxpy(&numTopicFacs, &oneDbl, muNew[thread][i], &oneInt, cVec, &oneInt);
      }
   }
   // Clean up
   mxFree(userExamps);
   mxFree(userLens);
   mxFree(muBase);
   for(int thread = 0; thread < MAX_NUM_THREADS; thread++){
      for(int i = 0; i < KM; i++){
         mxFree(muNew[thread][i]);
         mxFree(LambdaNew[thread][i]);
      }
      mxFree(muNew[thread]);
      mxFree(LambdaNew[thread]);
   }
}