/*
 * MEX gateway: draw one categorical sample per entry of a column-index vector.
 *
 * Inputs
 *   prhs[0]  real double matrix, K x numDists; column j holds the probabilities
 *            of the K categories of distribution j.
 *   prhs[1]  32-bit integer column vector (vecLen x 1) of 1-based column
 *            indices into prhs[0].
 * Output
 *   plhs[0]  uint32 vecLen x 1 vector of sampled 1-based category indices.
 *
 * Each sample compares one U(0,1) draw against the running cumulative sum of
 * the selected column; the last category is returned when the cumulative sum
 * never exceeds the draw.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    (void)nlhs;

    // Validate inputs up front: a bad argument must raise a MATLAB error,
    // not read out of bounds inside the parallel loop.
    if (nrhs < 2) {
        mexErrMsgIdAndTxt("sampleMex:invalidInput",
                          "Two inputs required: a probability matrix and a vector of column indices.");
    }
    if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0])) {
        mexErrMsgIdAndTxt("sampleMex:invalidInput",
                          "First input must be a real double matrix.");
    }
    if (!(mxIsUint32(prhs[1]) || mxIsInt32(prhs[1]))) {
        mexErrMsgIdAndTxt("sampleMex:invalidInput",
                          "Second input must be a 32-bit integer vector of column indices.");
    }

    omp_set_num_threads(MAX_NUM_THREADS);

    // Array of static random number generators
    gsl_rng** rngs = getRngArray();

    // Extract input information
    const double* p = mxGetPr(prhs[0]);                           // K x numDists
    const mwSize K = mxGetM(prhs[0]);                             // Number of categories
    const mwSize numDists = mxGetN(prhs[0]);                      // Number of distributions
    const uint32_t* cols = (const uint32_t*)mxGetData(prhs[1]);  // vecLen x 1
    const mwSize vecLen = mxGetM(prhs[1]);

    if (mxGetNumberOfElements(prhs[1]) < vecLen) {
        mexErrMsgIdAndTxt("sampleMex:invalidInput",
                          "Second input must contain at least one column.");
    }
    if (vecLen > 0 && K < 1) {
        mexErrMsgIdAndTxt("sampleMex:invalidInput",
                          "Probability matrix must have at least one row.");
    }
    // Serial range check; MEX error functions must not be called from the
    // OpenMP worker threads below.
    for (mwSize e = 0; e < vecLen; e++) {
        if (cols[e] < 1 || cols[e] > numDists) {
            mexErrMsgIdAndTxt("sampleMex:invalidInput",
                              "Column index %lu at position %lu is outside 1..%lu.",
                              (unsigned long)cols[e], (unsigned long)(e + 1),
                              (unsigned long)numDists);
        }
    }

    // Prepare output
    // New array auto-initialized to zeros
    plhs[0] = mxCreateNumericMatrix(vecLen, 1, mxUINT32_CLASS, mxREAL);
    uint32_t* vec = (uint32_t*)mxGetData(plhs[0]);

    #pragma omp parallel for
    for (mwSize e = 0; e < vecLen; e++) {
        // Find correct probability vector (offset computed in size_t so it
        // cannot wrap at 32 bits)
        const double* pVec = p + (size_t)K * ((size_t)cols[e] - 1);

        // Generate U(0,1) random number
        const double rnd = gsl_rng_uniform(rngs[omp_get_thread_num()]);

        // Compare rnd to cumulative probability sum to sample topic
        double cumsum = pVec[0];
        uint32_t i = 1;
        for (; (i < K) && (rnd > cumsum); i++) {
            cumsum += pVec[i];
        }
        vec[e] = i;
    }
}
// Sample offsets // Function written from perspective of sample c offsets // Switch roles of user-item inputs to sample d offsets void sampleOffsets(uint32_t* users, uint32_t* items, const mxArray* exampsByUser, int KU, int KM, int numUsers, double invSigmaSqd, double invSigmaSqd0, double c0, double* c, double* d, uint32_t* zU, uint32_t* zM, double* resids){ // Array of static random number generators gsl_rng** rngs = getRngArray(); // Extract internals of jagged arrays uint32_t** userExamps; mwSize* userLens; unpackJagged(exampsByUser, &userExamps, &userLens, numUsers); // Prior term for offsets const double ratio = c0*invSigmaSqd0; // Allocate memory for storing topic counts int* counts[MAX_NUM_THREADS]; for(int thread = 0; thread < MAX_NUM_THREADS; thread++) counts[thread] = mxMalloc(KM*sizeof(*counts)); #pragma omp parallel for for(int u = 0; u < numUsers; u++){ int thread = omp_get_thread_num(); // Initialize c offsets to 0 double* cPtr = c + u*KM; fillArrayD(cPtr, KM, 0); // Initialize topic counts to 0 fillArrayI(counts[thread], KM, 0); // Iterate over user's examples computing sufficient stats mwSize len = userLens[u]; uint32_t* examps = userExamps[u]; for(int j = 0; j < len; j++){ uint32_t e = examps[j]-1; int i = zM[e]-1; if(KU > 0) cPtr[i] += (resids[e] - d[(items[e]-1)*KU + (zU[e]-1)]); else cPtr[i] += resids[e]; counts[thread][i]++; } // Sample new offset values using sufficient stats for(int i = 0; i < KM; i++){ double variance = 1.0/(invSigmaSqd0 + counts[thread][i]*invSigmaSqd); cPtr[i] = (ratio + cPtr[i]*invSigmaSqd)*variance + gsl_ran_gaussian(rngs[omp_get_thread_num()], sqrt(variance)); } } // Clean up mxFree(userExamps); mxFree(userLens); for(int thread = 0; thread < MAX_NUM_THREADS; thread++) mxFree(counts[thread]); }
/*
 * MEX gateway: seed every generator returned by getRngArray().
 *
 * Input
 *   prhs[0]  real double scalar; rounded to the nearest non-negative integer
 *            to form the base seed.
 *
 * Generator r (0 <= r < MAX_NUM_THREADS) is seeded with 2*(r+seed)+1, so each
 * generator receives a distinct odd seed.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    (void)nlhs;
    (void)plhs;

    if (nrhs < 1 || !mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) || mxIsEmpty(prhs[0])) {
        mexErrMsgIdAndTxt("seedMexRand:invalidInput",
                          "One input required: a real double scalar seed.");
    }

    // Extract input information
    const double seedIn = *mxGetPr(prhs[0]);

    // Converting a negative, NaN or too-large double to unsigned long is
    // undefined behaviour, so reject those values explicitly.
    if (!mxIsFinite(seedIn) || seedIn < 0 || seedIn + .5 >= (double)(~0UL)) {
        mexErrMsgIdAndTxt("seedMexRand:invalidInput",
                          "Seed must be a finite, non-negative number that fits in an unsigned long.");
    }
    const unsigned long seed = (unsigned long)(seedIn + .5);
    mexPrintf("Running seedMexRand with seed = %lu\n", seed);

    gsl_rng** rngs = getRngArray();
    for (unsigned long r = 0; r < MAX_NUM_THREADS; r++) {
        gsl_rng_set(rngs[r], 2*(r+seed)+1);
    }
}
// Sample topic parameters // Function written from perspective of sampling user topics parameters // Switch roles of user-item inputs to sample item topics parameters void sampleTopicParams(const mxArray* exampsByUser, int KU, int numUsers, double alpha, double* logthetaU, uint32_t* zU){ // Array of static random number generators gsl_rng** rngs = getRngArray(); // Prior term for Dirichlet const double ratio = alpha/KU; // Allocate memory for storing topic counts double* counts[MAX_NUM_THREADS]; for(int thread = 0; thread < MAX_NUM_THREADS; thread++) counts[thread] = mxMalloc(KU*sizeof(**counts)); #pragma omp parallel for for(int u = 0; u < numUsers; u++){ int thread = omp_get_thread_num(); // Initialize to prior term for(int i = 0; i < KU; i++) counts[thread][i] = ratio; // Iterate over user's examples computing sufficient stats mxArray* exampsArray = mxGetCell(exampsByUser, u); mwSize len = mxGetN(exampsArray); uint32_t* examps = (uint32_t*) mxGetData(exampsArray); for(int j = 0; j < len; j++) counts[thread][zU[examps[j]-1]-1]++; // Sample new topic parameters double* logthetaPtr = logthetaU + u*KU; gsl_ran_dirichlet(rngs[omp_get_thread_num()], KU, counts[thread], logthetaPtr); // Take logs for(int i = 0; i < KU; i++) logthetaPtr[i] = log(logthetaPtr[i]); } // Clean up for(int thread = 0; thread < MAX_NUM_THREADS; thread++) mxFree(counts[thread]); }
// Sample topics // Function written from perspective of sampling user topics // Switch roles of user-item inputs to sample item topics void sampleTopics(uint32_t* users, uint32_t* items, ptrdiff_t KU, ptrdiff_t KM, double twoSigmaSqd, double* logthetaU, double* c, double* d, uint32_t* zU, uint32_t* zM, double* resids, mwSize numExamples){ // Array of static random number generators gsl_rng** rngs = getRngArray(); // Allocate memory for log probabilities double* logProb[MAX_NUM_THREADS]; for(ptrdiff_t thread = 0; thread < MAX_NUM_THREADS; thread++) logProb[thread] = mxMalloc(KU*sizeof(**logProb)); #pragma omp parallel for for(mwSize e = 0; e < numExamples; e++){ ptrdiff_t thread = omp_get_thread_num(); // Compute logthetaU - log likelihood term ptrdiff_t u = users[e]-1; double* logthetaPtr = logthetaU + u*KU; double* dPtr = d + (items[e]-1)*KU; double residMinusC; if(KM > 0) residMinusC = resids[e] - c[u*KM + (zM[e]-1)]; else residMinusC = resids[e]; double max = -INFINITY; for(ptrdiff_t i = 0; i < KU; i++){ double err = residMinusC - dPtr[i]; logProb[thread][i] = logthetaPtr[i] - err*err/twoSigmaSqd; if(logProb[thread][i] > max) max = logProb[thread][i]; } zU[e] = sampleDiscreteLogProb(rngs[thread], logProb[thread], KU, max); } // Clean up for(ptrdiff_t thread = 0; thread < MAX_NUM_THREADS; thread++) mxFree(logProb[thread]); }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){ //setting up inputs //first, set up the structs const mxArray* data = mxDuplicateArray(prhs[0]); const mxArray* model = mxDuplicateArray(prhs[1]); const mxArray* samp = mxDuplicateArray(prhs[2]); if (data == NULL || model == NULL || samp == NULL){ mexPrintf("Error duplicating inputs\n"); exit(EXIT_FAILURE); } const mxLogical* params = mxGetLogicals(prhs[3]); int numExamples = 0; //then, set up the cell array pointers mxArray* exampsByUserItem = NULL; mxArray* sampkuM = NULL; mxArray* samptuM = NULL; mxArray* sampnuM = NULL; //then, the double data matrices (as vectors) double* c = NULL; double* d = NULL; double* muC = NULL; double* muD = NULL; double* mC = NULL; double* nC = NULL; double* nD = NULL; //the scalar values double c0, betaM, gammaM; c0 = betaM = gammaM = 0; int length_mC, length_nC, length_kM, length_muC; length_mC = length_nC = length_kM = length_muC = 0; //the uint32_t numerical matrices uint32_t* kM = NULL; uint32_t* kU = NULL; const double* resids = mxGetPr(mxGetField(samp, 0, "resids")); const double d0 = (*mxGetPr(mxGetField(model, 0, "d0"))); const double sigmaSqd = (*mxGetPr(mxGetField(model, 0, "sigmaSqd"))); const double sigmaSqd0 = (*mxGetPr(mxGetField(model, 0, "sigmaSqd0"))); const double invsigmaSqd = (*mxGetPr(mxGetField(model, 0, "invsigmaSqd"))); const double invsigmaSqd0 = (*mxGetPr(mxGetField(model, 0, "invsigmaSqd0"))); if (params[0]){ //isItemTopic == true mexPrintf("Sampling dishes for users\n"); numExamples = (*mxGetPr(mxGetField(data, 0, "numUsers"))); //dereference to get value of numUsers exampsByUserItem = mxGetField(data, 0, "exampsByUser"); c = mxGetPr(mxGetField(samp, 0, "c")); d = mxGetPr(mxGetField(samp, 0, "d")); sampkuM = mxGetField(samp, 0, "kuM"); samptuM = mxGetField(samp, 0, "tuM"); sampnuM = mxGetField(samp, 0, "nuM"); length_mC = mxGetN(mxGetField(samp, 0, "mC")); mC = (double*)mxMalloc((length_mC)*sizeof(double)); memcpy(mC, 
mxGetPr(mxGetField(samp, 0, "mC")), length_mC*sizeof(double)); length_nC = mxGetN(mxGetField(samp, 0, "nC")); nC = (double*)mxMalloc((length_nC)*sizeof(double)); memcpy(nC, mxGetPr(mxGetField(samp, 0, "nC")), length_nC*sizeof(double)); nD = mxGetPr(mxGetField(samp, 0, "nD")); //we don't modify nD, so it's OK to just have it be a pointer length_kM = mxGetN(mxGetField(samp, 0, "kM")); kM = (uint32_t*)mxMalloc((length_kM)*sizeof(uint32_t)); memcpy(kM, (uint32_t*)mxGetData(mxGetField(samp, 0, "kM")), length_kM*sizeof(uint32_t)); kU = (uint32_t*)mxGetData(mxGetField(samp, 0, "kU")); //not being modified length_muC = mxGetN(mxGetField(samp, 0, "muC")); muC = (double*)mxMalloc((length_muC)*sizeof(double)); memcpy(muC, mxGetPr(mxGetField(samp, 0, "muC")), length_muC*sizeof(double)); muD = mxGetPr(mxGetField(samp, 0, "muD")); c0 = (*mxGetPr(mxGetField(model, 0, "c0"))); betaM = (*mxGetPr(mxGetField(model, 0, "betaM"))); gammaM = (*mxGetPr(mxGetField(model, 0, "gammaM"))); } else{ //isItemTopic == false mexPrintf("Sampling dishes for items\n"); numExamples = (*mxGetPr(mxGetField(data, 0, "numItems"))); exampsByUserItem = mxGetField(data, 0, "exampsByItem"); c = mxGetPr(mxGetField(samp, 0, "d")); d = mxGetPr(mxGetField(samp, 0, "c")); sampkuM = mxGetField(samp, 0, "kjU"); samptuM = mxGetField(samp, 0, "tjU"); sampnuM = mxGetField(samp, 0, "njU"); length_mC = mxGetN(mxGetField(samp, 0, "mD")); mC = (double*)mxMalloc((length_mC)*sizeof(double)); memcpy(mC, mxGetPr(mxGetField(samp, 0, "mD")), length_mC*sizeof(double)); length_nC = mxGetN(mxGetField(samp, 0, "nD")); nC = (double*)mxMalloc((length_nC)*sizeof(double)); memcpy(nC, mxGetPr(mxGetField(samp, 0, "nD")), length_nC*sizeof(double)); nD = mxGetPr(mxGetField(samp, 0, "nC")); length_kM = mxGetN(mxGetField(samp, 0, "kU")); kM = (uint32_t*)mxMalloc((length_kM)*sizeof(uint32_t)); memcpy(kM, (uint32_t*)mxGetData(mxGetField(samp, 0, "kU")), length_kM*sizeof(uint32_t)); kU = (uint32_t*)mxGetData(mxGetField(samp, 0, "kM")); 
length_muC = mxGetN(mxGetField(samp, 0, "muD")); muC = (double*)mxMalloc((length_muC)*sizeof(double)); memcpy(muC, mxGetPr(mxGetField(samp, 0, "muD")), length_muC*sizeof(double)); muD = mxGetPr(mxGetField(samp, 0, "muC")); c0 = (*mxGetPr(mxGetField(model, 0, "d0"))); betaM = (*mxGetPr(mxGetField(model, 0, "betaU"))); gammaM = (*mxGetPr(mxGetField(model, 0, "gammaU"))); } mexPrintf("Initialized values successfully\n"); //outputs mxArray* sampkuM_out = mxCreateCellMatrix(numExamples, 1); omp_set_num_threads(MAX_NUM_THREADS); gsl_rng** rngs = getRngArray(); //RNG part int thread = omp_get_thread_num(); const gsl_rng* rng = rngs[thread]; //#pragma omp parallel for for (int uu = 0; uu < numExamples; uu++){ mwSize kuM_size = mxGetN(mxGetCell(sampkuM, uu)); mwSize tuM_size = mxGetN(mxGetCell(samptuM, uu)); mwSize TuM = mxGetN(mxGetCell(sampnuM, uu)); //initialize TuM when going to new user; TuM = nuM_size uint32_t* kuM = (uint32_t*)mxMalloc((kuM_size)*sizeof(uint32_t)); //needs to be modified, so allocating on heap memcpy(kuM, (uint32_t*)mxGetData(mxGetCell(sampkuM, uu)), kuM_size*sizeof(uint32_t)); //copying onto heap uint32_t* tuM = (uint32_t*)mxMalloc(tuM_size*sizeof(uint32_t)); memcpy(tuM, (uint32_t*)mxGetData(mxGetCell(samptuM, uu)), tuM_size*sizeof(uint32_t)); uint32_t* examps = (uint32_t*)mxGetData(mxGetCell(exampsByUserItem, uu)); int numExamples_uu = mxGetN(mxGetCell(exampsByUserItem, uu)); //size of examps uint32_t** tuM_cell = (uint32_t**)mxMalloc(TuM*sizeof(uint32_t*)); uint32_t tuM_cell_size[TuM]; for (int i = 0; i < TuM; i++ ){ tuM_cell[i] = NULL; tuM_cell_size[i] = 0; } if (tuM_size != numExamples_uu){ mexPrintf("Error: size of tuM matrix for user %d is %d; it should equal numExamples, which is %d\n", uu, tuM_size, numExamples_uu); exit(EXIT_FAILURE); } for (mwSize ee_i = 0; ee_i < numExamples_uu; ee_i++){ //numExamples_uu == tuM_size; assembling all the examples for this user sorted by table in a cell array if (tuM[ee_i] > TuM){ mexPrintf("Error: table 
number for user %d, example %d, exceeds number of tables!\n", uu, ee_i); } uint32_t* tuM_cell_table = tuM_cell[tuM[ee_i]-1]; if (tuM_cell_table == NULL){ //i.e., the entry is empty thus far tuM_cell_table = (uint32_t*)mxMalloc(sizeof(uint32_t)); tuM_cell_table[0] = examps[ee_i]; tuM_cell[tuM[ee_i]-1] = tuM_cell_table; tuM_cell_size[tuM[ee_i]-1] = 1; } else { int orig_size = tuM_cell_size[tuM[ee_i]-1]; uint32_t* tuM_cell_expanded = (uint32_t*)mxRealloc(tuM_cell_table, (orig_size+1)*sizeof(uint32_t)); if (tuM_cell_expanded){ tuM_cell_expanded[orig_size] = examps[ee_i]; tuM_cell[tuM[ee_i]-1] = tuM_cell_expanded; tuM_cell_size[tuM[ee_i]-1] += 1; } else { mexPrintf("Could not enlarge tuM cell array for user %d, example %d\n", uu, ee_i); } } } //sample a new dish for each table for (mwSize tt = 0; tt < TuM; tt++ ){ uint32_t* examp_tuM = tuM_cell[tt]; int table_size = tuM_cell_size[tt]; if (table_size > 0){ int old_k = kuM[tt] - 1; //update global dish sufficient stats immediately mC[old_k] -= 1; if (mC[old_k] < 0){ mC[old_k] = 0; } for (mwSize ee_i = 0; ee_i < table_size; ee_i++){ //loop through all examples assigned to table uint32_t ee = examp_tuM[ee_i] - 1; double residC = (params[1]) ? resids[ee] - muD[kU[ee]-1] : resids[ee] - d[kU[ee]-1]; muC[old_k] = updateCRFMu(muC, nC, residC, old_k, false, model); //remove current rating from global sufficient stats //NOTE! 
this is a hack if (fabs(muC[old_k]) > 30){ int sign = (muC[old_k] > 0) - (muC[old_k] < 0); muC[old_k] = sign*30; } nC[old_k] -= 1; } int new_k = sampleDishFull(examp_tuM, resids, mC, muC, muD, kU, nC, nD, betaM, c0, sigmaSqd, invsigmaSqd, invsigmaSqd0, length_mC, table_size, rng); //skipping if condition that checks if length(new_k) > 1 int empty_dish = linearSearchDouble(nC, 0, length_nC); //see what table we selected if (empty_dish > -1){ new_k = empty_dish; } //if empty_dish is found, then set new_k to it kuM[tt] = new_k + 1; //+1 to be consistent with matlab notations if (new_k + 1 > length_mC){ //length_mC tells us number of active dishes right now double* mC_expanded = (double*)mxRealloc(mC, (length_mC+1)*sizeof(double)); if (mC_expanded) { mC = mC_expanded; } length_mC++; mC[new_k] = 0; //new dish } mC[new_k] += 1; if (new_k + 1 > length_nC){ double* muC_expanded = (double*)mxRealloc(muC, (length_muC+1)*sizeof(double)); if (muC_expanded) { muC = muC_expanded; } double* nC_expanded = (double*)mxRealloc(nC, (length_nC+1)*sizeof(double)); if (nC_expanded) { nC = nC_expanded; } length_nC++; length_muC++; muC[new_k] = d0 * sigmaSqd / sigmaSqd0; nC[new_k] = 0; } for (mwSize ee_i = 0; ee_i < table_size; ee_i++){ uint32_t ee = examp_tuM[ee_i] - 1; kM[ee] = new_k+1; double residC = (params[1]) ? resids[ee] - muD[kU[ee]-1] : resids[ee] - d[kU[ee]-1]; muC[new_k] = updateCRFMu(muC, nC, residC, new_k, true, model); //NOTE: this is a hack! 
if (fabs(muC[new_k]) > 30){ int sign = (muC[new_k] > 0) - (muC[new_k] < 0); muC[new_k] = sign*30; } nC[new_k] += 1; } } } //close loop that goes through the tables for a given user mxArray* kuM_out; kuM_out = mxCreateNumericMatrix(1, TuM, mxUINT32_CLASS, mxREAL); if (kuM_out){ mxSetPr(kuM_out, kuM); mxSetCell(sampkuM_out, uu, kuM_out); } } //close loop over all users //set up outputs for remaining variables mxArray* mC_out = mxCreateDoubleMatrix(1, length_mC, mxREAL); mxArray* nC_out = mxCreateDoubleMatrix(1, length_nC, mxREAL); mxArray* muC_out = mxCreateDoubleMatrix(1, length_muC, mxREAL); mxArray* kM_out = mxCreateNumericMatrix(1, length_kM, mxUINT32_CLASS, mxREAL); mxSetPr(mC_out, mC); mxSetPr(nC_out, nC); mxSetPr(muC_out, muC); mxSetPr(kM_out, kM); plhs[0] = sampkuM_out; plhs[1] = mC_out; plhs[2] = nC_out; plhs[3] = muC_out; plhs[4] = kM_out; mexPrintf("Finished sampling dishes\n"); }
// Sample factor vectors // Function written from perspective of sampling user factor vectors with cross-topics // Switch roles of user-item inputs to sample item factor vectors void sampleTopicFactorVectors(uint32_t* items, double* resids, const mxArray* exampsByUser, int KU, int KM, int numUsers, int numItems, double invSigmaSqd, ptrdiff_t numTopicFacs, double* LambdaU, double* muU, double* c, double* d, uint32_t* zU, uint32_t* zM){ // Array of random number generators gsl_rng** rngs = getRngArray(); // Extract internals of jagged arrays uint32_t** userExamps; mwSize* userLens; unpackJagged(exampsByUser, &userExamps, &userLens, numUsers); ptrdiff_t numTopicFacsSqd = numTopicFacs*numTopicFacs; ptrdiff_t numTopicFacsTimesNumItems = numTopicFacs*numItems; ptrdiff_t numTopicFacsTimesNumUsers = numTopicFacs*numUsers; // BLAS constants char uplo[] = "U"; char trans[] = "N"; char diag[] = "N"; ptrdiff_t oneInt = 1; double oneDbl = 1; double zeroDbl = 0; // Compute muBase = LambdaU*muU double* muBase = mxMalloc(numTopicFacs*sizeof(*muBase)); dsymv(uplo, &numTopicFacs, &oneDbl, LambdaU, &numTopicFacs, muU, &oneInt, &zeroDbl, muBase, &oneInt); // Allocate memory for new mean and precision parameters double** muNew[MAX_NUM_THREADS]; double** LambdaNew[MAX_NUM_THREADS]; for(int thread = 0; thread < MAX_NUM_THREADS; thread++){ muNew[thread] = mxMalloc(KM*sizeof(**muNew)); LambdaNew[thread] = mxMalloc(KM*sizeof(**LambdaNew)); for(int i = 0; i < KM; i++){ muNew[thread][i] = mxMalloc(numTopicFacs*sizeof(***muNew)); LambdaNew[thread][i] = mxMalloc(numTopicFacsSqd*sizeof(***LambdaNew)); } } #pragma omp parallel for for(int u = 0; u < numUsers; u++){ int thread = omp_get_thread_num(); for(int i = 0; i < KM; i++){ // Initialize new mean to muBase dcopy(&numTopicFacs, muBase, &oneInt, muNew[thread][i], &oneInt); // Initialize new precision to LambdaU dcopy(&numTopicFacsSqd, LambdaU, &oneInt, LambdaNew[thread][i], &oneInt); } // Iterate over user's examples mxArray* exampsArray = 
mxGetCell(exampsByUser, u); mwSize len = mxGetN(exampsArray); uint32_t* examps = (uint32_t*) mxGetData(exampsArray); for(int j = 0; j < len; j++){ uint32_t e = examps[j]-1; int m = items[e]-1; int userTop = zU[e]-1; int itemTop = zM[e]-1; // Item vector for this rated item double* dVec = d + m*numTopicFacs + userTop*numTopicFacsTimesNumItems; // Compute posterior sufficient statistics for factor vector // Add resid * dVec/sigmaSqd to muNew double resid = resids[e]; resid *= invSigmaSqd; daxpy(&numTopicFacs, &resid, dVec, &oneInt, muNew[thread][itemTop], &oneInt); // Add (dVec * dVec^t)/sigmaSqd to LambdaNew // Exploit symmetric structure of LambdaNew dsyr(uplo, &numTopicFacs, &invSigmaSqd, dVec, &oneInt, LambdaNew[thread][itemTop], &numTopicFacs); } for(int i = 0; i < KM; i++){ // Compute upper Cholesky factor of LambdaNew ptrdiff_t info; dpotrf(uplo, &numTopicFacs, LambdaNew[thread][i], &numTopicFacs, &info); // Solve for (LambdaNew)^-1*muNew using Cholesky factor dpotrs(uplo, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, muNew[thread][i], &numTopicFacs, &info); // Sample vector of N(0,1) variables gsl_rng* rng = rngs[thread]; double* cVec = c + u*numTopicFacs + i*numTopicFacsTimesNumUsers; for(int f = 0; f < numTopicFacs; f++) cVec[f] = gsl_ran_gaussian(rng, 1); // Solve for (chol(LambdaNew,'U'))^-1*N(0,1) dtrtrs(uplo, trans, diag, &numTopicFacs, &oneInt, LambdaNew[thread][i], &numTopicFacs, cVec, &numTopicFacs, &info); // Add muNew to aVec daxpy(&numTopicFacs, &oneDbl, muNew[thread][i], &oneInt, cVec, &oneInt); } } // Clean up mxFree(userExamps); mxFree(userLens); mxFree(muBase); for(int thread = 0; thread < MAX_NUM_THREADS; thread++){ for(int i = 0; i < KM; i++){ mxFree(muNew[thread][i]); mxFree(LambdaNew[thread][i]); } mxFree(muNew[thread]); mxFree(LambdaNew[thread]); } }