void removeUV() { int u,f; for(u=0;u<NUSERS;u++) { int base0=useridx[u][0]; int d012=UNALL(u); int i; int dall=UNALL(u); double NuS = 1.0/sqrt(dall); double lNuSY[NFEATURES]; double sumY[NFEATURES]; ZERO(sumY); ZERO(lNuSY); int j; for(j=0;j<dall;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) sumY[f]+=sY[mm][f]; } int d0=UNTRAIN(u); for(f=0;f<NFEATURES;f++) lNuSY[f] = NuS * sumY[f]; double bUu=bU[u]; for(i=0; i<d012;i++) { int m=userent[base0+i]&USER_MOVIEMASK; err[base0+i]-=(bU[u] + bV[m]); for (f=0; f<NFEATURES; f++) err[base0+i]-=((sU[u][f] + lNuSY[f]) * sV[m][f]); } } }
void removeUV() { int u,f; for(u=0;u<NUSERS;u++) { int base0=useridx[u][0]; int d012=UNALL(u); int i; int dall=UNALL(u); double NuS = 1.0/sqrt(dall); double lNuSY[NFEATURES]; double sumY[NFEATURES]; ZERO(sumY); ZERO(lNuSY); int j; for(j=0;j<dall;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) sumY[f]+=sY[mm][f]; } int d0=UNTRAIN(u); for(f=0;f<NFEATURES;f++) lNuSY[f] = NuS * sumY[f]; //double bUu=bU[u]; //for(i=0; i<d012;i++) { //int m=userent[base0+i]&USER_MOVIEMASK; //err[base0+i]-=(bU[u] + bV[m]); //for (f=0; f<NFEATURES; f++) //err[base0+i]-=((sU[u][f] + lNuSY[f]) * sV[m][f]); //} // For all rated movies for(i=0;i<d012;i++) { int entloc = base0+i; unsigned int sdloc = sdbin[entloc]; int m=userent[entloc]&USER_MOVIEMASK; int day=userent[entloc]>>(USER_LMOVIEMASK+3); double devuhat = DEVuHat[entloc]; // Figure out the current error err[entloc] -= (bU[u] + bV[m] + bVbin[m][dbin(day)] + sdbU[sdloc] + alphabU[u] * devuhat); for (f=0; f<NFEATURES; f++) err[entloc] -= (( sU[u][f] + sdsU[sdloc+f*NENTRIES] + alphasU[u][f] * devuhat + lNuSY[f]) * sV[m][f]); } } }
void usertimemovie() { lg("User Time(Movie)\n"); int day0[NMOVIES]; ZERO(day0); // It is OK to look on all data for day0 because it is always known int i; for(i=0;i<NENTRIES;i++) { int m=userent[i]&USER_MOVIEMASK; int day=userent[i]>>(USER_LMOVIEMASK+3); if(!day0[m] || day0[m]>day) day0[m]=day; } int u; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int d0=UNTRAIN(u); // compute explanatory variable double usertime[NMOVIES]; int j; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); usertime[j]=DTIME(day-day0[m]); } userXX(usertime,&err[base],d0,d012,USERTIMEMOVIE_ALPHA); } }
void movieXuser(double *xuser, double alpha) { // Remove average but only use training data double avg[NMOVIES]; int moviecount[NMOVIES]; ZERO(avg); ZERO(moviecount); int u; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d0=UNTRAIN(u); int j; double xu=xuser[u]; for(j=0;j<d0;j++) { int m=userent[base+j]&USER_MOVIEMASK; avg[m]+=xu; moviecount[m]++; } } int m; for(m=0;m<NMOVIES;m++) avg[m]/=moviecount[m]; // compute unbiased estimator double theta[NMOVIES]; double var[NMOVIES]; ZERO(theta); ZERO(var); for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d0=UNTRAIN(u); int j; double xu=xuser[u]; for(j=0;j<d0;j++) { int m=userent[base+j]&USER_MOVIEMASK; // compute explanatory variable double x=xu-avg[m]; theta[m]+=err[base+j]*x; var[m]+=x*x; } } for(m=0; m<NMOVIES; m++) theta[m]=(theta[m]/(var[m]+1.e-20))*moviecount[m]/(moviecount[m]+alpha); //predict for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int j; double xu=xuser[u]; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; double x=xu-avg[m]; err[base+j]-=theta[m]*x; } } }
void removeUV() { int u,f; for(u=0;u<NUSERS;u++) { int base0=useridx[u][0]; int d012=UNALL(u); int i; int dall=UNALL(u); int j,j2; for(i=0; i<d012;i++) { int m=userent[base0+i]&USER_MOVIEMASK; int r=(userent[base0+i]>>USER_LMOVIEMASK)&7; r++; err[base0+i] = r - (GLOBAL_MEAN + wbU[u] + wbV[m]); } } }
void removeUV() { int u; for(u=0;u<NUSERS;u++) { int base0=useridx[u][0]; int d012=UNALL(u); int i; for(i=0; i<d012;i++) { int m=userent[base0+i]&USER_MOVIEMASK; err[base0+i]-=sU[u]*sV[m]; } } }
void removeUV() { computeU(); int u; for(u=0; u<NUSERS; u++) { int base0=useridx[u][0]; unsigned int *ent=&userent[base0]; int d012=UNALL(u); int i; double sUu=sU[u]; for(i=0; i<d012; i++) { err[base0+i]-=sUu*sV[ent[i]&USER_MOVIEMASK]; } } }
double rmseprobe() { int k=2; int u, f; int n=0; double s=0.; int i; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int base0=useridx[u][0]; int dall=UNALL(u); double NuS = 1.0/sqrt(dall); double lNuSY[NFEATURES]; double sumY[NFEATURES]; ZERO(sumY); ZERO(lNuSY); int j; for(j=0;j<dall;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) sumY[f]+=sY[mm][f]; } int d0=UNTRAIN(u); for(j=0;j<d0;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) lNuSY[f] = NuS * sumY[f]; } for(i=1;i<k;i++) base+=useridx[u][i]; int d=useridx[u][k]; //s+=fvsqr(&err[base],d); for(i=0; i<d;i++) { int m=userent[base+i]&USER_MOVIEMASK; double e=err[base+i]; for (f=0; f<NFEATURES; f++) { e-=(bU[u][f] + bV[m][f]); e-=((sU[u][f] + lNuSY[f]) * sV[m][f]); } s+=e*e; } n+=d; } return sqrt(s/n); }
void userXmovie(double *xmovie, double alpha) { int u; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int d0=UNTRAIN(u); // compute avg explanatory variable double x[NMOVIES]; int j; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; x[j]=xmovie[m]; } userXX(x,&err[base],d0,d012,alpha); } }
void usertime() { lg("User Time\n"); int u; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int d0=UNTRAIN(u); // It is OK to look on all data for day0 because it is always known int day0=uivmin(&userent[base],UNTOTAL(u))>>(USER_LMOVIEMASK+3); // compute explanatory variable double usertime[NMOVIES]; int j; for(j=0;j<d012;j++) { int day=userent[base+j]>>(USER_LMOVIEMASK+3); usertime[j]=DTIME(day-day0); } userXX(usertime,&err[base],d0,d012,USERTIME_ALPHA); } }
void uavg() { lg("User avg centering\n"); ZERO(useravg); int u; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d0=UNTRAIN(u); int i; for(i=0; i<d0;i++) useravg[u]+=err[base+i]; useravg[u]/=d0+USERAVG_ALPHA; } for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int i; for(i=0; i<d012;i++) { err[base++]-=useravg[u]; } } }
void score_setup() { int i,u; //weight_time_setup(); if(load_model) { fpV=fopen(fnameV,"rb"); fpU=fopen(fnameU,"rb"); if(fpV || fpU) { lg("Loading %s and %s\n",fnameV,fnameU); if(!fpV || !fpU) error("Cant open both files"); } } int day0[NMOVIES]; ZERO(day0); // It is OK to look on all data for day0 because it is always known for(i=0;i<NENTRIES;i++) { int m=userent[i]&USER_MOVIEMASK; int day=userent[i]>>(USER_LMOVIEMASK+3); if(!day0[m] || day0[m]>day) day0[m]=day; } DEVuHat = (float *) malloc(NENTRIES*sizeof(float)); sdbU = (float *) malloc(NENTRIES*sizeof(float)); sdsU = (float *) malloc(((unsigned int)NENTRIES)*((unsigned int)NFEATURES)*sizeof(float)); memset(DEVuHat,0,NENTRIES*sizeof(float)); memset(sdbU,0,NENTRIES*sizeof(float)); memset(sdsU,0,((unsigned int)NENTRIES)*((unsigned int)NFEATURES)*sizeof(float)); ZERO(sdbin); int tcount[100000]; ZERO(tcount); ZERO(avgdate); ZERO(avgdevu); ZERO(ucnt); int j; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int d0=UNTRAIN(u); // compute explanatory variable for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); if ( day < minday ) minday = day; if ( day > maxday ) maxday = day; //usertime[j]=DTIME(day-day0[m]); if ( day < 0 ) ;//toosmall++; else if ( day < 100000-1 ) tcount[day]++; else ;//toobig++; ucnt[u]++; avgdate[u] += day; } } printf("minday: %d, maxday: %d\n", minday, maxday); fflush(stdout); for(u=0;u<NUSERS;u++) { // 1) Find the average date of rating for every customer. In this step I include the probe dates also in the calculation of the average. 
avgdate[u] /= ucnt[u]; //printf("U: %d, avgdate: %f, ucnt: %d\n", u, avgdate[u], ucnt[u]); //fflush(stdout); } for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int d0=UNTRAIN(u); int j; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); //2) For every rating [i] in the data set (including probe) I calculate DEVu[i]: // DEVu[i] = sign(t[i] - t_mean_for_customer) * powf(abs(t[i] - t_mean_for_customer), 0.4); double DEVu = sign(day - avgdate[u]) * powf(abs(day - avgdate[u]), 0.4); avgdevu[u] += DEVu; //printf("U: %d, M: %d, day: %d, uavg: %f, DEVu: %f\n", u, m, day, avgdate[u], DEVu); //fflush(stdout); } } for(u=0;u<NUSERS;u++) { //3) Find the average DEVu[i] for every customer. His/hers probe DEVu[i] values are also included. avgdevu[u] /= ucnt[u]; //printf("U: %d, avgdevu: %f, avgdate: %f, ucnt: %d\n", u, avgdevu[u], avgdate[u], ucnt[u]); //fflush(stdout); } ZERO(maxDEVuHat); for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int d0=UNTRAIN(u); int j; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); // 2) For every rating [i] in the data set (including probe) I calculate DEVu[i]: // DEVu[i] = sign(t[i] - t_mean_for_customer) * powf(abs(t[i] - t_mean_for_customer), 0.4); double DEVu = sign(day - avgdate[u]) * powf(abs(day - avgdate[u]), 0.4); // 4) Subtract every customer's average DEVu_avg value from every time deviation: // DEVu_hat[i] = DEVu[i] - DEVu_avg_for_customer; double DEVuHat = DEVu - avgdevu[u]; //printf("U: %d, M: %d, ndevu: %f, day: %d, uavg: %f, DEVu: %f\n", u, m, DEVuHat, day, avgdate[u], DEVu); //fflush(stdout); // Get the max absolute value of a user's devu_hat values...maxDevu_hat... 
double tDEVu = fabs(DEVuHat); if ( tDEVu > maxDEVuHat[u] ) maxDEVuHat[u] = tDEVu; } } // Compute and store DEVuHats and create single day bin numbering per user int daysBinValue[maxday+1]; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int d0=UNTRAIN(u); int j; ZERO(daysBinValue); int dcount=0; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); DEVuHat[base+j] = devuHat(day,u); if ( daysBinValue[day] == 0 ) { sdbin[base+j] = base+j; daysBinValue[day] = base+j; if ( daysBinValue[day] > NENTRIES ) { printf("Days bin v: %d\n", daysBinValue[day]); fflush(stdout); } dcount++; } else { if ( daysBinValue[day] > NENTRIES ) { printf("Days bin v: %d\n", daysBinValue[day]); fflush(stdout); } sdbin[base+j] = daysBinValue[day]; } } } //for (i=minday; i < maxday; i++ ) { //printf("day: %d, count: %d\n", i, tcount[i]); //fflush(stdout); //} }
int doAllFeatures() { /* Initial biases */ { int u,m; for(u=0;u<NUSERS;u++) { bU[u]=0.0; } for(m=0;m<NMOVIES;m++) { bV[m]=0.0; } } /* Initial estimation for current feature */ { int u,m,f; double uvInit = sqrt(GLOBAL_MEAN/NFEATURES); for(u=0;u<NUSERS;u++) { for(f=0;f<NFEATURES;f++) { sU[u][f]= uvInit * (rand()%14000 + 2000) * 0.000001235f; } } for(m=0;m<NMOVIES;m++) { for(f=0;f<NFEATURES;f++) { sV[m][f]= uvInit * (rand()%14000 + 2000) * -0.000001235f; sY[m][f]=0.0; } } } /* Optimize current feature */ double nrmse=2., last_rmse=10.; double prmse = 0, last_prmse=0; double thr=sqrt(1.-E); int loopcount=0; double Gamma1 = G1; double Gamma2 = G2; while( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 15) && loopcount < 40 ) { last_rmse=nrmse; last_prmse=prmse; clock_t t0=clock(); loopcount++; double aErrAvg=0; double astepSuAvg=0; double astepSvAvg=0; double astepSyAvg=0; double abU=0, abV=0, asU=0, asV=0, asY=0; int n1=0, n2=0, n3=0; int u,m, f; for(u=0;u<NUSERS;u++) { // Calculate sumY and NuSY for each factor double sumY[NFEATURES]; ZERO(sumY); double lNuSY[NFEATURES]; ZERO(lNuSY); int base0=useridx[u][0]; int d0=UNTRAIN(u); int j; int f; int dall=UNALL(u); double NuS = 1.0/sqrt(dall); for(j=0;j<dall;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) sumY[f]+=sY[mm][f]; } for(f=0;f<NFEATURES;f++) { lNuSY[f] = NuS * sumY[f]; } double ycontrib[NFEATURES]; ZERO(ycontrib); // For all rated movies for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; // Figure out the current error double ee=err[base0+j]; double e2 = ee; e2 -= (bU[u] + bV[m]); for (f=0; f<NFEATURES; f++) e2 -= ((sU[u][f]+lNuSY[f])*sV[m][f]); //int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; //r++; //double rui = r - e2; //if ( rui > 5.00 ) //e2 += (rui-5.0); //else if (rui < 1.0) //e2 -= (1.0 - rui); // Train the biases double bUu = bU[u]; double bVm = bV[m]; //bU[u] += Gamma1 * (e2 - bUu * L6); //bV[m] += Gamma1 * (e2 - bVm * L6); bU[u] += Gamma1 * (e2 - bUu * LbU); 
bV[m] += Gamma1 * (e2 - bVm * LbV); aErrAvg+=fabs(e2); abU += fabs(bU[u]); abV += fabs(bV[m]); n1++; // update U V and slope component of Y double yfactor = NuS; for (f=0; f<NFEATURES; f++) { double sUu = sU[u][f]; double sVm = sV[m][f]; //sU[u][f] += ((Gamma2) * ((e2 * sVm) - L7 * sUu)); //sV[m][f] += ((Gamma2) * ((e2 * (sUu + lNuSY[f])) - L7 * sVm)); sU[u][f] += ((Gamma2) * ((e2 * sVm) - LsU * sUu)); sV[m][f] += ((Gamma2) * ((e2 * (sUu + lNuSY[f])) - LsV * sVm)); asU += fabs(sU[u][f]); asV += fabs(sV[m][f]); astepSuAvg+=fabs(e2 * sV[m][f]); astepSvAvg+=fabs(e2 * sU[u][f]); n2++; ycontrib[f] += e2 * sVm * yfactor; } } // Train Ys over all known movies for user for(j=0;j<dall;j++) { int m=userent[base0+j]&USER_MOVIEMASK; for (f=0; f<NFEATURES; f++) { double sYm = sY[m][f]; sY[m][f] += Gamma2 * (ycontrib[f] - LsY * sYm); asY += fabs(sY[m][f]); astepSyAvg+=fabs(ycontrib[f]); n3++; } } } aErrAvg/=n1; astepSuAvg/=n2; astepSvAvg/=n2; astepSyAvg/=n3; abU/=n1, abV/=n1, asU/=n2, asV/=n2, asY/=n2; double bUREG = 1.9074 / 100.0 * aErrAvg / abU; double bVREG = 1.9074 / 100.0 * aErrAvg / abV; double sUREG = 1.9074 / 100.0 * astepSuAvg / asU; double sVREG = 1.9074 / 100.0 * astepSvAvg / asV; double sYREG = 1.9074 / 100.0 * astepSyAvg / asY; printf("NREG - bU: %f bV: %f, sU: %f, sV: %f, sY: %f\n", bUREG, bVREG, sUREG, sVREG, sYREG); // Report rmse for main loop nrmse=0.; int ntrain=0; int elcnt=0; int k=2; int n=0; double s=0.; for(u=0;u<NUSERS;u++) { int base0=useridx[u][0]; int d0=UNTRAIN(u); int j; // Setup the Ys again double sumY[NFEATURES]; ZERO(sumY); double lNuSY[NFEATURES]; ZERO(lNuSY); int dall=UNALL(u); double NuS = 1.0/sqrt(dall); for(j=0;j<dall;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) sumY[f]+=sY[mm][f]; } for(f=0;f<NFEATURES;f++) lNuSY[f] = NuS * sumY[f]; for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; double ee = err[base0+j]; double e2 = ee; e2 -= (bU[u] + bV[m]); for (f=0; f<NFEATURES; f++) e2 -= ( (sU[u][f] + lNuSY[f]) 
* sV[m][f]); //int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; //r++; //double rui = r - e2; //if ( rui > 5.00 ) //e2 += (rui-5.0); //else if (rui < 1.0) //e2 -= (1.0 - rui); if( elcnt++ == 5000 ) { printf("0 E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[0], sV[m][0], sU[u][0], bU[u], bV[m], sY[m][0],u, m); printf("1 E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[1], sV[m][1], sU[u][1], bU[u], bV[m], sY[m][1],u, m); printf("2 E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[2], sV[m][2], sU[u][2], bU[u], bV[m], sY[m][2],u, m); printf("3 E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[3], sV[m][3], sU[u][3], bU[u], bV[m], sY[m][3],u, m); fflush(stdout); } nrmse+=e2*e2; } ntrain+=d0; // Sum up probe rmse int i; int base=useridx[u][0]; for(i=1;i<k;i++) base+=useridx[u][i]; int d=useridx[u][k]; for(i=0; i<d;i++) { int m=userent[base+i]&USER_MOVIEMASK; double e=err[base+i]; e-=(bU[u] + bV[m]); for (f=0; f<NFEATURES; f++) e-=((sU[u][f] + lNuSY[f]) * sV[m][f]); //int r=(userent[base+i]>>USER_LMOVIEMASK)&7; //r++; //double rui = r - e; //if ( rui > 5.00 ) //e += (rui-5.0); //else if (rui < 1.0) //e -= (1.0 - rui); s+=e*e; } n+=d; } nrmse=sqrt(nrmse/ntrain); prmse = sqrt(s/n); lg("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC); Gamma1 *= 0.90; Gamma2 *= 0.90; } /* Perform a final iteration in which the errors are clipped and stored */ removeUV(); //if(save_model) { //dappend_bin(fnameV,sV,NMOVIES); //dappend_bin(fnameU,sU,NUSERS); //} return 1; }
/*
 * doAllFeatures (factored RBM variant): trains the model by contrastive
 * divergence and then calls recordErrors(); always returns 1.
 *
 * Parameters touched: the factor arrays Aic[movie][rating][c] and Bcj[c][h]
 * (read back through Wij(m,r,h)), visbiases[movie][rating] and hidbiases[h].
 *
 * Initialisation: Aic = 0.02*randn()-0.01, Bcj = 0.2/3*randn()-0.1/3,
 * hidbiases = 0, visbiases = log(count of that rating for the movie / total
 * count for the movie) taken from moviercount[].
 *
 * Epoch loop: runs at least 14 and at most 80 epochs, and between those
 * bounds continues only while the training RMSE drops by more than E.
 * Momentum switches to finalmomentum after epoch 5; the three learning rates
 * are multiplied by 0.90 after every epoch past the sixth.
 *
 * Per user: hidden inputs are summed over the training entries, hidden
 * states are sampled with rand(), visible softmax units are reconstructed
 * and sampled, hidden units are recomputed from the sampled visibles, and
 * positive/negative statistics are accumulated. tSteps is fixed at 1, so the
 * do/while body runs once per user. From epoch 10 on, the RMSE figures use
 * nvp2 (reconstruction from hidden probabilities) instead of negvisprobs
 * (reconstruction from sampled hidden states).
 *
 * Every bsize = 100 users (and after the last user) the accumulated
 * statistics are turned into momentum updates with weight decay weightcost,
 * then reset.
 *
 * Results depend on the exact sequence of rand() calls, so statement order
 * in this function must not be changed casually.
 *
 * NOTE(review): visbiases becomes -inf or NaN when a movie has no ratings of
 * some value (log of 0, or 0/0) -- confirm moviercount is never zero.
 * NOTE(review): the reconstruction loop covers d0 + useridx[u][1] entries
 * while the probe RMSE reads the segment of length useridx[u][2]; confirm
 * against the definition of UNTRAIN that this covers the probe entries.
 */
int doAllFeatures() { int cloop=0; /* Initial weight factors */ int i, j, h, c, r; for (i=0; i < NMOVIES; i++) { for (r=0; r<5; r++) { for (c=0; c < NFACTORS; c++) { Aic[i][r][c] = 0.02 * randn() - 0.01; // Normal Distribution } } } for (c=0; c < NFACTORS; c++) { for (j=0; j < TOTAL_FEATURES; j++) { //vishid[j][0][i] = 0.02 * randn() - 0.01; // Normal Distribution //vishid[j][1][i] = 0.02 * randn() - 0.01; // Normal Distribution //vishid[j][2][i] = 0.02 * randn() - 0.01; // Normal Distribution //vishid[j][3][i] = 0.02 * randn() - 0.01; // Normal Distribution //vishid[j][4][i] = 0.02 * randn() - 0.01; // Normal Distribution Bcj[c][j] = 0.2/3.0 * randn() - 0.1/3.0; // Normal Distribution } } /* Initial biases */ for(i=0;i<TOTAL_FEATURES;i++) { hidbiases[i]=0.0; } for (j=0; j<NMOVIES; j++) { unsigned int mtot = moviercount[j*5+0] + moviercount[j*5+1] + moviercount[j*5+2] + moviercount[j*5+3] + moviercount[j*5+4]; for (i=0; i<5; i++) { visbiases[j][i] = log( ((double)moviercount[j*5+i]) / ((double) mtot) ); //printf("mrc: %d, mc %d, log:%f frac: %f\n", moviercount[j*5+i], moviecount[j] , log( moviercount[j*5+i] /(double) moviecount[j]), //(moviercount[j*5+i] /(double) moviecount[j]) ); } } /* Optimize current feature */ double nrmse=2., last_rmse=10.; double prmse = 0, last_prmse=0; double s; //double s2; int n; int loopcount=0; double EpsilonW = epsilonw; double EpsilonVB = epsilonvb; double EpsilonHB = epsilonhb; double Momentum = momentum; ZERO(Ainc); ZERO(Binc); ZERO(visbiasinc); ZERO(hidbiasinc); int tSteps = 1; //while ( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 14) && loopcount < 80 ) { while ( ((nrmse < (last_rmse-E) ) || loopcount < 14) && loopcount < 80 ) { //if ( loopcount >= 10 ) //tSteps = 1 + loopcount / 5; last_rmse=nrmse; last_prmse=prmse; clock_t t0=clock(); loopcount++; int ntrain = 0; nrmse = 0.0; s = 0.0; //s2 = 0.0; n = 0; if ( loopcount > 5 ) Momentum = finalmomentum; //* CDpos =0, CDneg=0 (matrices) ZERO(Apos); ZERO(Aneg); 
ZERO(Bpos); ZERO(Bneg); ZERO(poshidact); ZERO(neghidact); ZERO(posvisact); ZERO(negvisact); ZERO(moviecount); int u,m, f; for(u=0;u<NUSERS;u++) { //* CDpos =0, CDneg=0 (matrices) ZERO(negvisprobs); ZERO(nvp2); //* perform steps 1 to 8 int base0=useridx[u][0]; int d0=UNTRAIN(u); int dall=UNALL(u); // For all rated movies, accumulate contributions to hidden units double sumW[TOTAL_FEATURES]; ZERO(sumW); for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; moviecount[m]++; // 1. get one data point from data set. // 2. use values of this data point to set state of visible neurons Si int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; // Add to the bias contribution for set visible units posvisact[m][r] += 1.0; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { // sum_j(W[i][j] * v[0][j])) //sumW[h] += vishid[m][r][h]; sumW[h] += Wij(m,r,h); } } // Sample the hidden units state after computing probabilities for(h=0;h<TOTAL_FEATURES;h++) { // 3. compute Sj for each hidden neuron based on formula above and states of visible neurons Si // poshidprobs[h] = 1./(1 + exp(-V*vishid - hidbiases); // compute Q(h[0][i] = 1 | v[0]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[0][j])) poshidprobs[h] = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h])); // sample h[0][i] from Q(h[0][i] = 1 | v[0]) if ( poshidprobs[h] > (rand()/(double)(RAND_MAX)) ) { poshidstates[h]=1; poshidact[h] += 1.0; } else { poshidstates[h]=0; } //poshidact[h] += poshidprobs[h]; } // Load up a copy of poshidstates for use in loop for ( h=0; h < TOTAL_FEATURES; h++ ) curposhidstates[h] = poshidstates[h]; // Make T Contrastive Divergence steps int stepT = 0; do { // Determine if this is the last pass through this loop int finalTStep = (stepT+1 >= tSteps); // 5. on visible neurons compute Si using the Sj computed in step3. 
This is known as reconstruction // for all visible units j: int r; int count = d0; count += useridx[u][1]; // too compute probe errors for(j=0;j<count;j++) { int m=userent[base0+j]&USER_MOVIEMASK; for(h=0;h<TOTAL_FEATURES;h++) { if ( curposhidstates[h] == 1 ) { for(r=0;r<5;r++) { //negvisprobs[m][r] += vishid[m][r][h]; negvisprobs[m][r] += Wij(m,r,h); } } //for(r=0;r<5;r++) //negvisprobs[m][r] += poshidprobs[h] * vishid[m][r][h]; if ( loopcount >= 10 ) { for(r=0;r<5;r++) //nvp2[m][r] += poshidprobs[h] * vishid[m][r][h]; nvp2[m][r] += poshidprobs[h] * Wij(m,r,h); } } // compute P(v[1][j] = 1 | h[0]) # for binomial units, sigmoid(c[j] + sum_i(W[i][j] * h[0][i])) negvisprobs[m][0] = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0])); negvisprobs[m][1] = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1])); negvisprobs[m][2] = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2])); negvisprobs[m][3] = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3])); negvisprobs[m][4] = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4])); // Normalize probabilities double tsum = negvisprobs[m][0] + negvisprobs[m][1] + negvisprobs[m][2] + negvisprobs[m][3] + negvisprobs[m][4]; if ( tsum != 0 ) { negvisprobs[m][0] /= tsum; negvisprobs[m][1] /= tsum; negvisprobs[m][2] /= tsum; negvisprobs[m][3] /= tsum; negvisprobs[m][4] /= tsum; } if ( loopcount >= 10 ) { nvp2[m][0] = 1./(1 + exp(-nvp2[m][0] - visbiases[m][0])); nvp2[m][1] = 1./(1 + exp(-nvp2[m][1] - visbiases[m][1])); nvp2[m][2] = 1./(1 + exp(-nvp2[m][2] - visbiases[m][2])); nvp2[m][3] = 1./(1 + exp(-nvp2[m][3] - visbiases[m][3])); nvp2[m][4] = 1./(1 + exp(-nvp2[m][4] - visbiases[m][4])); double tsum2 = nvp2[m][0] + nvp2[m][1] + nvp2[m][2] + nvp2[m][3] + nvp2[m][4]; if ( tsum2 != 0 ) { nvp2[m][0] /= tsum2; nvp2[m][1] /= tsum2; nvp2[m][2] /= tsum2; nvp2[m][3] /= tsum2; nvp2[m][4] /= tsum2; } } // sample v[1][j] from P(v[1][j] = 1 | h[0]) double randval = (rand()/(double)(RAND_MAX)); if ( (randval -= negvisprobs[m][0]) <= 0.0 ) 
negvissoftmax[m] = 0; else if ( (randval -= negvisprobs[m][1]) <= 0.0 ) negvissoftmax[m] = 1; else if ( (randval -= negvisprobs[m][2]) <= 0.0 ) negvissoftmax[m] = 2; else if ( (randval -= negvisprobs[m][3]) <= 0.0 ) negvissoftmax[m] = 3; else //if ( (randval -= negvisprobs[m][4]) <= 0.0 ) negvissoftmax[m] = 4; //negvisact[m*5+0] += negvisprobs[m*5+0]; //negvisact[m*5+1] += negvisprobs[m*5+1]; //negvisact[m*5+2] += negvisprobs[m*5+2]; //negvisact[m*5+3] += negvisprobs[m*5+3]; //negvisact[m*5+4] += negvisprobs[m*5+4]; // if in training data then train on it if ( j < d0 && finalTStep ) negvisact[m][negvissoftmax[m]] += 1.0; } // 6. compute state of hidden neurons Sj again using Si from 5 step. // For all rated movies accumulate contributions to hidden units from sampled visible units ZERO(sumW); for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { //sumW[h] += vishid[m][negvissoftmax[m]][h]; sumW[h] += Wij(m,negvissoftmax[m],h); //sumW[h] += vishid[m][0][h] * negvisprobs[m*5+0]; //sumW[h] += vishid[m][1][h] * negvisprobs[m*5+1]; //sumW[h] += vishid[m][2][h] * negvisprobs[m*5+2]; //sumW[h] += vishid[m][3][h] * negvisprobs[m*5+3]; //sumW[h] += vishid[m][4][h] * negvisprobs[m*5+4]; } } // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { // compute Q(h[1][i] = 1 | v[1]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[1][j])) neghidprobs[h] = 1./(1 + exp(-sumW[h] - hidbiases[h])); // Experimentally sample the hidden units state again TODO: What is best? 
if ( neghidprobs[h] > (rand()/(double)(RAND_MAX)) ) { neghidstates[h]=1; if ( finalTStep ) neghidact[h] += 1.0; } else { neghidstates[h]=0; } //if ( finalTStep ) //neghidact[h] += neghidprobs[h]; } // Compute error rmse and prmse before we start iterating on T if ( stepT == 0 ) { // Compute rmse on training data for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; //# Compute some error function like sum of squared difference between Si in 1) and Si in 5) if ( loopcount < 10 ) { double expectedV = negvisprobs[m][1] + 2.0 * negvisprobs[m][2] + 3.0 * negvisprobs[m][3] + 4.0 * negvisprobs[m][4]; double vdelta = (((double)r)-expectedV); nrmse += (vdelta * vdelta); } else { double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4]; double vdelta = (((double)r)-expectedV); nrmse += (vdelta * vdelta); } } ntrain+=d0; // Sum up probe rmse int base=useridx[u][0]; for(i=1;i<2;i++) base+=useridx[u][i]; int d=useridx[u][2]; for(i=0; i<d;i++) { int m=userent[base+i]&USER_MOVIEMASK; int r=(userent[base+i]>>USER_LMOVIEMASK)&7; //# Compute some error function like sum of squared difference between Si in 1) and Si in 5) if ( loopcount < 10 ) { double expectedV = negvisprobs[m][1] + 2.0 * negvisprobs[m][2] + 3.0 * negvisprobs[m][3] + 4.0 * negvisprobs[m][4]; double vdelta = (((double)r)-expectedV); s+=vdelta*vdelta; } else { double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4]; double vdelta = (((double)r)-expectedV); s+=vdelta*vdelta; } } n+=d; } // If looping again, load the curposvisstates if ( !finalTStep ) { for ( h=0; h < TOTAL_FEATURES; h++ ) curposhidstates[h] = neghidstates[h]; ZERO(negvisprobs); } // 8. repeating multiple times steps 5,6 and 7 compute (Si.Sj)n. Where n is small number and can // increase with learning steps to achieve better accuracy. 
} while ( ++stepT < tSteps ); // Accumulate contrastive divergence contributions for (Si.Sj)0 and (Si.Sj)T for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { if ( poshidstates[h] == 1 ) { // 4. now Si and Sj values can be used to compute (Si.Sj)0 here () means just values not average //* accumulate CDpos = CDpos + (Si.Sj)0 //CDpos[m][r][h] += 1.0; for (c=0; c < NFACTORS; c++) { Apos[m][r][c] += Bcj[c][h]; Bpos[c][h] += Aic[m][r][c]; } } //CDpos[m][r][h] += poshidprobs[h]; // 7. now use Si and Sj to compute (Si.Sj)1 (fig.3) //TODO - This is experimental!!!!!!! //CDneg[m][negvissoftmax[m]][h] += neghidprobs[h]; //CDneg[m][negvissoftmax[m]][h] += (double)neghidstates[h]; if ( neghidstates[h] == 1 ) { for (c=0; c < NFACTORS; c++) { Aneg[m][negvissoftmax[m]][c] += Bcj[c][h]; Bneg[c][h] += Aic[m][negvissoftmax[m]][c]; } } } } // Update weights and biases after batch // //int bsize = 1000; int bsize = 100; if ( ((u+1) % bsize) == 0 || (u+1) == NUSERS ) { int numcases = u % bsize; numcases++; cloop++; //if ( numcases != bsize ) printf("u: %d, numcases: %d\n", u, numcases); // Update A factors for(m=0;m < NMOVIES;m++) { if ( moviecount[m] == 0 ) continue; // for all c factors for(c=0;c < NFACTORS; c++) { // for all softmax int rr; for(rr=0;rr<5;rr++) { //# At the end compute average of CDpos and CDneg by dividing them by number of data points. //# Compute CD = < Si.Sj >0 < Si.Sj >n = CDpos CDneg double Ap = Apos[m][rr][c]; double An = Aneg[m][rr][c]; if ( Ap != 0.0 || An != 0.0 ) { Ap /= ((double)moviecount[m]); An /= ((double)moviecount[m]); // W += epsilon * (h[0] * v[0]' - Q(h[1][.] = 1 | v[1]) * v[1]') //# Update weights and biases W = W + alpha*CD (biases are just weights to neurons that stay always 1.0) //e.g between data and reconstruction. 
//double preW = vishid[m][rr][h]; Ainc[m][rr][c] = Momentum * Ainc[m][rr][c] + EpsilonW * ((Ap - An) - weightcost * Aic[m][rr][c]); Aic[m][rr][c] += Ainc[m][rr][c]; //if ( cloop % 50 == 0 && c == 7 ) //printf("Aic: %f\t m: %d\t r: %d\t c: %d\n", Aic[m][rr][c], m, rr, c); //printf("W: %f preW: %f, CDp: %f, CDn: %f, m: %d, r: %d, h: %d, nhp: %f, nvp: %f\n", vishid[m][rr][h], preW, CDp, CDn, m, rr, h, //neghidprobs[h],negvisprobs[m*5+rr] //); } } } // Update visible softmax biases // c += epsilon * (v[0] - v[1])$ // for all softmax int rr; for(rr=0;rr<5;rr++) { if ( posvisact[m][rr] != 0.0 || negvisact[m][rr] != 0.0 ) { posvisact[m][rr] /= ((double)moviecount[m]); negvisact[m][rr] /= ((double)moviecount[m]); visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr])); //visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]) - weightcost * visbiases[m][rr]); visbiases[m][rr] += visbiasinc[m][rr]; //printf("vb: %f, pa: %f, na: %f\n", visbiases[(m*5+rr)], posvisact[(m*5+rr)], negvisact[(m*5+rr)]); } } } // Update B factors for(c=0;c<NFACTORS;c++) { // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { //# At the end compute average of CDpos and CDneg by dividing them by number of data points. //# Compute CD = < Si.Sj >0 < Si.Sj >n = CDpos CDneg double Bp = Bpos[c][h]; double Bn = Bneg[c][h]; if ( Bp != 0.0 || Bn != 0.0 ) { Bp /= ((double)numcases); Bn /= ((double)numcases); // W += epsilon * (h[0] * v[0]' - Q(h[1][.] = 1 | v[1]) * v[1]') //# Update weights and biases W = W + alpha*CD (biases are just weights to neurons that stay always 1.0) //e.g between data and reconstruction. 
//double preW = vishid[m][rr][h]; Binc[c][h] = Momentum * Binc[c][h] + EpsilonW * ((Bp - Bn) - weightcost * Bcj[c][h]); Bcj[c][h] += Binc[c][h]; //if ( cloop % 50 == 0 && h == 7 ) //printf("Bcj: %f\t c: %d\t h: %d\n", Bcj[c][h], c, h); //printf("W: %f preW: %f, CDp: %f, CDn: %f, m: %d, r: %d, h: %d, nhp: %f, nvp: %f\n", vishid[m][rr][h], preW, CDp, CDn, m, rr, h, //neghidprobs[h],negvisprobs[m*5+rr] //); } } } // Update hidden biases // b += epsilon * (h[0] - Q(h[1][.] = 1 | v[1])) for(h=0;h<TOTAL_FEATURES;h++) { if ( poshidact[h] != 0.0 || neghidact[h] != 0.0 ) { //poshidact[h] /= ((double)(numcases*ntrain*5)); //neghidact[h] /= ((double)(numcases*ntrain*5)); poshidact[h] /= ((double)(numcases)); neghidact[h] /= ((double)(numcases)); //poshidact[h] /= ((double)(mcount)); //neghidact[h] /= ((double)(mcount)); hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h])); //hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]) - weightcost * hidbiases[h]); hidbiases[h] += hidbiasinc[h]; //printf("hb: %f, pa: %f, na: %f, d0:%d\n", hidbiases[h], poshidact[h], neghidact[h], d0); } } ZERO(Apos); ZERO(Aneg); ZERO(Bpos); ZERO(Bneg); ZERO(poshidact); ZERO(neghidact); ZERO(posvisact); ZERO(negvisact); //ZERO(poscnt); //ZERO(negcnt); ZERO(moviecount); //mcount = 0; } } nrmse=sqrt(nrmse/ntrain); prmse = sqrt(s/n); //double prmse2 = sqrt(s2/n); //lg("%f\t%f\t%f\t%f\n",nrmse,prmse,prmse2,(clock()-t0)/(double)CLOCKS_PER_SEC); lg("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC); if ( loopcount > 6 ) { EpsilonW *= 0.90; EpsilonVB *= 0.90; EpsilonHB *= 0.90; } //else if ( loopcount > 5 ) { //EpsilonW *= 0.82; //EpsilonVB *= 0.82; //EpsilonHB *= 0.82; //} //printf("dd: %d %d %d %d %d\n", dd[0], dd[1], dd[2], dd[3], dd[4]); } /* Perform a final iteration in which the errors are clipped and stored */ recordErrors(); //if(save_model) { //dappend_bin(fnameV,sV,NMOVIES); //dappend_bin(fnameU,sU,NUSERS); //} return 1; 
}
void recordErrors() { int u,h,f, j, i; for(u=0;u<NUSERS;u++) { //* CDpos =0, CDneg=0 (matrices) ZERO(negvisprobs); //* perform steps 1 to 8 int base0=useridx[u][0]; int d0=UNTRAIN(u); int dall=UNALL(u); // For all rated movies, accumulate contributions to hidden units double sumW[TOTAL_FEATURES]; ZERO(sumW); for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; // 1. get one data point from data set. // 2. use values of this data point to set state of visible neurons Si int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; // for all hidden units h: for(h=0;h<TOTAL_FEATURES;h++) { // sum_j(W[i][j] * v[0][j])) //sumW[h] += vishid[m][r][h]; sumW[h] += Wij(m,r,h); } } // Compute the hidden probabilities for(h=0;h<TOTAL_FEATURES;h++) { // 3. compute Sj for each hidden neuron based on formula above and states of visible neurons Si // compute Q(h[0][i] = 1 | v[0]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[0][j])) poshidprobs[h] = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h])); } // 5. on visible neurons compute Si using the Sj computed in step3. 
This is known as reconstruction // for all visible units j: int r; int count = dall; for(j=0;j<count;j++) { int m=userent[base0+j]&USER_MOVIEMASK; for(h=0;h<TOTAL_FEATURES;h++) { for(r=0;r<5;r++) //negvisprobs[m][r] += poshidprobs[h] * vishid[m][r][h]; negvisprobs[m][r] += poshidprobs[h] * Wij(m,r,h); } // compute P(v[1][j] = 1 | h[0]) # for binomial units, sigmoid(c[j] + sum_i(W[i][j] * h[0][i])) negvisprobs[m][0] = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0])); negvisprobs[m][1] = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1])); negvisprobs[m][2] = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2])); negvisprobs[m][3] = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3])); negvisprobs[m][4] = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4])); // Normalize probabilities double tsum = negvisprobs[m][0] + negvisprobs[m][1] + negvisprobs[m][2] + negvisprobs[m][3] + negvisprobs[m][4]; if ( tsum != 0 ) { negvisprobs[m][0] /= tsum; negvisprobs[m][1] /= tsum; negvisprobs[m][2] /= tsum; negvisprobs[m][3] /= tsum; negvisprobs[m][4] /= tsum; } } // Compute and save error residuals for(i=0; i<dall;i++) { int m=userent[base0+i]&USER_MOVIEMASK; int r=(userent[base0+i]>>USER_LMOVIEMASK)&7; double expectedV = negvisprobs[m][1] + 2.0 * negvisprobs[m][2] + 3.0 * negvisprobs[m][3] + 4.0 * negvisprobs[m][4]; double vdelta = (((double)r)-expectedV); err[base0+i] = vdelta; } } }
int doAllFeatures() { /* Initial biases */ { int u,m; for(u=0;u<NUSERS;u++) { wbU[u]=0.0; } for(m=0;m<NMOVIES;m++) { wbV[m]=0.0; } } float swbU[NUSERS]; float swbV[NMOVIES]; /* Optimize current feature */ float nrmse=2., last_rmse=10.; float prmse = 0, last_prmse=0; int loopcount=0; float Gamma0 = G0; while( ( (prmse<=last_prmse) || loopcount < 6) ) { last_rmse=nrmse; last_prmse=prmse; nrmse=0.; clock_t t0=clock(); loopcount++; int u,m,j; // Save the prior wbU and wbV for when the RMSE gets worse for(u=0;u<NUSERS;u++) { swbU[u] = wbU[u]; } for(m=0;m<NMOVIES;m++) { swbV[m]=wbV[m]; } // Train for(u=0;u<NUSERS;u++) { //if (u%10000 == 0) { //printf("On user: %d\n", u); //fflush(stdout); //} int d0 = UNTRAIN(u); int base0=useridx[u][0]; // For all rated movies for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; // Figure out the current error int r=(userent[base0+j]>>USER_LMOVIEMASK)&7; r++; //float ee=err[base0+j]; //float e2 = ee; float e2; e2 = r - (GLOBAL_MEAN + wbU[u] + wbV[m]); // Train the biases float wbUu = wbU[u]; float wbVm = wbV[m]; wbU[u] += Gamma0 * (e2 - wbUu * L4); wbV[m] += Gamma0 * (e2 - wbVm * L4); } } // Report rmse for main loop nrmse=0.; int ntrain=0; int elcnt=0; int k=2; int n=0; float s=0.; int i; for(u=0;u<NUSERS;u++) { int base0=useridx[u][0]; int d012=UNALL(u); int i; int dall=UNALL(u); int d0 = UNTRAIN(u); for(i=0; i<d0;i++) { int m=userent[base0+i]&USER_MOVIEMASK; int r=(userent[base0+i]>>USER_LMOVIEMASK)&7; r++; //float ee=err[base0+j]; float e2; e2 = r - (GLOBAL_MEAN + wbU[u] + wbV[m]); nrmse+=e2*e2; ntrain++; } // Attempt to compute probe RMSE int base=useridx[u][0]; for(i=1;i<k;i++) base+=useridx[u][i]; int d=useridx[u][k]; int boffset = base-base0; for(i=0; i<d;i++) { int m=userent[base+i]&USER_MOVIEMASK; //float ee = err[base+i]; float e; int r=(userent[base+i]>>USER_LMOVIEMASK)&7; r++; e = r - (GLOBAL_MEAN + wbU[u] + wbV[m]); s+=e*e; } n+=d; } nrmse=sqrt(nrmse/ntrain); prmse = sqrt(s/n); 
lg("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC); Gamma0 *= 0.90; } // Restore the best parameters int u, m; for(u=0;u<NUSERS;u++) { wbU[u] = swbU[u]; } for(m=0;m<NMOVIES;m++) { wbV[m] = swbV[m]; } /* Perform a final iteration in which the errors are clipped and stored */ removeUV(); return 1; }
computemix(char *fnames[], int nscores, double *xty) { #ifdef HOLDOUT lg("With holdout\n"); #endif int ns2=nscores+2; int ns1=nscores+1; FILE *fp[NSCORES]; openfiles(fp,fnames,nscores); double xtx[NSCORES+2][NSCORES+2]; ZERO(xtx); int u; for(u=0; u<NUSERS; u++) { PROGRESS(u,NUSERS); int base=useridx[u][0]; #ifdef HOLDOUT if(aopt) error("cant do holdout with -a"); int d0=UNTRAIN(u); int d1=UNALL(u)-d0; #else int d0=0; int d1=UNTRAIN(u); #endif seekfiles(fp,nscores, d0); base+=d0; int j; for(j=0;j<d1;j++) { unsigned int dd=userent[base+j]; int r = (dd>>USER_LMOVIEMASK)&7; float s[NSCORES+2]; readfiles(fp,s,nscores); int f; for(f=0;f<nscores;f++) s[f]=r-s[f]; s[nscores]=1.; s[nscores+1]=r; int ff; for(f=0;f<ns2;f++) { for(ff=0;ff<ns2;ff++) xtx[f][ff] +=s[f]*s[ff]; } } int d2=UNTOTAL(u)-(d1+d0); seekfiles(fp,nscores, d2); } closefiles(fp,nscores); int count=xtx[nscores][nscores]; int j1,j2; for(j1=0;j1<nscores;j1++) lg("File %d RMSE %f\n",j1,sqrt((xtx[j1][j1]+xtx[ns1][ns1]-2*xtx[ns1][j1])/count)); double avgs[NSCORES+2],std[NSCORES+2]; for(j1=0;j1<ns2;j1++) { avgs[j1]=xtx[nscores][j1]/count; std[j1]=sqrt(xtx[j1][j1]/count-avgs[j1]*avgs[j1]); } for(j1=0;j1<ns2;j1++) lg("%f\t",avgs[j1]); lg("\n"); for(j1=0;j1<ns2;j1++) lg("%f\t",std[j1]); lg("\n"); lg("-------------------------------------------------\n"); for(j1=0;j1<ns2;j1++) { for(j2=0;j2<ns2;j2++) { lg("%f\t",(xtx[j1][j2]/count-avgs[j1]*avgs[j2])/(std[j1]*std[j2]+1.e-6)); } lg("\n"); } lg("-------------------------------------------------\n"); double eavgs[NSCORES],estd[NSCORES]; for(j1=0;j1<nscores;j1++) { eavgs[j1]=avgs[ns1]-avgs[j1]; estd[j1]=sqrt((xtx[ns1][ns1]+ xtx[j1][j1]-2*xtx[j1][ns1])/count); } for(j1=0;j1<nscores;j1++) lg("%f\t",eavgs[j1]); lg("\n"); for(j1=0;j1<nscores;j1++) lg("%f\t",estd[j1]); lg("\n"); lg("-------------------------------------------------\n"); for(j1=0;j1<nscores;j1++) { for(j2=0;j2<nscores;j2++) { 
lg("%f\t",((xtx[j1][j2]+xtx[ns1][ns1]-xtx[ns1][j1]-xtx[ns1][j2])/count-eavgs[j1]*eavgs[j2])/(estd[j1]*estd[j2]+1.e-6)); } lg("\n"); } char TRANS='N'; char UFLO='U'; int M=ns1; int N=ns1; int NRHS=1; double A[NSCORES+1][NSCORES+1]; int LDA=NSCORES+1; double B[NSCORES+1]; int LDB=NSCORES+1; double S[NSCORES+1]; double RCOND=0.00001; // singular values below this are treated as zero. int RANK; double WORK[1000]; int LWORK=1000; int INFO; for(j1=0;j1<ns1;j1++) { B[j1]=xtx[ns1][j1]; for(j2=0;j2<ns1;j2++) A[j1][j2]=xtx[j1][j2]; } for(j1=0;j1<ns1;j1++) A[j1][j1]+=LAMBDA; /*dgesv_(&N,&NRHS,A,&LDA,IPIV,B,&LDB,&INFO);*/ /*dgels_(&TRANS,&M,&N,&NRHS,A,&LDA,B,&LDB,WORK,&LWORK,&INFO);*/ /*dgelss_( &M, &N, &NRHS, A, &LDA, B, &LDB, S, &RCOND, &RANK, WORK, &LWORK, &INFO );*/ dposv_(&UFLO,&N,&NRHS,A,&LDA,B,&LDB,&INFO); if(INFO) error("failed %d\n",INFO); for(j1=0;j1<=nscores;j1++) xty[j1]=B[j1]; lg("Check that the matrix inversion worked:\n"); for(j1=0;j1<=nscores;j1++) { double sum=LAMBDA*B[j1]; for(j2=0;j2<=nscores;j2++) sum+=xtx[j1][j2]*B[j2]; lg("%f\t%f\n",sum,xtx[nscores+1][j1]); } }
/*
 * Train the visible/hidden weights (vishid) and the biases (visbiases,
 * hidbiases) with contrastive divergence, processing users in batches of
 * `bsize`.
 *
 * Per user:
 *   - hidden probabilities are computed from the training entries and a
 *     binary hidden state is sampled (positive phase);
 *   - tSteps reconstruction steps follow; each samples one rating per movie
 *     and then resamples the hidden states (negative phase);
 *   - on the first step only, a second, unsampled reconstruction (nvp2) is
 *     built from the hidden probabilities and used to accumulate the squared
 *     error on the training entries and on the useridx[u][2] probe entries.
 * After every `bsize` users (and after the last user) the accumulated
 * statistics are turned into momentum-smoothed updates and then cleared.
 *
 * The epoch loop runs at least 14 and at most 80 times, stopping in between
 * once the training RMSE no longer improves by more than E.  Learning rates
 * are decayed according to a schedule that depends on TOTAL_FEATURES.
 * recordErrors() is called before returning.
 *
 * The order of the rand() calls determines the sampled states, so the
 * statement order in this function matters.
 *
 * Returns 1.
 */
int doAllFeatures() {
    /* Initial weights */
    int i, j, h;
    for (j=0; j<NMOVIES; j++) {
        for (i=0; i<TOTAL_FEATURES; i++) {
            // One independent draw per rating column.
            vishid[j][0][i] = 0.02 * randn() - 0.01;
            vishid[j][1][i] = 0.02 * randn() - 0.01;
            vishid[j][2][i] = 0.02 * randn() - 0.01;
            vishid[j][3][i] = 0.02 * randn() - 0.01;
            vishid[j][4][i] = 0.02 * randn() - 0.01;
        }
    }

    /* Initial biases */
    for(i=0;i<TOTAL_FEATURES;i++) {
        hidbiases[i]=0.0;
    }
    for (j=0; j<NMOVIES; j++) {
        // Visible bias = log of the fraction of movie j's counted ratings
        // that fall in each rating column.
        // NOTE(review): a zero count gives log(0) = -inf, and a movie with
        // mtot == 0 gives log(0/0) = NaN -- confirm this cannot happen for
        // movies that appear in the probe entries.
        unsigned int mtot = moviercount[j*SOFTMAX+0] + moviercount[j*SOFTMAX+1] + moviercount[j*SOFTMAX+2] + moviercount[j*SOFTMAX+3] + moviercount[j*SOFTMAX+4];
        for (i=0; i<SOFTMAX; i++) {
            visbiases[j][i] = log( ((double)moviercount[j*SOFTMAX+i]) / ((double) mtot) );
        }
    }

    /* Optimize current feature */
    double nrmse=2., last_rmse=10.;
    double prmse = 0, last_prmse=0;
    double s;   // sum of squared probe errors for the current epoch
    int n;      // number of probe entries for the current epoch
    int loopcount=0;
    double EpsilonW = epsilonw;
    double EpsilonVB = epsilonvb;
    double EpsilonHB = epsilonhb;
    double Momentum = momentum;
    ZERO(CDinc);
    ZERO(visbiasinc);
    ZERO(hidbiasinc);
    int tSteps = 1;     // number of reconstruction steps per user

    // Iterate while the training RMSE is decreasing: at least 14 epochs,
    // at most 80.  The probe RMSE is recorded but not part of the test.
    //while ( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 14) && loopcount < 80 ) {
    while ( ((nrmse < (last_rmse-E) ) || loopcount < 14) && loopcount < 80 ) {
        // From the 11th epoch on, use 3 steps and add one every 5 epochs.
        if ( loopcount >= 10 )
            tSteps = 3 + (loopcount - 10)/5;

        last_rmse=nrmse;
        last_prmse=prmse;
        clock_t t0=clock();
        loopcount++;
        int ntrain = 0;
        nrmse = 0.0;
        s = 0.0;
        n = 0;

        if ( loopcount > 5 )
            Momentum = finalmomentum;

        // Clear the batch accumulators.
        ZERO(CDpos);
        ZERO(CDneg);
        ZERO(poshidact);
        ZERO(neghidact);
        ZERO(posvisact);
        ZERO(negvisact);
        ZERO(moviecount);

        int u,m, f;
        for(u=0;u<NUSERS;u++) {
            // Clear the per-user reconstruction accumulators.
            ZERO(negvisprobs);
            ZERO(nvp2);

            int base0=useridx[u][0];
            int d0=UNTRAIN(u);
            int dall=UNALL(u);

            // For all training entries, accumulate contributions to hidden units.
            double sumW[TOTAL_FEATURES];
            ZERO(sumW);
            for(j=0;j<d0;j++) {
                int m=userent[base0+j]&USER_MOVIEMASK;
                moviecount[m]++;

                // The stored rating code selects which visible unit of
                // movie m is on.
                int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;

                // Add to the bias contribution for set visible units
                posvisact[m][r] += 1.0;

                // for all hidden units h: sum_j(W[i][j] * v[0][j])
                for(h=0;h<TOTAL_FEATURES;h++) {
                    sumW[h] += vishid[m][r][h];
                }
            }

            // Compute the hidden probabilities and sample the hidden states.
            for(h=0;h<TOTAL_FEATURES;h++) {
                // Q(h[0][i] = 1 | v[0]) = sigmoid(b[i] + sum_j(W[i][j] * v[0][j]))
                poshidprobs[h] = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h]));

                // sample h[0][i] from Q(h[0][i] = 1 | v[0])
                if ( poshidprobs[h] > (rand()/(double)(RAND_MAX)) ) {
                    poshidstates[h]=1;
                    poshidact[h] += 1.0;
                } else {
                    poshidstates[h]=0;
                }
            }

            // Working copy of the hidden states that drives each
            // reconstruction step.
            for ( h=0; h < TOTAL_FEATURES; h++ )
                curposhidstates[h] = poshidstates[h];

            // Make T Contrastive Divergence steps
            int stepT = 0;
            do {
                // Determine if this is the last pass through this loop
                int finalTStep = (stepT+1 >= tSteps);

                // Reconstruct the visible units from the current hidden
                // states.  The probe entries are included so that their
                // error can be reported.
                int r;
                int count = d0;
                count += useridx[u][2]; // to compute probe errors
                for(j=0;j<count;j++) {
                    int m=userent[base0+j]&USER_MOVIEMASK;
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        // Accumulate weight values for sampled hidden states == 1
                        if ( curposhidstates[h] == 1 ) {
                            for(r=0;r<SOFTMAX;r++) {
                                negvisprobs[m][r] += vishid[m][r][h];
                            }
                        }

                        // First step only: also accumulate using the hidden
                        // probabilities rather than samples, for RMSE reporting.
                        if ( stepT == 0 ) {
                            for(r=0;r<SOFTMAX;r++)
                                nvp2[m][r] += poshidprobs[h] * vishid[m][r][h];
                        }
                    }

                    // P(v[1][j] = 1 | h[0]): each rating column gets its own
                    // sigmoid(c[j] + sum_i(W[i][j] * h[0][i])) ...
                    negvisprobs[m][0] = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0]));
                    negvisprobs[m][1] = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1]));
                    negvisprobs[m][2] = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2]));
                    negvisprobs[m][3] = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3]));
                    negvisprobs[m][4] = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4]));

                    // ... and the five values are then normalized to sum to one.
                    double tsum = negvisprobs[m][0] + negvisprobs[m][1] + negvisprobs[m][2] + negvisprobs[m][3] + negvisprobs[m][4];
                    if ( tsum != 0 ) {
                        negvisprobs[m][0] /= tsum;
                        negvisprobs[m][1] /= tsum;
                        negvisprobs[m][2] /= tsum;
                        negvisprobs[m][3] /= tsum;
                        negvisprobs[m][4] /= tsum;
                    }

                    // Same squashing and normalization for the reporting
                    // probabilities.
                    if ( stepT == 0) {
                        nvp2[m][0] = 1./(1 + exp(-nvp2[m][0] - visbiases[m][0]));
                        nvp2[m][1] = 1./(1 + exp(-nvp2[m][1] - visbiases[m][1]));
                        nvp2[m][2] = 1./(1 + exp(-nvp2[m][2] - visbiases[m][2]));
                        nvp2[m][3] = 1./(1 + exp(-nvp2[m][3] - visbiases[m][3]));
                        nvp2[m][4] = 1./(1 + exp(-nvp2[m][4] - visbiases[m][4]));
                        double tsum2 = nvp2[m][0] + nvp2[m][1] + nvp2[m][2] + nvp2[m][3] + nvp2[m][4];
                        if ( tsum2 != 0 ) {
                            nvp2[m][0] /= tsum2;
                            nvp2[m][1] /= tsum2;
                            nvp2[m][2] /= tsum2;
                            nvp2[m][3] /= tsum2;
                            nvp2[m][4] /= tsum2;
                        }
                    }

                    // Sample one rating column for movie m by walking the
                    // cumulative distribution; column 4 is the fallback.
                    double randval = (rand()/(double)(RAND_MAX));
                    if ( (randval -= negvisprobs[m][0]) <= 0.0 )
                        negvissoftmax[m] = 0;
                    else if ( (randval -= negvisprobs[m][1]) <= 0.0 )
                        negvissoftmax[m] = 1;
                    else if ( (randval -= negvisprobs[m][2]) <= 0.0 )
                        negvissoftmax[m] = 2;
                    else if ( (randval -= negvisprobs[m][3]) <= 0.0 )
                        negvissoftmax[m] = 3;
                    else //if ( (randval -= negvisprobs[m][4]) <= 0.0 )
                        negvissoftmax[m] = 4;

                    // Only training entries, and only the final step,
                    // contribute to the visible-bias statistics.
                    if ( j < d0 && finalTStep )
                        negvisact[m][negvissoftmax[m]] += 1.0;
                }

                // Recompute the hidden units from the sampled visible
                // units of the training entries.
                ZERO(sumW);
                for(j=0;j<d0;j++) {
                    int m=userent[base0+j]&USER_MOVIEMASK;
                    // for all hidden units h:
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        sumW[h] += vishid[m][negvissoftmax[m]][h];
                    }
                }

                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    // Q(h[1][i] = 1 | v[1]) = sigmoid(b[i] + sum_j(W[i][j] * v[1][j]))
                    neghidprobs[h] = 1./(1 + exp(-sumW[h] - hidbiases[h]));

                    // Sample the hidden units state again.
                    if ( neghidprobs[h] > (rand()/(double)(RAND_MAX)) ) {
                        neghidstates[h]=1;
                        if ( finalTStep )
                            neghidact[h] += 1.0;
                    } else {
                        neghidstates[h]=0;
                    }
                }

                // Accumulate rmse and prmse on the first step only.
                if ( stepT == 0 ) {
                    // Squared error on the training entries: stored rating
                    // code minus its expectation under nvp2.
                    for(j=0;j<d0;j++) {
                        int m=userent[base0+j]&USER_MOVIEMASK;
                        int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
                        double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
                        double vdelta = (((double)r)-expectedV);
                        nrmse += (vdelta * vdelta);
                    }
                    ntrain+=d0;

                    // Squared error on the probe entries, which start
                    // useridx[u][1] entries after the user's first entry.
                    int base=useridx[u][0];
                    for(i=1;i<2;i++) base+=useridx[u][i];
                    int d=useridx[u][2];
                    for(i=0; i<d;i++) {
                        int m=userent[base+i]&USER_MOVIEMASK;
                        int r=(userent[base+i]>>USER_LMOVIEMASK)&7;
                        double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
                        double vdelta = (((double)r)-expectedV);
                        s+=vdelta*vdelta;
                    }
                    n+=d;
                }

                // If looping again, the resampled hidden states drive the
                // next reconstruction, which must start from zero.
                if ( !finalTStep ) {
                    for ( h=0; h < TOTAL_FEATURES; h++ )
                        curposhidstates[h] = neghidstates[h];
                    ZERO(negvisprobs);
                }
            } while ( ++stepT < tSteps );

            // Accumulate the contrastive divergence statistics: CDpos from
            // the data and the first hidden sample, CDneg from the final
            // reconstruction and the final hidden sample.
            for(j=0;j<d0;j++) {
                int m=userent[base0+j]&USER_MOVIEMASK;
                int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;

                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    if ( poshidstates[h] == 1 ) {
                        CDpos[m][r][h] += 1.0;
                    }
                    CDneg[m][negvissoftmax[m]][h] += (double)neghidstates[h];
                }
            }

            // Update weights and biases after each batch of bsize users,
            // and after the last user.
            int bsize = 100;
            if ( ((u+1) % bsize) == 0 || (u+1) == NUSERS ) {
                // Number of users in this (possibly short, final) batch.
                int numcases = u % bsize;
                numcases++;

                // Update weights
                for(m=0;m<NMOVIES;m++) {
                    // Movies with no training entry in this batch are untouched.
                    if ( moviecount[m] == 0 ) continue;

                    // for all hidden units h:
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        // for all softmax
                        int rr;
                        for(rr=0;rr<SOFTMAX;rr++) {
                            // Average the statistics over the number of times
                            // movie m occurred in the batch.
                            double CDp = CDpos[m][rr][h];
                            double CDn = CDneg[m][rr][h];
                            if ( CDp != 0.0 || CDn != 0.0 ) {
                                CDp /= ((double)moviecount[m]);
                                CDn /= ((double)moviecount[m]);

                                // Momentum-smoothed step along (CDp - CDn)
                                // with weight decay.
                                CDinc[m][rr][h] = Momentum * CDinc[m][rr][h] + EpsilonW * ((CDp - CDn) - weightcost * vishid[m][rr][h]);
                                vishid[m][rr][h] += CDinc[m][rr][h];
                            }
                        }
                    }

                    // Update visible softmax biases
                    // c += epsilon * (v[0] - v[1])
                    // for all softmax
                    int rr;
                    for(rr=0;rr<SOFTMAX;rr++) {
                        if ( posvisact[m][rr] != 0.0 || negvisact[m][rr] != 0.0 ) {
                            // The accumulators are averaged in place; they are
                            // cleared at the end of the batch.
                            posvisact[m][rr] /= ((double)moviecount[m]);
                            negvisact[m][rr] /= ((double)moviecount[m]);
                            visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]));
                            //visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]) - weightcost * visbiases[m][rr]);
                            visbiases[m][rr] += visbiasinc[m][rr];
                        }
                    }
                }

                // Update hidden biases
                // b += epsilon * (h[0] - Q(h[1][.] = 1 | v[1]))
                for(h=0;h<TOTAL_FEATURES;h++) {
                    if ( poshidact[h] != 0.0 || neghidact[h] != 0.0 ) {
                        poshidact[h] /= ((double)(numcases));
                        neghidact[h] /= ((double)(numcases));
                        hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]));
                        //hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]) - weightcost * hidbiases[h]);
                        hidbiases[h] += hidbiasinc[h];
                    }
                }

                // Start the next batch from empty accumulators.
                ZERO(CDpos);
                ZERO(CDneg);
                ZERO(poshidact);
                ZERO(neghidact);
                ZERO(posvisact);
                ZERO(negvisact);
                ZERO(moviecount);
            }
        }

        nrmse=sqrt(nrmse/ntrain);
        prmse = sqrt(s/n);
        printf("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC);

        // Learning-rate decay schedule, chosen by the number of hidden units.
        if ( TOTAL_FEATURES == 200 ) {
            if ( loopcount > 6 ) {
                EpsilonW *= 0.90;
                EpsilonVB *= 0.90;
                EpsilonHB *= 0.90;
            } else if ( loopcount > 5 ) {
                // With 200 hidden variables, you need to slow things down a little more.
                // This could probably use some more optimization.
                EpsilonW *= 0.50;
                EpsilonVB *= 0.50;
                EpsilonHB *= 0.50;
            } else if ( loopcount > 2 ) {
                EpsilonW *= 0.70;
                EpsilonVB *= 0.70;
                EpsilonHB *= 0.70;
            }
        } else {
            // The 100 hidden variable case
            if ( loopcount > 8 ) {
                EpsilonW *= 0.92;
                EpsilonVB *= 0.92;
                EpsilonHB *= 0.92;
            } else if ( loopcount > 6 ) {
                EpsilonW *= 0.90;
                EpsilonVB *= 0.90;
                EpsilonHB *= 0.90;
            } else if ( loopcount > 2 ) {
                EpsilonW *= 0.78;
                EpsilonVB *= 0.78;
                EpsilonHB *= 0.78;
            }
        }
    }

    /* Perform a final iteration in which the errors are stored */
    recordErrors();

    //if(save_model) {
    //dappend_bin(fnameV,sV,NMOVIES);
    //dappend_bin(fnameU,sU,NUSERS);
    //}
    return 1;
}
void movietimeuser() { lg("Movie Time(User)\n"); ZERO(day0); // It is OK to look on all data for day0 because it is always known int u; for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNTOTAL(u); int j; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); if(!day0[u] || day0[u]>day) day0[u]=day; } } // Remove average but only use training data double avg[NMOVIES]; int moviecount[NMOVIES]; ZERO(avg); ZERO(moviecount); for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d0=UNTRAIN(u); int j; for(j=0;j<d0;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); avg[m]+=DTIME(day-day0[u]); moviecount[m]++; } } int m; for(m=0;m<NMOVIES;m++) avg[m]/=moviecount[m]; // compute unbiased estimator double theta[NMOVIES]; double var[NMOVIES]; ZERO(theta); ZERO(var); for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d0=UNTRAIN(u); int j; for(j=0;j<d0;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); // compute explanatory variable double x=DTIME(day-day0[u])-avg[m]; theta[m]+=err[base+j]*x; var[m]+=x*x; } } for(m=0; m<NMOVIES; m++) theta[m]=(theta[m]/(var[m]+1.e-20))*moviecount[m]/(moviecount[m]+MOVIETIME_ALPHA); //predict for(u=0;u<NUSERS;u++) { int base=useridx[u][0]; int d012=UNALL(u); int j; for(j=0;j<d012;j++) { int m=userent[base+j]&USER_MOVIEMASK; int day=userent[base+j]>>(USER_LMOVIEMASK+3); double x=DTIME(day-day0[u])-avg[m]; err[base+j]-=theta[m]*x; } } }
int doAllFeatures() { /* Initial biases */ { int u,m,f; for(u=0;u<NUSERS;u++) { for(f=0;f<NFEATURES;f++) bU[u][f]=drand48()*0.01-0.005; } for(m=0;m<NMOVIES;m++) { for(f=0;f<NFEATURES;f++) bV[m][f]=drand48()*0.01-0.005; } } /* Initial estimation for current feature */ { int u,m,f; for(u=0;u<NUSERS;u++) { for(f=0;f<NFEATURES;f++) sU[u][f]=drand48()*0.1-0.04; } for(m=0;m<NMOVIES;m++) { for(f=0;f<NFEATURES;f++) { sV[m][f]=drand48()*0.05-0.025; sY[m][f]=drand48()*0.02-0.01; } } } /* Optimize current feature */ double nrmse=2., last_rmse=10.; double thr=sqrt(1.-E); int loopcount=0; //thr=sqrt(1.-E2); double Gamma2 = G2; double Gamma0 = G0; while( ( nrmse < (last_rmse-E) ) || loopcount++ < 20) { last_rmse=nrmse; clock_t t0=clock(); int u,m, f; for(u=0;u<NUSERS;u++) { // Calculate sumY and NuSY for each factor double sumY[NFEATURES]; ZERO(sumY); double lNuSY[NFEATURES]; ZERO(lNuSY); int base0=useridx[u][0]; int d0=UNTRAIN(u); int j; int f; int dall=UNALL(u); double NuS = 1.0/sqrt(dall); for(j=0;j<d0;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) sumY[f]+=sY[mm][f]; } //if ( loopcount > 1 ) { //printf("sumY: %f\n", sumY); //fflush(stdout); //} for(j=0;j<d0;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) { lNuSY[f] = NuS * sumY[f]; //if ( loopcount > 1 ) { //printf("lNuSY: %f\n", lNuSY[f]); //fflush(stdout); //} } } double ycontrib[NFEATURES]; ZERO(ycontrib); // For all rated movies double bdampen = d0/1.1; for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; // Figure out the current error double ee=err[base0+j]; double e2 = ee; for (f=0; f<NFEATURES; f++) { e2 -= (bU[u][f] + bV[m][f]); e2 -= ((sU[u][f]+lNuSY[f])*sV[m][f]); } // update U V and slope component of Y //double yfactor = NuS/sqrt(moviecount[m]); //double yfactor = NuS; double yfactor = NuS/d0; for (f=0; f<NFEATURES; f++) { // Train the biases double bUu = bU[u][f]; double bVm = bV[m][f]; bU[u][f] += Gamma0 * (e2 - bUu * L4) / bdampen; bV[m][f] += Gamma0 * 
(e2 - bVm * L4) / bdampen; double sUu = sU[u][f]; double sVm = sV[m][f]; sU[u][f] += (Gamma2 * ((e2 * sVm) - L8 * sUu)); sV[m][f] += (Gamma2 * ((e2 * (sUu + lNuSY[f])) - L8 * sVm)); //printf("sU: %f\n", sU[u][f]); //printf("sV: %f\n", sV[m][f]); //fflush(stdout); ycontrib[f] += e2 * sVm * yfactor; //printf("ycont: %f\n", ycontrib[f]); //fflush(stdout); } } // Train Ys over all known movies for user for(j=0;j<dall;j++) { int m=userent[base0+j]&USER_MOVIEMASK; for (f=0; f<NFEATURES; f++) { double sYm = sY[m][f]; sY[m][f] += Gamma2 * (ycontrib[f] - L7 * sYm); //printf("before sY: %f\tycon: %f\tG2*ycon: %f\treg: %f\n", sY[m][f], ycontrib[f], (G2_Y*ycontrib[f]), G2_Y*L7_Y*sYm); //printf("after sY: %f\tycon: %f\tG2*ycon: %f\treg: %f\n", sY[m][f], ycontrib[f], (G2_Y*ycontrib[f]), G2_Y*L7_Y*sYm); //printf("sY: %f\tycon: %f\tG2*ycon: %f\n", sY[m][f], ycontrib[f], (G2*ycontrib[f])); //fflush(stdout); } } } // Report rmse for main loop nrmse=0.; int ntrain=0; int elcnt=0; for(u=0;u<NUSERS;u++) { int base0=useridx[u][0]; int d0=UNTRAIN(u); int j; // Setup the Ys again double sumY[NFEATURES]; ZERO(sumY); double lNuSY[NFEATURES]; ZERO(lNuSY); int dall=UNALL(u); double NuS = 1.0/sqrt(dall); for(j=0;j<d0;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) sumY[f]+=sY[mm][f]; } for(j=0;j<d0;j++) { int mm=userent[base0+j]&USER_MOVIEMASK; for(f=0;f<NFEATURES;f++) lNuSY[f] = NuS * sumY[f]; } for(j=0;j<d0;j++) { int m=userent[base0+j]&USER_MOVIEMASK; double ee = err[base0+j]; double e2 = ee; for (f=0; f<NFEATURES; f++) { e2 -= (bU[u][f] + bV[m][f]); e2 -= ( (sU[u][f] + lNuSY[f]) * sV[m][f]); } if( elcnt++ == 5000 ) { printf("0 E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[0], sV[m][0], sU[u][0], bU[u][0], bV[m][0], sY[m][0],u, m); printf("1 E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[1], sV[m][1], sU[u][1], bU[u][1], bV[m][1], sY[m][1],u, m); printf("2 E: %f \t 
NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[2], sV[m][2], sU[u][2], bU[u][2], bV[m][2], sY[m][2],u, m); printf("3 E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", ee, e2, lNuSY[3], sV[m][3], sU[u][3], bU[u][3], bV[m][3], sY[m][3],u, m); fflush(stdout); } /* if( e > 5.0 || e < -5.0 ) { printf("bad EE: %f\tU: %d\tM: %d\tNuSY: %f\te: %f\t sV: %f\tsU: %f\tbU: %f\tbV: %f\n", ee, u, m, NuSY, e, new_sV[m], new_sU[u], bUu, bVm); fflush(stdout); } */ nrmse+=e2*e2; } ntrain+=d0; } nrmse=sqrt(nrmse/ntrain); double prmse = rmseprobe(); lg("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC); //rmse_print(0); if ( loopcount < 6 ) { Gamma2 *= 0.95; Gamma0 *= 0.95; } else if ( loopcount < 14 ) { Gamma2 *= 0.92; Gamma0 *= 0.92; } else { Gamma2 *= 0.90; Gamma0 *= 0.90; } } /* Perform a final iteration in which the errors are clipped and stored */ removeUV(); //if(save_model) { //dappend_bin(fnameV,sV,NMOVIES); //dappend_bin(fnameU,sU,NUSERS); //} return 1; }