Пример #1
0
void removeUV()
{
	int u,f;
	for(u=0;u<NUSERS;u++) {
		int base0=useridx[u][0];
		int d012=UNALL(u);
		int i;

		int dall=UNALL(u);
		double NuS = 1.0/sqrt(dall);
		double lNuSY[NFEATURES];
		double sumY[NFEATURES];
		ZERO(sumY);
		ZERO(lNuSY);
		int j;
		for(j=0;j<dall;j++) {
			int mm=userent[base0+j]&USER_MOVIEMASK;
			for(f=0;f<NFEATURES;f++)
				sumY[f]+=sY[mm][f];
		}
		int d0=UNTRAIN(u);
		for(f=0;f<NFEATURES;f++) 
			lNuSY[f] = NuS * sumY[f]; 


        double bUu=bU[u];
		for(i=0; i<d012;i++) {
			int m=userent[base0+i]&USER_MOVIEMASK;
			err[base0+i]-=(bU[u] + bV[m]);
			for (f=0; f<NFEATURES; f++)
			    err[base0+i]-=((sU[u][f] + lNuSY[f]) * sV[m][f]);
		}
	}
}
Пример #2
0
/*
 * Subtract the current model prediction from err[] for every entry of every
 * user (all UNALL(u) entries).
 *
 * Bias part removed per entry:
 *   bU[u] + bV[m] + bVbin[m][dbin(day)] + sdbU[sdloc] + alphabU[u] * devuhat
 * Factor part removed per entry and feature f:
 *   (sU[u][f] + sdsU[sdloc + f*NENTRIES] + alphasU[u][f] * devuhat + lNuSY[f]) * sV[m][f]
 * where lNuSY[f] is the sum of sY[k][f] over the user's movies scaled by
 * 1/sqrt(number of entries), sdloc = sdbin[entry] and devuhat = DEVuHat[entry].
 */
void removeUV()
{
	int u,f;
	for(u=0;u<NUSERS;u++) {
		int base0=useridx[u][0];
		int dall=UNALL(u);
		int i;

		/* Scaled sum of sY over all of the user's movies. */
		double NuS = 1.0/sqrt(dall);
		double lNuSY[NFEATURES];
		ZERO(lNuSY);
		for(i=0;i<dall;i++) {
			int mm=userent[base0+i]&USER_MOVIEMASK;
			for(f=0;f<NFEATURES;f++)
				lNuSY[f]+=sY[mm][f];
		}
		for(f=0;f<NFEATURES;f++)
			lNuSY[f] = NuS * lNuSY[f];

		// For all rated movies
		for(i=0;i<dall;i++) {
			int entloc = base0+i;
			unsigned int sdloc = sdbin[entloc];
			int m=userent[entloc]&USER_MOVIEMASK;
			int day=userent[entloc]>>(USER_LMOVIEMASK+3);
			double devuhat = DEVuHat[entloc];

			// Remove the bias terms
			err[entloc] -= (bU[u] + bV[m] + bVbin[m][dbin(day)] + sdbU[sdloc] + alphabU[u] * devuhat);

			// Remove the factor terms.  The sdsU offset is computed in
			// unsigned arithmetic: f*NENTRIES as a signed int product can
			// exceed INT_MAX, which is undefined behaviour.  Unsigned
			// arithmetic yields the same index without the overflow.
			for (f=0; f<NFEATURES; f++) {
				unsigned int sdsloc = sdloc + (unsigned int)f * (unsigned int)NENTRIES;
				err[entloc] -= (( sU[u][f] + sdsU[sdsloc] +  alphasU[u][f] * devuhat + lNuSY[f]) * sV[m][f]);
			}
		}
	}
}
Пример #3
0
void usertimemovie()
{
	lg("User Time(Movie)\n");
	int day0[NMOVIES];
	ZERO(day0);
	// It is OK to look on all data for day0 because it is always known
	int i;
	for(i=0;i<NENTRIES;i++) {
		int m=userent[i]&USER_MOVIEMASK;
		int day=userent[i]>>(USER_LMOVIEMASK+3);
		if(!day0[m] || day0[m]>day) day0[m]=day;
	}
	
	int u;
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNALL(u);
		int d0=UNTRAIN(u);
		// compute explanatory variable
		double usertime[NMOVIES];
		int j;
		for(j=0;j<d012;j++) {
			int m=userent[base+j]&USER_MOVIEMASK;	
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			usertime[j]=DTIME(day-day0[m]);
		}
		userXX(usertime,&err[base],d0,d012,USERTIMEMOVIE_ALPHA);
	}
}
Пример #4
0
/*
 * Per-movie regression of err[] on a per-user explanatory variable.
 *
 *   xuser  one value per user (indexed by user number)
 *   alpha  shrinkage constant: the fitted slope of a movie with c training
 *          entries is multiplied by c/(c+alpha)
 *
 * Steps: (1) per-movie mean of xuser over training entries, (2) per-movie
 * slope theta = sum(err*x)/sum(x*x) with x = xuser[u]-mean, shrunk as above,
 * (3) subtract theta*x from err[] for all entries (UNALL), not only training.
 *
 * Movies with no training entries keep mean 0 and slope 0, so their entries
 * are left unchanged.  Previously the 0/0 mean was NaN and the predict step
 * wrote NaN into err[] for those entries.
 */
void movieXuser(double *xuser, double alpha)
{
	// Remove average but only use training data
	double avg[NMOVIES];
	int moviecount[NMOVIES];
	ZERO(avg);
	ZERO(moviecount);
	int u;
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d0=UNTRAIN(u);
		int j;
		double xu=xuser[u];
		for(j=0;j<d0;j++) {
			int m=userent[base+j]&USER_MOVIEMASK;
			avg[m]+=xu;
			moviecount[m]++;
		}
	}
	int m;
	for(m=0;m<NMOVIES;m++) {
		if(moviecount[m])
			avg[m]/=moviecount[m];
	}

	// compute unbiased estimator
	double theta[NMOVIES];
	double var[NMOVIES];
	ZERO(theta);
	ZERO(var);
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d0=UNTRAIN(u);
		int j;
		double xu=xuser[u];
		for(j=0;j<d0;j++) {
			int m=userent[base+j]&USER_MOVIEMASK;
			// compute explanatory variable
			double x=xu-avg[m];
			theta[m]+=err[base+j]*x;
			var[m]+=x*x;
		}
	}
	for(m=0; m<NMOVIES; m++) {
		if(!moviecount[m]) {
			// no training data: no slope (also avoids 0/0 when alpha is 0)
			theta[m]=0.;
			continue;
		}
		// 1.e-20 keeps the division finite when every x is zero
		theta[m]=(theta[m]/(var[m]+1.e-20))*moviecount[m]/(moviecount[m]+alpha);
	}

	//predict
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNALL(u);
		int j;
		double xu=xuser[u];
		for(j=0;j<d012;j++) {
			int m=userent[base+j]&USER_MOVIEMASK;
			double x=xu-avg[m];
			err[base+j]-=theta[m]*x;
		}
	}
}
Пример #5
0
/*
 * Overwrite err[] for every entry of every user with
 *   rating - (GLOBAL_MEAN + wbU[u] + wbV[m])
 * where the rating is the 3-bit field stored above the movie bits of the
 * entry, plus one.
 */
void removeUV() {
	int u;
	for (u = 0; u < NUSERS; u++) {
		const int first = useridx[u][0];
		const int count = UNALL(u);
		int k;

		for (k = 0; k < count; k++) {
			const int slot = first + k;
			const int movie = userent[slot] & USER_MOVIEMASK;
			/* stored field is zero-based; shift it up by one */
			int rating = (userent[slot] >> USER_LMOVIEMASK) & 7;
			rating += 1;

			err[slot] = rating - (GLOBAL_MEAN + wbU[u] + wbV[movie]);
		}
	}
}
Пример #6
0
/*
 * Subtract the single-feature prediction sU[u]*sV[m] from err[] for every
 * entry (all UNALL(u) of them) of every user.
 */
void removeUV()
{
	int u;
	for (u = 0; u < NUSERS; u++) {
		const int first = useridx[u][0];
		const int count = UNALL(u);
		int k = 0;

		while (k < count) {
			const int slot = first + k;
			const int movie = userent[slot] & USER_MOVIEMASK;
			err[slot] -= sU[u]*sV[movie];
			k++;
		}
	}
}
Пример #7
0
void removeUV()
{
    computeU();
    int u;
    for(u=0; u<NUSERS; u++) {
        int base0=useridx[u][0];
        unsigned int *ent=&userent[base0];
        int d012=UNALL(u);
        int i;
        double sUu=sU[u];
        for(i=0; i<d012; i++) {
            err[base0+i]-=sUu*sV[ent[i]&USER_MOVIEMASK];
        }
    }
}
Пример #8
0
/*
 * Root-mean-square error over the probe slice of every user.
 *
 * The probe slice of user u starts at useridx[u][0] + useridx[u][1] and has
 * useridx[u][2] entries.  For each probe entry the model prediction is
 * subtracted from err[]: per feature f,
 *   bU[u][f] + bV[m][f] + (sU[u][f] + lNuSY[f]) * sV[m][f]
 * where lNuSY[f] is the sum of sY[k][f] over all of the user's movies scaled
 * by 1/sqrt(UNALL(u)).  err[] itself is not modified.
 *
 * Returns sqrt(sum of squared errors / number of probe entries); the result
 * is NaN when there are no probe entries at all.
 *
 * NOTE(review): bU/bV are indexed per feature here, unlike a plain per-user /
 * per-movie bias - confirm against their declarations.
 */
double rmseprobe()
{
	int k=2;
	int u, f;
	int n=0;
	double s=0.;
	int i;

	for(u=0;u<NUSERS;u++) {
		int base0=useridx[u][0];

		int dall=UNALL(u);
		double NuS = 1.0/sqrt(dall);
		double lNuSY[NFEATURES];
		ZERO(lNuSY);
		int j;
		for(j=0;j<dall;j++) {
			int mm=userent[base0+j]&USER_MOVIEMASK;
			for(f=0;f<NFEATURES;f++)
				lNuSY[f]+=sY[mm][f];
		}
		// Scale once per user.  This used to sit inside a loop over the
		// training entries, which repeated the same assignment d0 times and
		// left lNuSY at zero for a user with no training entries.
		for(f=0;f<NFEATURES;f++)
			lNuSY[f] = NuS * lNuSY[f];

		// Skip the slices before slice k to find the start of the probe slice
		int base=base0;
		for(i=1;i<k;i++) base+=useridx[u][i];
		int d=useridx[u][k];

		for(i=0; i<d;i++) {
			int m=userent[base+i]&USER_MOVIEMASK;
			double e=err[base+i];
			for (f=0; f<NFEATURES; f++) {
				e-=(bU[u][f] + bV[m][f]);
				e-=((sU[u][f] + lNuSY[f]) * sV[m][f]);
			}
			s+=e*e;
		}
		n+=d;
	}
	return sqrt(s/n);
}
Пример #9
0
/*
 * For every user, gather the per-movie value xmovie[m] of each of the user's
 * entries into a per-entry array and pass it to userXX() together with the
 * user's slice of err[], the training count, the total count and alpha.
 *
 *   xmovie  one value per movie (indexed by movie number)
 *   alpha   forwarded unchanged to userXX()
 */
void userXmovie(double *xmovie, double alpha)
{
	int u;
	for (u = 0; u < NUSERS; u++) {
		const int first = useridx[u][0];
		const int n_all = UNALL(u);
		const int n_train = UNTRAIN(u);
		double regressor[NMOVIES];
		int k = 0;

		/* one explanatory value per entry, in entry order */
		while (k < n_all) {
			const int movie = userent[first+k] & USER_MOVIEMASK;
			regressor[k] = xmovie[movie];
			k++;
		}

		userXX(regressor, &err[first], n_train, n_all, alpha);
	}
}
Пример #10
0
/*
 * "User Time" effect: for every user, build a per-entry explanatory variable
 * DTIME(day - first_day), where first_day comes from the minimum raw entry
 * among the user's UNTOTAL(u) entries, and pass it to userXX() with the
 * user's slice of err[].
 */
void usertime()
{
	int u;

	lg("User Time\n");
	for (u = 0; u < NUSERS; u++) {
		const int first = useridx[u][0];
		const int n_all = UNALL(u);
		const int n_train = UNTRAIN(u);
		/* Looking at all of the user's data is fine here: dates are always known */
		const int first_day = uivmin(&userent[first],UNTOTAL(u))>>(USER_LMOVIEMASK+3);
		double elapsed[NMOVIES];
		int k = 0;

		/* explanatory variable, one value per entry */
		while (k < n_all) {
			int day = userent[first+k]>>(USER_LMOVIEMASK+3);
			elapsed[k] = DTIME(day-first_day);
			k++;
		}

		userXX(elapsed, &err[first], n_train, n_all, USERTIME_ALPHA);
	}
}
Пример #11
0
/*
 * User-average centering.
 *
 * Pass 1: useravg[u] = (sum of err[] over the user's training entries)
 *         / (training count + USERAVG_ALPHA).
 * Pass 2: subtract useravg[u] from err[] for all UNALL(u) entries of the user.
 */
void uavg()
{
	int u;

	lg("User avg centering\n");
	ZERO(useravg);

	/* Pass 1: damped mean over training entries only */
	for (u = 0; u < NUSERS; u++) {
		const int first = useridx[u][0];
		const int n_train = UNTRAIN(u);
		int k = 0;
		while (k < n_train) {
			useravg[u] += err[first+k];
			k++;
		}
		useravg[u] /= n_train+USERAVG_ALPHA;
	}

	/* Pass 2: remove the mean from every entry of the user */
	for (u = 0; u < NUSERS; u++) {
		const int first = useridx[u][0];
		const int n_all = UNALL(u);
		int k;
		for (k = 0; k < n_all; k++)
			err[first+k] -= useravg[u];
	}
}
Пример #12
0
/*
 * One-time setup of the time-dependent model state.
 *
 *  - If load_model is set, try to open fnameV/fnameU; it is an error when
 *    only one of the two opens.  (If neither opens nothing is reported.)
 *  - Allocate and zero DEVuHat, sdbU (one float per entry) and sdsU
 *    (NENTRIES*NFEATURES floats); zero sdbin.
 *  - Track minday/maxday over all entries and compute, per user:
 *      avgdate[u]    mean rating day over all of the user's entries
 *      avgdevu[u]    mean of DEVu over all of the user's entries
 *      maxDEVuHat[u] largest |DEVu - avgdevu[u]|
 *    with DEVu = sign(day - avgdate[u]) * |day - avgdate[u]|^0.4.
 *  - Store DEVuHat[entry] = devuHat(day,u) and assign each entry a
 *    "single day bin": sdbin[entry] is the index of the user's first entry
 *    rated on the same day.
 *
 * NOTE(review): abs() below is applied to a floating-point difference; with
 * the C library's integer abs() the fraction is truncated before powf().
 * fabs() looks intended, but devuHat() (not visible here) may use the same
 * formula, so it is left unchanged - confirm and fix both together.
 */
void score_setup()
{
	int u,j;
    //weight_time_setup();
	if(load_model) {
		fpV=fopen(fnameV,"rb");
		fpU=fopen(fnameU,"rb");
		if(fpV || fpU) {
			lg("Loading %s and %s\n",fnameV,fnameU);
			if(!fpV || !fpU)
				error("Cant open both files");
		}
	}

	// Per-entry tables.  The sdsU element count is computed in unsigned
	// arithmetic, exactly as before.
	size_t entry_bytes  = NENTRIES*sizeof(float);
	size_t factor_bytes = ((unsigned int)NENTRIES)*((unsigned int)NFEATURES)*sizeof(float);
	DEVuHat = (float *) malloc(entry_bytes);
	sdbU    = (float *) malloc(entry_bytes);
	sdsU    = (float *) malloc(factor_bytes);
	if(!DEVuHat || !sdbU || !sdsU) {
		// memset on a NULL pointer would crash; report instead
		error("Out of memory in score_setup");
		return;
	}
	memset(DEVuHat,0,entry_bytes);
	memset(sdbU,0,entry_bytes);
	memset(sdsU,0,factor_bytes);
	ZERO(sdbin);

	ZERO(avgdate);
	ZERO(avgdevu);
	ZERO(ucnt);

	// Day range over all data, plus per-user entry count and day sum
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNALL(u);
		for(j=0;j<d012;j++) {
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			if ( day < minday )
				minday = day;
			if ( day > maxday )
				maxday = day;
			ucnt[u]++;
			avgdate[u] += day;
		}
	}
	printf("minday: %d, maxday: %d\n", minday, maxday);
	fflush(stdout);

	// 1) Average rating date per user (probe dates are included).
	for(u=0;u<NUSERS;u++) {
		avgdate[u] /= ucnt[u];
	}

	// 2) For every entry (including probe):
	//    DEVu = sign(t - t_mean_for_user) * powf(abs(t - t_mean_for_user), 0.4)
	//    accumulated per user.
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNALL(u);
		for(j=0;j<d012;j++) {
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			double DEVu = sign(day - avgdate[u]) * powf(abs(day - avgdate[u]), 0.4);
			avgdevu[u] += DEVu;
		}
	}

	// 3) Average DEVu per user (probe entries are included).
	for(u=0;u<NUSERS;u++) {
		avgdevu[u] /= ucnt[u];
	}

	// 4) Largest absolute centred deviation |DEVu - avgdevu[u]| per user.
	ZERO(maxDEVuHat);
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNALL(u);
		for(j=0;j<d012;j++) {
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			double DEVu = sign(day - avgdate[u]) * powf(abs(day - avgdate[u]), 0.4);
			double centred = DEVu - avgdevu[u];
			double tDEVu = fabs(centred);
			if ( tDEVu > maxDEVuHat[u] )
				maxDEVuHat[u] = tDEVu;
		}
	}

	// Compute and store DEVuHats and create single day bin numbering per user.
	// daysBinValue[day] holds (index of the user's first entry on that day)+1,
	// so that 0 unambiguously means "no entry yet" even for entry index 0.
	int daysBinValue[maxday+1];
	ZERO(daysBinValue);
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNALL(u);
		for(j=0;j<d012;j++) {
			int loc=base+j;
			int day=userent[loc]>>(USER_LMOVIEMASK+3);

			DEVuHat[loc] = devuHat(day,u);

			if ( daysBinValue[day] == 0 )
				daysBinValue[day] = loc+1;
			int bin = daysBinValue[day]-1;
			if ( bin > NENTRIES ) {
				printf("Days bin v: %d\n", bin);
				fflush(stdout);
			}
			sdbin[loc] = bin;
		}
		// Reset only the days this user touched, which is much cheaper than
		// clearing the whole table for every user.
		for(j=0;j<d012;j++) {
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			daysBinValue[day] = 0;
		}
	}
}
Пример #13
0
/*
 * Train all NFEATURES factors at once by stochastic gradient descent.
 *
 * Model for user u, movie m (fitted against err[]):
 *   bU[u] + bV[m] + sum_f (sU[u][f] + lNuSY[f]) * sV[m][f]
 * where lNuSY[f] is the sum of sY[k][f] over all of the user's movies,
 * scaled by NuS = 1/sqrt(UNALL(u)).
 *
 * Each epoch visits users in order.  For every training entry the biases are
 * stepped with rate Gamma1 and sU/sV with rate Gamma2 (regularised by
 * LbU/LbV/LsU/LsV); after the user's training entries, sY of every movie of
 * the user is stepped with the accumulated gradient (regularised by LsY).
 * Both rates decay by 0.90 per epoch.  Training runs for at least 15 and at
 * most 40 epochs; between those bounds it continues only while the training
 * RMSE improves by more than E and the probe RMSE decreases.
 *
 * After training, removeUV() is called (defined elsewhere; presumably it
 * folds the trained prediction into err[]).  Always returns 1.
 */
int doAllFeatures()
{
	/* Initial biases */
	{
		int u,m;

		for(u=0;u<NUSERS;u++) {
			bU[u]=0.0;
		}
		for(m=0;m<NMOVIES;m++) {
			bV[m]=0.0;
		}
	}


	/* Initial factor values: small pseudo-random sU, negated scale for sV, zero sY */
	{
		int u,m,f;

		double uvInit = sqrt(GLOBAL_MEAN/NFEATURES);
		for(u=0;u<NUSERS;u++) {
			for(f=0;f<NFEATURES;f++) {
			    sU[u][f]= uvInit * (rand()%14000 + 2000) * 0.000001235f;
			}
		}
		for(m=0;m<NMOVIES;m++) {
			for(f=0;f<NFEATURES;f++) {
			    sV[m][f]= uvInit * (rand()%14000 + 2000) * -0.000001235f;
			    sY[m][f]=0.0;
			}
		}
	}

	/* Optimize */
	double nrmse=2., last_rmse=10.;
	double prmse = 0, last_prmse=0;
	int loopcount=0;
	double Gamma1 = G1;
	double Gamma2 = G2;
	while( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 15) && loopcount < 40  )  {
		last_rmse=nrmse;
		last_prmse=prmse;
		clock_t t0=clock();
		loopcount++;

		/* Running sums for the "NREG" diagnostic line only; they do not feed back into training */
		double aErrAvg=0;
		double astepSuAvg=0;
		double astepSvAvg=0;
		double astepSyAvg=0;
		double abU=0, abV=0, asU=0, asV=0, asY=0;
		int n1=0, n2=0, n3=0;


		int u, f;
		for(u=0;u<NUSERS;u++) {

			// Calculate sumY and NuSY for each factor
			double sumY[NFEATURES];
			ZERO(sumY);
			double lNuSY[NFEATURES];
			ZERO(lNuSY);
			int base0=useridx[u][0];
			int d0=UNTRAIN(u);
			int j;
			int dall=UNALL(u);
			double NuS = 1.0/sqrt(dall);
			for(j=0;j<dall;j++) {
				int mm=userent[base0+j]&USER_MOVIEMASK;
				for(f=0;f<NFEATURES;f++)
					sumY[f]+=sY[mm][f];
			}
			for(f=0;f<NFEATURES;f++) {
				lNuSY[f] = NuS * sumY[f];
			}

			// Gradient for sY, accumulated over this user's training entries
			double ycontrib[NFEATURES];
			ZERO(ycontrib);

			// For all rated movies
			for(j=0;j<d0;j++) {
				int m=userent[base0+j]&USER_MOVIEMASK;

				// Figure out the current error
				double ee=err[base0+j];
				double e2 = ee;
				e2 -= (bU[u] + bV[m]);
				for (f=0; f<NFEATURES; f++)
					e2 -= ((sU[u][f]+lNuSY[f])*sV[m][f]);

				// Train the biases (both steps use the pre-update values)
				double bUu = bU[u];
				double bVm = bV[m];
				bU[u] += Gamma1 * (e2 - bUu * LbU);
				bV[m] += Gamma1 * (e2 - bVm * LbV);

				aErrAvg+=fabs(e2);
				abU += fabs(bU[u]);
				abV += fabs(bV[m]);
				n1++;

				// update U V and slope component of Y
				double yfactor = NuS;
				for (f=0; f<NFEATURES; f++) {
					double sUu = sU[u][f];
					double sVm = sV[m][f];

					sU[u][f] += ((Gamma2) * ((e2 * sVm) - LsU * sUu));
					sV[m][f] += ((Gamma2) * ((e2 * (sUu + lNuSY[f])) - LsV * sVm));

					asU += fabs(sU[u][f]);
					asV += fabs(sV[m][f]);
					astepSuAvg+=fabs(e2 * sV[m][f]);
					astepSvAvg+=fabs(e2 * sU[u][f]);
					n2++;

					ycontrib[f] += e2 * sVm * yfactor;
				}
			}

			// Train Ys over all known movies for user
			for(j=0;j<dall;j++) {
				int m=userent[base0+j]&USER_MOVIEMASK;
				for (f=0; f<NFEATURES; f++) {
					double sYm = sY[m][f];
					sY[m][f] += Gamma2 * (ycontrib[f] - LsY * sYm);

					asY += fabs(sY[m][f]);
					astepSyAvg+=fabs(ycontrib[f]);
					n3++;
				}
			}
		}

		aErrAvg/=n1;
		astepSuAvg/=n2;
		astepSvAvg/=n2;
		astepSyAvg/=n3;
		// asY is accumulated once per sY update (n3 times), so average over n3, not n2
		abU/=n1, abV/=n1, asU/=n2, asV/=n2, asY/=n3;
		double bUREG = 1.9074 / 100.0 * aErrAvg / abU;
		double bVREG = 1.9074 / 100.0 * aErrAvg / abV;
		double sUREG = 1.9074 / 100.0 * astepSuAvg / asU;
		double sVREG = 1.9074 / 100.0 * astepSvAvg / asV;
		double sYREG = 1.9074 / 100.0 * astepSyAvg / asY;
		printf("NREG - bU: %f bV: %f, sU: %f, sV: %f, sY: %f\n", bUREG, bVREG, sUREG, sVREG, sYREG);

		// Report rmse for main loop
		nrmse=0.;
		int ntrain=0;
		int elcnt=0;
		int k=2;
		int n=0;
		double s=0.;
		for(u=0;u<NUSERS;u++) {
			int base0=useridx[u][0];
			int d0=UNTRAIN(u);
			int j;

			// Setup the Ys again
			double sumY[NFEATURES];
			ZERO(sumY);
			double lNuSY[NFEATURES];
			ZERO(lNuSY);
			int dall=UNALL(u);
			double NuS = 1.0/sqrt(dall);
			for(j=0;j<dall;j++) {
				int mm=userent[base0+j]&USER_MOVIEMASK;
				for(f=0;f<NFEATURES;f++)
					sumY[f]+=sY[mm][f];
			}
			for(f=0;f<NFEATURES;f++)
				lNuSY[f] = NuS * sumY[f];

			// Training error
			for(j=0;j<d0;j++) {
				int m=userent[base0+j]&USER_MOVIEMASK;
				double ee = err[base0+j];
				double e2 = ee;
				e2 -= (bU[u] + bV[m]);
				for (f=0; f<NFEATURES; f++)
					e2 -= ( (sU[u][f] + lNuSY[f]) * sV[m][f]);

				// Debug dump of one sample entry: the first (up to) four
				// features, bounded by NFEATURES so small models stay in range
				if( elcnt++ == 5000 ) {
					int df;
					for (df=0; df<4 && df<NFEATURES; df++)
						printf("%d E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", df, ee, e2, lNuSY[df], sV[m][df], sU[u][df], bU[u], bV[m], sY[m][df],u, m);
					fflush(stdout);
				}

				nrmse+=e2*e2;
			}
			ntrain+=d0;

			// Sum up probe rmse (slice k of the user's entries)
			int i;
			int base=useridx[u][0];
			for(i=1;i<k;i++) base+=useridx[u][i];
			int d=useridx[u][k];
			for(i=0; i<d;i++) {
				int m=userent[base+i]&USER_MOVIEMASK;
				double e=err[base+i];
				e-=(bU[u] + bV[m]);
				for (f=0; f<NFEATURES; f++)
					e-=((sU[u][f] + lNuSY[f]) * sV[m][f]);
				s+=e*e;
			}
			n+=d;
		}
		nrmse=sqrt(nrmse/ntrain);
		prmse = sqrt(s/n);

		lg("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC);
		Gamma1 *= 0.90;
		Gamma2 *= 0.90;
	}

	/* Hand the trained model to removeUV() (defined elsewhere) */
	removeUV();

	return 1;
}
Пример #14
0
int doAllFeatures()
{
int cloop=0;
	/* Initial weight factors */
	int i, j, h, c, r;
    for (i=0; i < NMOVIES; i++) {
	    for (r=0; r<5; r++) {
	        for (c=0; c < NFACTORS; c++) {
				Aic[i][r][c] = 0.02 * randn() - 0.01; // Normal Distribution
			}
		}
	}
    for (c=0; c < NFACTORS; c++) {
	    for (j=0; j < TOTAL_FEATURES; j++) {
			//vishid[j][0][i] = 0.02 * randn() - 0.01; // Normal Distribution
			//vishid[j][1][i] = 0.02 * randn() - 0.01; // Normal Distribution
			//vishid[j][2][i] = 0.02 * randn() - 0.01; // Normal Distribution
			//vishid[j][3][i] = 0.02 * randn() - 0.01; // Normal Distribution
			//vishid[j][4][i] = 0.02 * randn() - 0.01; // Normal Distribution
			Bcj[c][j] = 0.2/3.0 * randn() - 0.1/3.0; // Normal Distribution
	    }

	}

	/* Initial biases */
	for(i=0;i<TOTAL_FEATURES;i++) {
		hidbiases[i]=0.0;
	}
    for (j=0; j<NMOVIES; j++) {
		unsigned int mtot = moviercount[j*5+0] + moviercount[j*5+1] + moviercount[j*5+2] + moviercount[j*5+3] + moviercount[j*5+4];
	    for (i=0; i<5; i++) {
		    visbiases[j][i] = log( ((double)moviercount[j*5+i]) / ((double) mtot) );
//printf("mrc: %d, mc %d, log:%f frac: %f\n", moviercount[j*5+i], moviecount[j] , log( moviercount[j*5+i] /(double) moviecount[j]), 
//(moviercount[j*5+i] /(double) moviecount[j]) );
		}
	}

	
	/* Optimize current feature */
	double nrmse=2., last_rmse=10.;
	double prmse = 0, last_prmse=0;
	double s;
	//double s2;
	int n;
	int loopcount=0;
	double EpsilonW  = epsilonw;
	double EpsilonVB = epsilonvb;
	double EpsilonHB = epsilonhb;
	double Momentum  = momentum;
	ZERO(Ainc);
	ZERO(Binc);
	ZERO(visbiasinc);
	ZERO(hidbiasinc);
	int tSteps = 1;

	//while ( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 14) && loopcount < 80  )  {
	while ( ((nrmse < (last_rmse-E) ) || loopcount < 14) && loopcount < 80  )  {

		//if ( loopcount >= 10 )
			//tSteps = 1 + loopcount / 5;

		last_rmse=nrmse;
		last_prmse=prmse;
		clock_t t0=clock();
		loopcount++;
		int ntrain = 0;
		nrmse = 0.0;
		s  = 0.0;
		//s2 = 0.0;
		n = 0;

		if ( loopcount > 5 )
			Momentum = finalmomentum;


		//* CDpos =0, CDneg=0 (matrices)
		ZERO(Apos);
		ZERO(Aneg);
		ZERO(Bpos);
		ZERO(Bneg);
		ZERO(poshidact);
		ZERO(neghidact);
		ZERO(posvisact);
		ZERO(negvisact);
		ZERO(moviecount);

		int u,m, f;
		for(u=0;u<NUSERS;u++) {

			//* CDpos =0, CDneg=0 (matrices)
			ZERO(negvisprobs);
			ZERO(nvp2);

		    //* perform steps 1 to 8

			int base0=useridx[u][0];
			int d0=UNTRAIN(u);
			int dall=UNALL(u);

			// For all rated movies, accumulate contributions to hidden units
			double sumW[TOTAL_FEATURES];
			ZERO(sumW);
			for(j=0;j<d0;j++) {
				int m=userent[base0+j]&USER_MOVIEMASK;
				moviecount[m]++;

   				// 1. get one data point from data set.
   				// 2. use values of this data point to set state of visible neurons Si
				int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;

				// Add to the bias contribution for set visible units
				posvisact[m][r] += 1.0;
 
				// for all hidden units h:
				for(h=0;h<TOTAL_FEATURES;h++) {
        			// sum_j(W[i][j] * v[0][j]))
			    	//sumW[h]  += vishid[m][r][h];
			    	sumW[h]  += Wij(m,r,h);
				}
			}

			// Sample the hidden units state after computing probabilities
			for(h=0;h<TOTAL_FEATURES;h++) {

   			    // 3. compute Sj for each hidden neuron based on formula above and states of visible neurons Si
			    // poshidprobs[h] = 1./(1 + exp(-V*vishid - hidbiases);
				// compute Q(h[0][i] = 1 | v[0]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[0][j]))
				poshidprobs[h]  = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h]));

				// sample h[0][i] from Q(h[0][i] = 1 | v[0])
				if  ( poshidprobs[h] >  (rand()/(double)(RAND_MAX)) ) {
					poshidstates[h]=1;
					poshidact[h] += 1.0;
				} else {
					poshidstates[h]=0;
				}
				//poshidact[h] += poshidprobs[h];
			}

			// Load up a copy of poshidstates for use in loop
			for ( h=0; h < TOTAL_FEATURES; h++ ) 
				curposhidstates[h] = poshidstates[h];

			// Make T Contrastive Divergence steps
			int stepT = 0;
			do {
				// Determine if this is the last pass through this loop
				int finalTStep = (stepT+1 >= tSteps);
				
				// 5. on visible neurons compute Si using the Sj computed in step3. This is known as reconstruction
				// for all visible units j:
				int r;
				int count = d0;
				count += useridx[u][1];  // too compute probe errors
				for(j=0;j<count;j++) {
					int m=userent[base0+j]&USER_MOVIEMASK;
					for(h=0;h<TOTAL_FEATURES;h++) {
						if ( curposhidstates[h] == 1 ) {
							for(r=0;r<5;r++) {
								//negvisprobs[m][r]  += vishid[m][r][h];
								negvisprobs[m][r]  += Wij(m,r,h);
							}
						}
						//for(r=0;r<5;r++) 
							//negvisprobs[m][r]  += poshidprobs[h] * vishid[m][r][h];
						if ( loopcount >= 10 ) {
							for(r=0;r<5;r++) 
								//nvp2[m][r] += poshidprobs[h] * vishid[m][r][h];
								nvp2[m][r] += poshidprobs[h] * Wij(m,r,h);
						}
					}

					// compute P(v[1][j] = 1 | h[0]) # for binomial units, sigmoid(c[j] + sum_i(W[i][j] * h[0][i]))
					negvisprobs[m][0]  = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0]));
					negvisprobs[m][1]  = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1]));
					negvisprobs[m][2]  = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2]));
					negvisprobs[m][3]  = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3]));
					negvisprobs[m][4]  = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4]));

					// Normalize probabilities
					double tsum  = 
					  negvisprobs[m][0] +
					  negvisprobs[m][1] +
					  negvisprobs[m][2] +
					  negvisprobs[m][3] +
					  negvisprobs[m][4];
					if ( tsum != 0 ) {
						negvisprobs[m][0]  /= tsum;
						negvisprobs[m][1]  /= tsum;
						negvisprobs[m][2]  /= tsum;
						negvisprobs[m][3]  /= tsum;
						negvisprobs[m][4]  /= tsum;
					}
					if ( loopcount >= 10 ) {
						nvp2[m][0]  = 1./(1 + exp(-nvp2[m][0] - visbiases[m][0]));
						nvp2[m][1]  = 1./(1 + exp(-nvp2[m][1] - visbiases[m][1]));
						nvp2[m][2]  = 1./(1 + exp(-nvp2[m][2] - visbiases[m][2]));
						nvp2[m][3]  = 1./(1 + exp(-nvp2[m][3] - visbiases[m][3]));
						nvp2[m][4]  = 1./(1 + exp(-nvp2[m][4] - visbiases[m][4]));
						double tsum2  = 
						  nvp2[m][0] +
						  nvp2[m][1] +
						  nvp2[m][2] +
						  nvp2[m][3] +
						  nvp2[m][4];
						if ( tsum2 != 0 ) {
							nvp2[m][0]  /= tsum2;
							nvp2[m][1]  /= tsum2;
							nvp2[m][2]  /= tsum2;
							nvp2[m][3]  /= tsum2;
							nvp2[m][4]  /= tsum2;
						}
					}

					// sample v[1][j] from P(v[1][j] = 1 | h[0])
					double randval = (rand()/(double)(RAND_MAX));
					if ( (randval -= negvisprobs[m][0]) <= 0.0 )
						negvissoftmax[m] = 0;
					else if ( (randval -= negvisprobs[m][1]) <= 0.0 )
						negvissoftmax[m] = 1;
					else if ( (randval -= negvisprobs[m][2]) <= 0.0 )
						negvissoftmax[m] = 2;
					else if ( (randval -= negvisprobs[m][3]) <= 0.0 )
						negvissoftmax[m] = 3;
					else //if ( (randval -= negvisprobs[m][4]) <= 0.0 )
						negvissoftmax[m] = 4;
					//negvisact[m*5+0] += negvisprobs[m*5+0];
					//negvisact[m*5+1] += negvisprobs[m*5+1];
					//negvisact[m*5+2] += negvisprobs[m*5+2];
					//negvisact[m*5+3] += negvisprobs[m*5+3];
					//negvisact[m*5+4] += negvisprobs[m*5+4];

					// if in training data then train on it
					if ( j < d0 && finalTStep )  
						negvisact[m][negvissoftmax[m]] += 1.0;
				}


				// 6. compute state of hidden neurons Sj again using Si from 5 step.
				// For all rated movies accumulate contributions to hidden units from sampled visible units
				ZERO(sumW);
				for(j=0;j<d0;j++) {
					int m=userent[base0+j]&USER_MOVIEMASK;
	 
					// for all hidden units h:
					for(h=0;h<TOTAL_FEATURES;h++) {
						//sumW[h]  += vishid[m][negvissoftmax[m]][h];
						sumW[h]  += Wij(m,negvissoftmax[m],h);
						//sumW[h]  += vishid[m][0][h] * negvisprobs[m*5+0];
						//sumW[h]  += vishid[m][1][h] * negvisprobs[m*5+1];
						//sumW[h]  += vishid[m][2][h] * negvisprobs[m*5+2];
						//sumW[h]  += vishid[m][3][h] * negvisprobs[m*5+3];
						//sumW[h]  += vishid[m][4][h] * negvisprobs[m*5+4];
					}
				}
				// for all hidden units h:
				for(h=0;h<TOTAL_FEATURES;h++) {
					// compute Q(h[1][i] = 1 | v[1]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[1][j]))
					neghidprobs[h]  = 1./(1 + exp(-sumW[h] - hidbiases[h]));

					// Experimentally sample the hidden units state again TODO: What is best?
					if  ( neghidprobs[h] >  (rand()/(double)(RAND_MAX)) ) {
						neghidstates[h]=1;
						if ( finalTStep )
							neghidact[h] += 1.0;
					} else {
						neghidstates[h]=0;
					}
					//if ( finalTStep )
						//neghidact[h] += neghidprobs[h];
				}

				// Compute error rmse and prmse before we start iterating on T
				if ( stepT == 0 ) {

					// Compute rmse on training data
					for(j=0;j<d0;j++) {
						int m=userent[base0+j]&USER_MOVIEMASK;
						int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
		 
						//# Compute some error function like sum of squared difference between Si in 1) and Si in 5)
						if ( loopcount < 10 ) {
						    double expectedV = negvisprobs[m][1] + 2.0 * negvisprobs[m][2] + 3.0 * negvisprobs[m][3] + 4.0 * negvisprobs[m][4];
						    double vdelta = (((double)r)-expectedV);
						    nrmse += (vdelta * vdelta);
						} else {
						    double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
						    double vdelta = (((double)r)-expectedV);
						    nrmse += (vdelta * vdelta);
						}
					}
					ntrain+=d0;

					// Sum up probe rmse
					int base=useridx[u][0];
					for(i=1;i<2;i++) base+=useridx[u][i];
					int d=useridx[u][2];
					for(i=0; i<d;i++) {
						int m=userent[base+i]&USER_MOVIEMASK;
						int r=(userent[base+i]>>USER_LMOVIEMASK)&7;
						//# Compute some error function like sum of squared difference between Si in 1) and Si in 5)
						if ( loopcount < 10 ) {
							double expectedV = negvisprobs[m][1] + 2.0 * negvisprobs[m][2] + 3.0 * negvisprobs[m][3] + 4.0 * negvisprobs[m][4];
							double vdelta = (((double)r)-expectedV);
							s+=vdelta*vdelta;
						} else {
							double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
							double vdelta = (((double)r)-expectedV);
							s+=vdelta*vdelta;
						}
					}
					n+=d;
				}

				// If looping again, load the curposvisstates
				if ( !finalTStep ) {
					for ( h=0; h < TOTAL_FEATURES; h++ ) 
						curposhidstates[h] = neghidstates[h];
					ZERO(negvisprobs);
				}

			  // 8. repeating multiple times steps 5,6 and 7 compute (Si.Sj)n. Where n is small number and can 
			  //    increase with learning steps to achieve better accuracy.

			} while ( ++stepT < tSteps );



			// Accumulate contrastive divergence contributions for (Si.Sj)0 and (Si.Sj)T
			for(j=0;j<d0;j++) {
				int m=userent[base0+j]&USER_MOVIEMASK;
				int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
 
				// for all hidden units h:
				for(h=0;h<TOTAL_FEATURES;h++) {
					if ( poshidstates[h] == 1 ) {
						// 4. now Si and Sj values can be used to compute (Si.Sj)0  here () means just values not average
						//* accumulate CDpos = CDpos + (Si.Sj)0
						//CDpos[m][r][h] += 1.0;
						for (c=0; c < NFACTORS; c++) {
							Apos[m][r][c] += Bcj[c][h];
							Bpos[c][h]    += Aic[m][r][c];
						}
					}
					//CDpos[m][r][h] += poshidprobs[h];

					// 7. now use Si and Sj to compute (Si.Sj)1 (fig.3)
					//TODO - This is experimental!!!!!!!
					//CDneg[m][negvissoftmax[m]][h] += neghidprobs[h];
					//CDneg[m][negvissoftmax[m]][h] += (double)neghidstates[h];
					if ( neghidstates[h] == 1 ) {
						for (c=0; c < NFACTORS; c++) {
							Aneg[m][negvissoftmax[m]][c] += Bcj[c][h];
							Bneg[c][h]                   += Aic[m][negvissoftmax[m]][c];
						}
					}
				}
			}


			// Update weights and biases after batch
			//
			//int bsize = 1000;
			int bsize = 100;
			if ( ((u+1) % bsize) == 0 || (u+1) == NUSERS ) {
				int numcases = u % bsize;
				numcases++;
cloop++;
				//if ( numcases != bsize ) printf("u: %d, numcases: %d\n", u, numcases);

				// Update A factors
				for(m=0;m < NMOVIES;m++) {
					if ( moviecount[m] == 0 ) continue;

					// for all c factors
					for(c=0;c < NFACTORS; c++) {
						// for all softmax
						int rr;
						for(rr=0;rr<5;rr++) {
							//# At the end compute average of CDpos and CDneg by dividing them by number of data points.
							//# Compute CD = < Si.Sj >0  < Si.Sj >n = CDpos  CDneg
							double Ap = Apos[m][rr][c];
							double An = Aneg[m][rr][c];
							if ( Ap != 0.0 || An != 0.0 ) {
								Ap /= ((double)moviecount[m]);
								An /= ((double)moviecount[m]);

								// W += epsilon * (h[0] * v[0]' - Q(h[1][.] = 1 | v[1]) * v[1]')
								//# Update weights and biases W = W + alpha*CD (biases are just weights to neurons that stay always 1.0)
								//e.g between data and reconstruction.
//double preW = vishid[m][rr][h];
								Ainc[m][rr][c] = Momentum * Ainc[m][rr][c] + EpsilonW * ((Ap - An) - weightcost * Aic[m][rr][c]);
								Aic[m][rr][c] += Ainc[m][rr][c];
//if ( cloop % 50 == 0 && c == 7 )
//printf("Aic: %f\t m: %d\t r: %d\t c: %d\n", Aic[m][rr][c], m, rr, c);
//printf("W: %f preW: %f, CDp: %f, CDn: %f, m: %d, r: %d, h: %d, nhp: %f, nvp: %f\n", vishid[m][rr][h], preW, CDp, CDn, m, rr, h,
//neghidprobs[h],negvisprobs[m*5+rr]
//);
							} 
						}
					}

					// Update visible softmax biases
					// c += epsilon * (v[0] - v[1])$
					// for all softmax
					int rr;
					for(rr=0;rr<5;rr++) {
						if ( posvisact[m][rr] != 0.0 || negvisact[m][rr] != 0.0 ) {
							posvisact[m][rr] /= ((double)moviecount[m]);
							negvisact[m][rr] /= ((double)moviecount[m]);
							visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]));
							//visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]) - weightcost * visbiases[m][rr]);
							visbiases[m][rr]  += visbiasinc[m][rr];
//printf("vb: %f, pa: %f, na: %f\n", visbiases[(m*5+rr)], posvisact[(m*5+rr)], negvisact[(m*5+rr)]);
						}
					}
				}

				// Update B factors
				for(c=0;c<NFACTORS;c++) {

					// for all hidden units h:
					for(h=0;h<TOTAL_FEATURES;h++) {

						//# At the end compute average of CDpos and CDneg by dividing them by number of data points.
						//# Compute CD = < Si.Sj >0  < Si.Sj >n = CDpos  CDneg
						double Bp = Bpos[c][h];
						double Bn = Bneg[c][h];
						if ( Bp != 0.0 || Bn != 0.0 ) {
							Bp /= ((double)numcases);
							Bn /= ((double)numcases);

							// W += epsilon * (h[0] * v[0]' - Q(h[1][.] = 1 | v[1]) * v[1]')
							//# Update weights and biases W = W + alpha*CD (biases are just weights to neurons that stay always 1.0)
							//e.g between data and reconstruction.
//double preW = vishid[m][rr][h];
							Binc[c][h] = Momentum * Binc[c][h] + EpsilonW * ((Bp - Bn) - weightcost * Bcj[c][h]);
							Bcj[c][h] += Binc[c][h];
//if ( cloop % 50 == 0 && h == 7 )
//printf("Bcj: %f\t c: %d\t h: %d\n", Bcj[c][h], c, h);
//printf("W: %f preW: %f, CDp: %f, CDn: %f, m: %d, r: %d, h: %d, nhp: %f, nvp: %f\n", vishid[m][rr][h], preW, CDp, CDn, m, rr, h,
//neghidprobs[h],negvisprobs[m*5+rr]
//);
						} 
					}
				}

				// Update hidden biases
				// b += epsilon * (h[0] - Q(h[1][.] = 1 | v[1]))
				for(h=0;h<TOTAL_FEATURES;h++) {
					if ( poshidact[h]  != 0.0 || neghidact[h]  != 0.0 ) {
						//poshidact[h]  /= ((double)(numcases*ntrain*5));
						//neghidact[h]  /= ((double)(numcases*ntrain*5));
						poshidact[h]  /= ((double)(numcases));
						neghidact[h]  /= ((double)(numcases));
						//poshidact[h]  /= ((double)(mcount));
						//neghidact[h]  /= ((double)(mcount));
						hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]));
						//hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]) - weightcost * hidbiases[h]);
						hidbiases[h]  += hidbiasinc[h];
		//printf("hb: %f, pa: %f, na: %f, d0:%d\n", hidbiases[h], poshidact[h], neghidact[h], d0);
					}
				}

				ZERO(Apos);
				ZERO(Aneg);
				ZERO(Bpos);
				ZERO(Bneg);
				ZERO(poshidact);
				ZERO(neghidact);
				ZERO(posvisact);
				ZERO(negvisact);
				//ZERO(poscnt);
				//ZERO(negcnt);
				ZERO(moviecount);
				//mcount = 0;
			}
		}

		nrmse=sqrt(nrmse/ntrain);
		prmse = sqrt(s/n);
		//double prmse2 = sqrt(s2/n);
		
		//lg("%f\t%f\t%f\t%f\n",nrmse,prmse,prmse2,(clock()-t0)/(double)CLOCKS_PER_SEC);
		lg("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC);
		if ( loopcount > 6 ) {
			EpsilonW  *= 0.90;
			EpsilonVB *= 0.90;
			EpsilonHB *= 0.90;
		} //else if ( loopcount > 5 ) {
			//EpsilonW  *= 0.82;
			//EpsilonVB *= 0.82;
			//EpsilonHB *= 0.82;
		//}
		//printf("dd: %d %d %d %d %d\n", dd[0], dd[1], dd[2], dd[3], dd[4]);
	}
	
	/* Perform a final iteration in which the errors are clipped and stored */
	recordErrors();
	
	//if(save_model) {
		//dappend_bin(fnameV,sV,NMOVIES);
		//dappend_bin(fnameU,sU,NUSERS);
	//}
	
	return 1;
}
Пример #15
0
void recordErrors()
{
	int u,h,f, j, i;
	for(u=0;u<NUSERS;u++) {

		//* CDpos =0, CDneg=0 (matrices)
		ZERO(negvisprobs);

		//* perform steps 1 to 8

		int base0=useridx[u][0];
		int d0=UNTRAIN(u);
		int dall=UNALL(u);

		// For all rated movies, accumulate contributions to hidden units
		double sumW[TOTAL_FEATURES];
		ZERO(sumW);
		for(j=0;j<d0;j++) {
			int m=userent[base0+j]&USER_MOVIEMASK;

			// 1. get one data point from data set.
			// 2. use values of this data point to set state of visible neurons Si
			int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;

			// for all hidden units h:
			for(h=0;h<TOTAL_FEATURES;h++) {
				// sum_j(W[i][j] * v[0][j]))
				//sumW[h]  += vishid[m][r][h];
				sumW[h]  += Wij(m,r,h);
			}
		}

		// Compute the hidden probabilities
		for(h=0;h<TOTAL_FEATURES;h++) {

			// 3. compute Sj for each hidden neuron based on formula above and states of visible neurons Si
			// compute Q(h[0][i] = 1 | v[0]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[0][j]))
			poshidprobs[h]  = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h]));
		}

		// 5. on visible neurons compute Si using the Sj computed in step3. This is known as reconstruction
		// for all visible units j:
		int r;
		int count = dall;
		for(j=0;j<count;j++) {
			int m=userent[base0+j]&USER_MOVIEMASK;
			for(h=0;h<TOTAL_FEATURES;h++) {
				for(r=0;r<5;r++) 
					//negvisprobs[m][r]  += poshidprobs[h] * vishid[m][r][h];
					negvisprobs[m][r]  += poshidprobs[h] * Wij(m,r,h);
			}

			// compute P(v[1][j] = 1 | h[0]) # for binomial units, sigmoid(c[j] + sum_i(W[i][j] * h[0][i]))
			negvisprobs[m][0]  = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0]));
			negvisprobs[m][1]  = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1]));
			negvisprobs[m][2]  = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2]));
			negvisprobs[m][3]  = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3]));
			negvisprobs[m][4]  = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4]));

			// Normalize probabilities
			double tsum  = 
			  negvisprobs[m][0] +
			  negvisprobs[m][1] +
			  negvisprobs[m][2] +
			  negvisprobs[m][3] +
			  negvisprobs[m][4];
			if ( tsum != 0 ) {
				negvisprobs[m][0]  /= tsum;
				negvisprobs[m][1]  /= tsum;
				negvisprobs[m][2]  /= tsum;
				negvisprobs[m][3]  /= tsum;
				negvisprobs[m][4]  /= tsum;
			}
		}

		// Compute and save error residuals
		for(i=0; i<dall;i++) {
			int m=userent[base0+i]&USER_MOVIEMASK;
			int r=(userent[base0+i]>>USER_LMOVIEMASK)&7;
			double expectedV = negvisprobs[m][1] + 2.0 * negvisprobs[m][2] + 3.0 * negvisprobs[m][3] + 4.0 * negvisprobs[m][4];
			double vdelta = (((double)r)-expectedV);
			err[base0+i] = vdelta;
		}
	}
}
Пример #16
0
/*
 * doAllFeatures - fit one additive offset per user (wbU) and per movie (wbV)
 * by stochastic gradient descent on r - (GLOBAL_MEAN + wbU[u] + wbV[m]).
 *
 * Each pass:
 *   - snapshots wbU/wbV,
 *   - sweeps every user's UNTRAIN(u) entries updating both offsets with step
 *     Gamma0 and shrinkage L4,
 *   - logs training RMSE, probe RMSE (entries counted by useridx[u][2]) and
 *     the pass time,
 *   - multiplies Gamma0 by 0.90.
 * Passes continue while the probe RMSE has not increased, and for at least
 * six passes. When the loop stops, the snapshot taken before the last pass
 * is restored and removeUV() is called.
 *
 * Squared errors are summed in double: a float sum stops absorbing terms of
 * size ~1 once it passes 2^24, which a large rating set reaches quickly.
 *
 * Returns 1.
 */
int doAllFeatures() {

	/* Initial biases */
	{
		int u,m;

		for(u=0;u<NUSERS;u++) {
			wbU[u]=0.0;
		}
		for(m=0;m<NMOVIES;m++) {
			wbV[m]=0.0;
		}
	}

	/* Copies of the offsets as they were before the most recent pass */
	float swbU[NUSERS];
	float swbV[NMOVIES];

	/* Optimize the offsets */
	float prmse = 0, last_prmse=0;
	int loopcount=0;
	float Gamma0 = G0;
	while( (prmse<=last_prmse) || loopcount < 6 ) {
		last_prmse=prmse;
		clock_t t0=clock();
		loopcount++;

		int u,m,j;

		// Save the prior wbU and wbV for when the RMSE gets worse
		for(u=0;u<NUSERS;u++) {
			swbU[u] = wbU[u];
		}
		for(m=0;m<NMOVIES;m++) {
			swbV[m]=wbV[m];
		}

		// Train
		for(u=0;u<NUSERS;u++) {
			int d0 = UNTRAIN(u);
			int base0=useridx[u][0];

			// For all rated movies
			for(j=0;j<d0;j++) {
				int mv=userent[base0+j]&USER_MOVIEMASK;

				// The stored rating field is shifted up by one before use
				int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
				r++;
				float e2 = r - (GLOBAL_MEAN + wbU[u] + wbV[mv]);

				// Both updates use the offsets as they were before this entry
				float wbUu = wbU[u];
				float wbVm = wbV[mv];
				wbU[u] += Gamma0 * (e2 - wbUu * L4);
				wbV[mv] += Gamma0 * (e2 - wbVm * L4);
			}
		}

		// Report rmse for this pass
		double train_sq=0.;   // sum of squared training errors
		double probe_sq=0.;   // sum of squared probe errors
		int ntrain=0;
		int n=0;
		int k=2;              // useridx[u][k] is the probe entry count
		for(u=0;u<NUSERS;u++) {

			int base0=useridx[u][0];
			int d0 = UNTRAIN(u);
			int i;

			for(i=0; i<d0;i++) {
				int mv=userent[base0+i]&USER_MOVIEMASK;

				int r=(userent[base0+i]>>USER_LMOVIEMASK)&7;
				r++;
				float e2 = r - (GLOBAL_MEAN + wbU[u] + wbV[mv]);

				train_sq+=(double)e2*e2;
				ntrain++;
			}

			// Probe entries start after the useridx[u][1..k-1] entries
			int base=useridx[u][0];
			for(i=1;i<k;i++) base+=useridx[u][i];
			int d=useridx[u][k];
			for(i=0; i<d;i++) {
				int mv=userent[base+i]&USER_MOVIEMASK;

				int r=(userent[base+i]>>USER_LMOVIEMASK)&7;
				r++;
				float e = r - (GLOBAL_MEAN + wbU[u] + wbV[mv]);

				probe_sq+=(double)e*e;
			}
			n+=d;
		}

		float nrmse=sqrt(train_sq/ntrain);
		// NOTE(review): with no probe entries (n == 0) this is NaN, the
		// comparison in the loop condition is false and the loop ends after
		// the sixth pass.
		prmse = sqrt(probe_sq/n);

		lg("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC);

		Gamma0 *= 0.90;
	}

	// Restore the offsets saved before the final pass
	int u, m;
	for(u=0;u<NUSERS;u++) {
		wbU[u] = swbU[u];
	}
	for(m=0;m<NMOVIES;m++) {
		wbV[m] = swbV[m];
	}

	/* removeUV() is defined elsewhere.
	 * NOTE(review): confirm that the removeUV() linked with this routine
	 * reads wbU/wbV when it updates err[]. */
	removeUV();

	return 1;
}
Пример #17
0
computemix(char *fnames[], int nscores, double *xty)
{
#ifdef HOLDOUT
	lg("With holdout\n");
#endif
	int ns2=nscores+2;
	int ns1=nscores+1;
	FILE *fp[NSCORES];
	openfiles(fp,fnames,nscores);

	double xtx[NSCORES+2][NSCORES+2];
	ZERO(xtx);
	
	int u;
	for(u=0; u<NUSERS; u++) {
		PROGRESS(u,NUSERS);
		int base=useridx[u][0];
#ifdef HOLDOUT
		if(aopt) error("cant do holdout with -a");
		int d0=UNTRAIN(u);
		int d1=UNALL(u)-d0;
#else
		int d0=0;
		int d1=UNTRAIN(u);
#endif
		seekfiles(fp,nscores, d0);
		base+=d0;
		int j;			
		for(j=0;j<d1;j++) {
			unsigned int dd=userent[base+j];
			int r = (dd>>USER_LMOVIEMASK)&7;
			float s[NSCORES+2];
			readfiles(fp,s,nscores);
			int f;
			for(f=0;f<nscores;f++)		
				s[f]=r-s[f];
			s[nscores]=1.;
			s[nscores+1]=r;

			int ff;
			for(f=0;f<ns2;f++) {
				for(ff=0;ff<ns2;ff++)
					xtx[f][ff] +=s[f]*s[ff];
			}
		}
		int d2=UNTOTAL(u)-(d1+d0);
		seekfiles(fp,nscores, d2);
	}
	closefiles(fp,nscores);
	int count=xtx[nscores][nscores];
	int j1,j2;
	for(j1=0;j1<nscores;j1++)
		lg("File %d RMSE %f\n",j1,sqrt((xtx[j1][j1]+xtx[ns1][ns1]-2*xtx[ns1][j1])/count));
	double avgs[NSCORES+2],std[NSCORES+2];
	for(j1=0;j1<ns2;j1++) {
		avgs[j1]=xtx[nscores][j1]/count;
		std[j1]=sqrt(xtx[j1][j1]/count-avgs[j1]*avgs[j1]);
	}
	for(j1=0;j1<ns2;j1++)
		lg("%f\t",avgs[j1]);
	lg("\n");
	for(j1=0;j1<ns2;j1++)
		lg("%f\t",std[j1]);
	lg("\n");
	lg("-------------------------------------------------\n");
	for(j1=0;j1<ns2;j1++) {
		for(j2=0;j2<ns2;j2++) {
			lg("%f\t",(xtx[j1][j2]/count-avgs[j1]*avgs[j2])/(std[j1]*std[j2]+1.e-6));
		}
		lg("\n");
	}
	lg("-------------------------------------------------\n");
	double eavgs[NSCORES],estd[NSCORES];
	for(j1=0;j1<nscores;j1++) {
		eavgs[j1]=avgs[ns1]-avgs[j1];
		estd[j1]=sqrt((xtx[ns1][ns1]+ xtx[j1][j1]-2*xtx[j1][ns1])/count);
	}
	for(j1=0;j1<nscores;j1++)
		lg("%f\t",eavgs[j1]);
	lg("\n");
	for(j1=0;j1<nscores;j1++)
		lg("%f\t",estd[j1]);
	lg("\n");
	lg("-------------------------------------------------\n");
	for(j1=0;j1<nscores;j1++) {
		for(j2=0;j2<nscores;j2++) {
			lg("%f\t",((xtx[j1][j2]+xtx[ns1][ns1]-xtx[ns1][j1]-xtx[ns1][j2])/count-eavgs[j1]*eavgs[j2])/(estd[j1]*estd[j2]+1.e-6));
		}
		lg("\n");
	}


	char TRANS='N';
	char UFLO='U';
	int M=ns1;
	int N=ns1;
	int NRHS=1;
	double A[NSCORES+1][NSCORES+1];
	int LDA=NSCORES+1;
	double B[NSCORES+1];
	int LDB=NSCORES+1;
	double S[NSCORES+1];
	double RCOND=0.00001; // singular values below this are treated as zero.
	int RANK;
	double WORK[1000];
	int LWORK=1000;
	int INFO;

	for(j1=0;j1<ns1;j1++) {
		B[j1]=xtx[ns1][j1];
		for(j2=0;j2<ns1;j2++)
			A[j1][j2]=xtx[j1][j2];
	}	
	for(j1=0;j1<ns1;j1++) A[j1][j1]+=LAMBDA;
	/*dgesv_(&N,&NRHS,A,&LDA,IPIV,B,&LDB,&INFO);*/
	/*dgels_(&TRANS,&M,&N,&NRHS,A,&LDA,B,&LDB,WORK,&LWORK,&INFO);*/
	/*dgelss_( &M, &N, &NRHS, A, &LDA, B, &LDB, S, &RCOND, &RANK, WORK, &LWORK, &INFO );*/
	dposv_(&UFLO,&N,&NRHS,A,&LDA,B,&LDB,&INFO);
	if(INFO) error("failed %d\n",INFO);
		
	for(j1=0;j1<=nscores;j1++)
		xty[j1]=B[j1];

	lg("Check that the matrix inversion worked:\n");
	for(j1=0;j1<=nscores;j1++) {
		double sum=LAMBDA*B[j1];
		for(j2=0;j2<=nscores;j2++)
			sum+=xtx[j1][j2]*B[j2];
		lg("%f\t%f\n",sum,xtx[nscores+1][j1]);
	}
}
Пример #18
0
/*
 * doAllFeatures - train the vishid / visbiases / hidbiases parameters with
 * contrastive divergence, then call recordErrors().
 *
 * Setup:
 *   - every vishid weight is set to 0.02 * randn() - 0.01,
 *   - hidden biases start at zero,
 *   - visbiases[j][i] = log(share of movie j's moviercount entries at level i).
 *
 * Each pass over the users:
 *   - samples hidden states from the user's UNTRAIN(u) ratings,
 *   - runs tSteps reconstruction/resampling steps (1 at first; from
 *     loopcount >= 10 it is 3 + (loopcount - 10)/5),
 *   - accumulates CDpos/CDneg and the activation counts,
 *   - applies the momentum update every 100 users (and after the last user).
 * Training and probe RMSE are taken from nvp2, which is built at the first
 * step from poshidprobs rather than from the sampled states.
 *
 * Passes continue while the training RMSE improves by more than E, with at
 * least 14 and fewer than 80 passes. The learning rates are decayed after
 * each pass on a schedule that depends on TOTAL_FEATURES == 200.
 *
 * The rand() calls are consumed in a fixed order, so reordering statements
 * changes the sampled states.
 *
 * Returns 1.
 */
int doAllFeatures() {
    /* Initial weights */
    int i, j, h;
    for (j=0; j<NMOVIES; j++) {
        for (i=0; i<TOTAL_FEATURES; i++) {
            vishid[j][0][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][1][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][2][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][3][i] = 0.02 * randn() - 0.01; // Normal Distribution
            vishid[j][4][i] = 0.02 * randn() - 0.01; // Normal Distribution
        }
    }

    /* Initial biases */
    for(i=0;i<TOTAL_FEATURES;i++) {
        hidbiases[i]=0.0;
    }
    for (j=0; j<NMOVIES; j++) {
        unsigned int mtot = moviercount[j*SOFTMAX+0] + moviercount[j*SOFTMAX+1] + moviercount[j*SOFTMAX+2] + moviercount[j*SOFTMAX+3] + moviercount[j*SOFTMAX+4];
        for (i=0; i<SOFTMAX; i++) {
            // NOTE(review): a zero moviercount entry gives log(0), and a
            // movie with mtot == 0 gives log(0/0); neither is guarded here.
            visbiases[j][i] = log( ((double)moviercount[j*SOFTMAX+i]) / ((double) mtot) );
        }
    }

    /* Optimize current feature */
    double nrmse=2., last_rmse=10.;
    double prmse = 0, last_prmse=0;
    double s;
    int n;
    int loopcount=0;
    double EpsilonW  = epsilonw;
    double EpsilonVB = epsilonvb;
    double EpsilonHB = epsilonhb;
    double Momentum  = momentum;
    ZERO(CDinc);
    ZERO(visbiasinc);
    ZERO(hidbiasinc);
    int tSteps = 1;

    // Iterate through the model while the RMSE is decreasing 
    //while ( ((nrmse < (last_rmse-E) && prmse<last_prmse) || loopcount < 14) && loopcount < 80  )  {
    while ( ((nrmse < (last_rmse-E) ) || loopcount < 14) && loopcount < 80  )  {

        // More reconstruction steps per user in later passes
        if ( loopcount >= 10 )
            tSteps = 3 + (loopcount - 10)/5;

        last_rmse=nrmse;
        last_prmse=prmse;
        clock_t t0=clock();
        loopcount++;
        int ntrain = 0;
        nrmse = 0.0;
        s  = 0.0;
        n = 0;

        if ( loopcount > 5 )
            Momentum = finalmomentum;

        //* CDpos =0, CDneg=0 (matrices)
        ZERO(CDpos);
        ZERO(CDneg);
        ZERO(poshidact);
        ZERO(neghidact);
        ZERO(posvisact);
        ZERO(negvisact);
        ZERO(moviecount);

        int u,m, f;
        for(u=0;u<NUSERS;u++) {

            //* Clear summations for probabilities
            ZERO(negvisprobs);
            ZERO(nvp2);

            //* perform steps 1 to 8
            int base0=useridx[u][0];
            int d0=UNTRAIN(u);
            int dall=UNALL(u);

            // For all rated movies, accumulate contributions to hidden units
            double sumW[TOTAL_FEATURES];
            ZERO(sumW);
            for(j=0;j<d0;j++) {
                int m=userent[base0+j]&USER_MOVIEMASK;
                moviecount[m]++;

                // 1. get one data point from data set.
                // 2. use values of this data point to set state of visible neurons Si
                int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;

                // Add to the bias contribution for set visible units
                posvisact[m][r] += 1.0;
 
                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    // sum_j(W[i][j] * v[0][j]))
                    sumW[h]  += vishid[m][r][h];
                }
            }

            // Sample the hidden units state after computing probabilities
            for(h=0;h<TOTAL_FEATURES;h++) {

                // 3. compute Sj for each hidden neuron based on formula above and states of visible neurons Si
                // poshidprobs[h] = 1./(1 + exp(-V*vishid - hidbiases);
                // compute Q(h[0][i] = 1 | v[0]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[0][j]))
                poshidprobs[h]  = 1.0/(1.0 + exp(-sumW[h] - hidbiases[h]));

                // sample h[0][i] from Q(h[0][i] = 1 | v[0])
                if  ( poshidprobs[h] >  (rand()/(double)(RAND_MAX)) ) {
                    poshidstates[h]=1;
                    poshidact[h] += 1.0;
                } else {
                    poshidstates[h]=0;
                }
            }

            // Load up a copy of poshidstates for use in loop
            for ( h=0; h < TOTAL_FEATURES; h++ ) 
                curposhidstates[h] = poshidstates[h];

            // Make T Contrastive Divergence steps
            int stepT = 0;
            do {
                // Determine if this is the last pass through this loop
                int finalTStep = (stepT+1 >= tSteps);
                
                // 5. on visible neurons compute Si using the Sj computed in step3. This is known as reconstruction
                // for all visible units j:
                int r;
                int count = d0;
                count += useridx[u][2];  // also cover the probe entries, to compute probe errors
                for(j=0;j<count;j++) {
                    int m=userent[base0+j]&USER_MOVIEMASK;
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        // Accumulate Weight values for sampled hidden states == 1
                        if ( curposhidstates[h] == 1 ) {
                            for(r=0;r<SOFTMAX;r++) {
                                negvisprobs[m][r]  += vishid[m][r][h];
                            }
                        }

                        // Compute more accurate probabilities for RMSE reporting
                        if ( stepT == 0 ) {  
                            for(r=0;r<SOFTMAX;r++) 
                                nvp2[m][r] += poshidprobs[h] * vishid[m][r][h];
                        }
                    }

                    // compute P(v[1][j] = 1 | h[0]) # for binomial units, sigmoid(c[j] + sum_i(W[i][j] * h[0][i]))
                    // Softmax elements are handled individually here
                    negvisprobs[m][0]  = 1./(1 + exp(-negvisprobs[m][0] - visbiases[m][0]));
                    negvisprobs[m][1]  = 1./(1 + exp(-negvisprobs[m][1] - visbiases[m][1]));
                    negvisprobs[m][2]  = 1./(1 + exp(-negvisprobs[m][2] - visbiases[m][2]));
                    negvisprobs[m][3]  = 1./(1 + exp(-negvisprobs[m][3] - visbiases[m][3]));
                    negvisprobs[m][4]  = 1./(1 + exp(-negvisprobs[m][4] - visbiases[m][4]));

                    // Normalize probabilities
                    double tsum  = 
                      negvisprobs[m][0] +
                      negvisprobs[m][1] +
                      negvisprobs[m][2] +
                      negvisprobs[m][3] +
                      negvisprobs[m][4];
                    if ( tsum != 0 ) {
                        negvisprobs[m][0]  /= tsum;
                        negvisprobs[m][1]  /= tsum;
                        negvisprobs[m][2]  /= tsum;
                        negvisprobs[m][3]  /= tsum;
                        negvisprobs[m][4]  /= tsum;
                    }
                    // Compute and Normalize more accurate RMSE reporting probabilities
                    if ( stepT == 0) {
                        nvp2[m][0]  = 1./(1 + exp(-nvp2[m][0] - visbiases[m][0]));
                        nvp2[m][1]  = 1./(1 + exp(-nvp2[m][1] - visbiases[m][1]));
                        nvp2[m][2]  = 1./(1 + exp(-nvp2[m][2] - visbiases[m][2]));
                        nvp2[m][3]  = 1./(1 + exp(-nvp2[m][3] - visbiases[m][3]));
                        nvp2[m][4]  = 1./(1 + exp(-nvp2[m][4] - visbiases[m][4]));
                        double tsum2  = 
                          nvp2[m][0] +
                          nvp2[m][1] +
                          nvp2[m][2] +
                          nvp2[m][3] +
                          nvp2[m][4];
                        if ( tsum2 != 0 ) {
                            nvp2[m][0]  /= tsum2;
                            nvp2[m][1]  /= tsum2;
                            nvp2[m][2]  /= tsum2;
                            nvp2[m][3]  /= tsum2;
                            nvp2[m][4]  /= tsum2;
                        }
                    }

                    // sample v[1][j] from P(v[1][j] = 1 | h[0])
                    // One uniform draw is walked through the five cumulative probabilities
                    double randval = (rand()/(double)(RAND_MAX));
                    if ( (randval -= negvisprobs[m][0]) <= 0.0 )
                        negvissoftmax[m] = 0;
                    else if ( (randval -= negvisprobs[m][1]) <= 0.0 )
                        negvissoftmax[m] = 1;
                    else if ( (randval -= negvisprobs[m][2]) <= 0.0 )
                        negvissoftmax[m] = 2;
                    else if ( (randval -= negvisprobs[m][3]) <= 0.0 )
                        negvissoftmax[m] = 3;
                    else //if ( (randval -= negvisprobs[m][4]) <= 0.0 )
                        negvissoftmax[m] = 4;

                    // if in training data then train on it
                    if ( j < d0 && finalTStep )  
                        negvisact[m][negvissoftmax[m]] += 1.0;
                }


                // 6. compute state of hidden neurons Sj again using Si from 5 step.
                // For all rated movies accumulate contributions to hidden units from sampled visible units
                ZERO(sumW);
                for(j=0;j<d0;j++) {
                    int m=userent[base0+j]&USER_MOVIEMASK;
     
                    // for all hidden units h:
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        sumW[h]  += vishid[m][negvissoftmax[m]][h];
                    }
                }
                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    // compute Q(h[1][i] = 1 | v[1]) # for binomial units, sigmoid(b[i] + sum_j(W[i][j] * v[1][j]))
                    neghidprobs[h]  = 1./(1 + exp(-sumW[h] - hidbiases[h]));

                    // Sample the hidden units state again.
                    if  ( neghidprobs[h] >  (rand()/(double)(RAND_MAX)) ) {
                        neghidstates[h]=1;
                        if ( finalTStep )
                            neghidact[h] += 1.0;
                    } else {
                        neghidstates[h]=0;
                    }
                }

                // Compute error rmse and prmse before we start iterating on T
                if ( stepT == 0 ) {

                    // Compute rmse on training data
                    for(j=0;j<d0;j++) {
                        int m=userent[base0+j]&USER_MOVIEMASK;
                        int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
         
                        //# Compute some error function like sum of squared difference between Si in 1) and Si in 5)
                        double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
                        double vdelta = (((double)r)-expectedV);
                        nrmse += (vdelta * vdelta);
                    }
                    ntrain+=d0;

                    // Sum up probe rmse
                    int base=useridx[u][0];
                    for(i=1;i<2;i++) base+=useridx[u][i];
                    int d=useridx[u][2];
                    for(i=0; i<d;i++) {
                        int m=userent[base+i]&USER_MOVIEMASK;
                        int r=(userent[base+i]>>USER_LMOVIEMASK)&7;
                        //# Compute some error function like sum of squared difference between Si in 1) and Si in 5)
                        double expectedV = nvp2[m][1] + 2.0 * nvp2[m][2] + 3.0 * nvp2[m][3] + 4.0 * nvp2[m][4];
                        double vdelta = (((double)r)-expectedV);
                        s+=vdelta*vdelta;
                    }
                    n+=d;
                }

                // If looping again, feed the newly sampled hidden states into the next step
                if ( !finalTStep ) {
                    for ( h=0; h < TOTAL_FEATURES; h++ ) 
                        curposhidstates[h] = neghidstates[h];
                    ZERO(negvisprobs);
                }

              // 8. repeating multiple times steps 5,6 and 7 compute (Si.Sj)n. Where n is small number and can 
              //    increase with learning steps to achieve better accuracy.

            } while ( ++stepT < tSteps );

            // Accumulate contrastive divergence contributions for (Si.Sj)0 and (Si.Sj)T
            for(j=0;j<d0;j++) {
                int m=userent[base0+j]&USER_MOVIEMASK;
                int r=(userent[base0+j]>>USER_LMOVIEMASK)&7;
 
                // for all hidden units h:
                for(h=0;h<TOTAL_FEATURES;h++) {
                    if ( poshidstates[h] == 1 ) {
                        // 4. now Si and Sj values can be used to compute (Si.Sj)0  here () means just values not average
                        //* accumulate CDpos = CDpos + (Si.Sj)0
                        CDpos[m][r][h] += 1.0;
                    }

                    // 7. now use Si and Sj to compute (Si.Sj)1 (fig.3)
                    CDneg[m][negvissoftmax[m]][h] += (double)neghidstates[h];
                }
            }

            // Update weights and biases after batch
            //
            int bsize = 100;
            if ( ((u+1) % bsize) == 0 || (u+1) == NUSERS ) {
                // Number of users in this batch (the last batch may be short)
                int numcases = u % bsize;
                numcases++;

                // Update weights
                for(m=0;m<NMOVIES;m++) {
                    if ( moviecount[m] == 0 ) continue;

                    // for all hidden units h:
                    for(h=0;h<TOTAL_FEATURES;h++) {
                        // for all softmax
                        int rr;
                        for(rr=0;rr<SOFTMAX;rr++) {
                            //# At the end compute average of CDpos and CDneg by dividing them by number of data points.
                            //# Compute CD = < Si.Sj >0  < Si.Sj >n = CDpos  CDneg
                            double CDp = CDpos[m][rr][h];
                            double CDn = CDneg[m][rr][h];
                            if ( CDp != 0.0 || CDn != 0.0 ) {
                                CDp /= ((double)moviecount[m]);
                                CDn /= ((double)moviecount[m]);

                                // W += epsilon * (h[0] * v[0]' - Q(h[1][.] = 1 | v[1]) * v[1]')
                                //# Update weights and biases W = W + alpha*CD (biases are just weights to neurons that stay always 1.0)
                                //e.g between data and reconstruction.
                                CDinc[m][rr][h] = Momentum * CDinc[m][rr][h] + EpsilonW * ((CDp - CDn) - weightcost * vishid[m][rr][h]);
                                vishid[m][rr][h] += CDinc[m][rr][h];
                            } 
                        }
                    }

                    // Update visible softmax biases
                    // c += epsilon * (v[0] - v[1])$
                    // for all softmax
                    int rr;
                    for(rr=0;rr<SOFTMAX;rr++) {
                        if ( posvisact[m][rr] != 0.0 || negvisact[m][rr] != 0.0 ) {
                            posvisact[m][rr] /= ((double)moviecount[m]);
                            negvisact[m][rr] /= ((double)moviecount[m]);
                            visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]));
                            //visbiasinc[m][rr] = Momentum * visbiasinc[m][rr] + EpsilonVB * ((posvisact[m][rr] - negvisact[m][rr]) - weightcost * visbiases[m][rr]);
                            visbiases[m][rr]  += visbiasinc[m][rr];
                        }
                    }
                }

                
                // Update hidden biases
                // b += epsilon * (h[0] - Q(h[1][.] = 1 | v[1]))
                for(h=0;h<TOTAL_FEATURES;h++) {
                    if ( poshidact[h]  != 0.0 || neghidact[h]  != 0.0 ) {
                        poshidact[h]  /= ((double)(numcases));
                        neghidact[h]  /= ((double)(numcases));
                        hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]));
                        //hidbiasinc[h] = Momentum * hidbiasinc[h] + EpsilonHB * ((poshidact[h] - neghidact[h]) - weightcost * hidbiases[h]);
                        hidbiases[h]  += hidbiasinc[h];
                    }
                }
                // Start the next batch with empty accumulators
                ZERO(CDpos);
                ZERO(CDneg);
                ZERO(poshidact);
                ZERO(neghidact);
                ZERO(posvisact);
                ZERO(negvisact);
                ZERO(moviecount);
            }
        }

        nrmse=sqrt(nrmse/ntrain);
        // NOTE(review): n is the total probe entry count; if it is 0 this is sqrt(0/0).
        prmse = sqrt(s/n);
        
        printf("%f\t%f\t%f\n",nrmse,prmse,(clock()-t0)/(double)CLOCKS_PER_SEC);

        // Learning-rate decay schedule, chosen by the number of hidden units
        if ( TOTAL_FEATURES == 200 ) {
            if ( loopcount > 6 ) {
                EpsilonW  *= 0.90;
                EpsilonVB *= 0.90;
                EpsilonHB *= 0.90;
            } else if ( loopcount > 5 ) {  // With 200 hidden variables, you need to slow things down a little more
                EpsilonW  *= 0.50;         // This could probably use some more optimization
                EpsilonVB *= 0.50;
                EpsilonHB *= 0.50;
            } else if ( loopcount > 2 ) {
                EpsilonW  *= 0.70;
                EpsilonVB *= 0.70;
                EpsilonHB *= 0.70;
            }
        } else {  // The 100 hidden variable case
            if ( loopcount > 8 ) {
                EpsilonW  *= 0.92;
                EpsilonVB *= 0.92;
                EpsilonHB *= 0.92;
            } else if ( loopcount > 6 ) {
                EpsilonW  *= 0.90;
                EpsilonVB *= 0.90;
                EpsilonHB *= 0.90;
            } else if ( loopcount > 2 ) {
                EpsilonW  *= 0.78;
                EpsilonVB *= 0.78;
                EpsilonHB *= 0.78;
            }
        }
    }
    
    /* Hand over to recordErrors() (defined elsewhere) once training has stopped */
    recordErrors();
    
    //if(save_model) {
        //dappend_bin(fnameV,sV,NMOVIES);
        //dappend_bin(fnameU,sU,NUSERS);
    //}
    
    return 1;
}
Пример #19
0
/*
 * movietimeuser - remove a per-movie linear effect of "time since the user's
 * first rating" from the residuals in err[].
 *
 * Steps:
 *   1. day0[u] = smallest non-zero day value among all UNTOTAL(u) entries of
 *      user u (0 doubles as the "not set yet" marker).
 *   2. avg[m]  = mean of DTIME(day - day0[u]) over the training entries of
 *      movie m.
 *   3. theta[m] = sum(err * x) / sum(x * x) with x = DTIME(...) - avg[m],
 *      shrunk by moviecount[m] / (moviecount[m] + MOVIETIME_ALPHA).
 *   4. err[] -= theta[m] * x for every one of the user's UNALL(u) entries.
 *
 * A movie with no training entries is left untouched. Without the guards its
 * mean is 0/0, and that NaN would be written into err[] for the movie's
 * non-training entries.
 */
void movietimeuser()
{
	lg("Movie Time(User)\n");
	ZERO(day0);
	// It is OK to look on all data for day0 because it is always known
	int u;
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNTOTAL(u);
		int j;
		for(j=0;j<d012;j++) {
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			if(!day0[u] || day0[u]>day) day0[u]=day;
		}
	}
	
	// Remove average but only use training data
	double avg[NMOVIES];
	int moviecount[NMOVIES];
	ZERO(avg);
	ZERO(moviecount);
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d0=UNTRAIN(u);
		int j;
		for(j=0;j<d0;j++) {
			int m=userent[base+j]&USER_MOVIEMASK;
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			avg[m]+=DTIME(day-day0[u]);
			moviecount[m]++;
		}
	}
	int m;
	for(m=0;m<NMOVIES;m++) {
		// no training entries: keep avg[m] at 0 instead of computing 0/0
		if(moviecount[m]) avg[m]/=moviecount[m];
	}

	// compute unbiased estimator
	double theta[NMOVIES];
	double var[NMOVIES];
	ZERO(theta);
	ZERO(var);
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d0=UNTRAIN(u);
		int j;
		for(j=0;j<d0;j++) {
			int m=userent[base+j]&USER_MOVIEMASK;
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			// compute explanatory variable
			double x=DTIME(day-day0[u])-avg[m];
			theta[m]+=err[base+j]*x;
			var[m]+=x*x;
		}
	}
	for(m=0; m<NMOVIES; m++) {
		if(!moviecount[m]) {
			// nothing was fitted for this movie
			theta[m]=0.;
			continue;
		}
		// 1.e-20 keeps the division defined when every x is zero
		theta[m]=(theta[m]/(var[m]+1.e-20))*moviecount[m]/(moviecount[m]+MOVIETIME_ALPHA);
	}
	
	//predict
	for(u=0;u<NUSERS;u++) {
		int base=useridx[u][0];
		int d012=UNALL(u);
		int j;
		for(j=0;j<d012;j++) {
			int m=userent[base+j]&USER_MOVIEMASK;
			if(!moviecount[m]) continue;   // no slope fitted, leave err[] alone
			int day=userent[base+j]>>(USER_LMOVIEMASK+3);
			double x=DTIME(day-day0[u])-avg[m];
			err[base+j]-=theta[m]*x;
		}
	}
}
Пример #20
0
/*
 * Train NFEATURES factors (sU, sV, sY) together with per-feature biases
 * (bU, bV) by stochastic gradient descent on the residuals stored in err[].
 *
 * For user u the prediction removed from a residual is
 *     sum_f ( bU[u][f] + bV[m][f] + (sU[u][f] + lNuSY[f]) * sV[m][f] )
 * where lNuSY[f] = NuS * sum of sY[mm][f] over the user's training entries
 * and NuS = 1/sqrt(UNALL(u)).
 *
 * After every pass the training RMSE and rmseprobe() are logged through lg()
 * and both learning rates are decayed.  err[] itself is not modified here;
 * removeUV() is called once at the end.
 *
 * Always returns 1.
 */
int doAllFeatures()
{
	int u, m, f;

	/* Initial biases: drand48() is in [0,1), so values lie in [-0.005, 0.005) */
	for (u = 0; u < NUSERS; u++) {
		for (f = 0; f < NFEATURES; f++)
			bU[u][f] = drand48() * 0.01 - 0.005;
	}
	for (m = 0; m < NMOVIES; m++) {
		for (f = 0; f < NFEATURES; f++)
			bV[m][f] = drand48() * 0.01 - 0.005;
	}

	/* Initial estimation for the factors (order of drand48() calls matters
	 * for reproducibility, so sV and sY stay interleaved) */
	for (u = 0; u < NUSERS; u++) {
		for (f = 0; f < NFEATURES; f++)
			sU[u][f] = drand48() * 0.1 - 0.04;
	}
	for (m = 0; m < NMOVIES; m++) {
		for (f = 0; f < NFEATURES; f++) {
			sV[m][f] = drand48() * 0.05 - 0.025;
			sY[m][f] = drand48() * 0.02 - 0.01;
		}
	}

	/* Optimize the features */
	double nrmse = 2., last_rmse = 10.;
	int loopcount = 0;
	double Gamma2 = G2;
	double Gamma0 = G0;

	/* The diagnostic dump below shows at most the first four features. */
	const int nshow = (NFEATURES < 4) ? NFEATURES : 4;

	/*
	 * Because of short-circuit evaluation, loopcount is incremented only on
	 * passes where the RMSE did NOT improve by more than E; the loop ends on
	 * the first such pass once loopcount has reached 20.
	 * NOTE(review): the decay schedule below reads loopcount as if it were a
	 * pass counter -- confirm which meaning is intended.
	 */
	while ((nrmse < (last_rmse - E)) || loopcount++ < 20) {
		last_rmse = nrmse;
		clock_t t0 = clock();

		for (u = 0; u < NUSERS; u++) {
			int base0 = useridx[u][0];
			int d0 = UNTRAIN(u);
			int dall = UNALL(u);
			double NuS = 1.0 / sqrt(dall);
			int j;

			/*
			 * Sum the implicit factors of the user's movies.
			 * NOTE(review): the sum covers the d0 training entries only,
			 * while NuS and the sY update below use dall -- confirm.
			 */
			double sumY[NFEATURES];
			ZERO(sumY);
			double lNuSY[NFEATURES];
			ZERO(lNuSY);
			for (j = 0; j < d0; j++) {
				int mm = userent[base0 + j] & USER_MOVIEMASK;
				for (f = 0; f < NFEATURES; f++)
					sumY[f] += sY[mm][f];
			}
			/* Scale once; left at zero for users without training entries */
			if (d0 > 0) {
				for (f = 0; f < NFEATURES; f++)
					lNuSY[f] = NuS * sumY[f];
			}

			double ycontrib[NFEATURES];
			ZERO(ycontrib);

			/* Per-user constants: bias step damping and sY gradient scale
			 * (both only used when d0 > 0) */
			double bdampen = d0 / 1.1;
			double yfactor = NuS / d0;

			/* For all rated movies in the training set */
			for (j = 0; j < d0; j++) {
				int movie = userent[base0 + j] & USER_MOVIEMASK;

				/* Current error: stored residual minus the model's prediction */
				double e2 = err[base0 + j];
				for (f = 0; f < NFEATURES; f++) {
					e2 -= (bU[u][f] + bV[movie][f]);
					e2 -= ((sU[u][f] + lNuSY[f]) * sV[movie][f]);
				}

				for (f = 0; f < NFEATURES; f++) {
					/* Train the biases (regularised by L4) */
					double bUu = bU[u][f];
					double bVm = bV[movie][f];
					bU[u][f] += Gamma0 * (e2 - bUu * L4) / bdampen;
					bV[movie][f] += Gamma0 * (e2 - bVm * L4) / bdampen;

					/* Train the factors from their pre-update values
					 * (regularised by L8) */
					double sUu = sU[u][f];
					double sVm = sV[movie][f];
					sU[u][f] += (Gamma2 * ((e2 * sVm) - L8 * sUu));
					sV[movie][f] += (Gamma2 * ((e2 * (sUu + lNuSY[f])) - L8 * sVm));

					/* Accumulate the gradient applied to sY afterwards */
					ycontrib[f] += e2 * sVm * yfactor;
				}
			}

			/* Train Ys over all known movies for the user */
			for (j = 0; j < dall; j++) {
				int movie = userent[base0 + j] & USER_MOVIEMASK;
				for (f = 0; f < NFEATURES; f++) {
					double sYm = sY[movie][f];
					sY[movie][f] += Gamma2 * (ycontrib[f] - L7 * sYm);
				}
			}
		}

		/* Report rmse for main loop */
		nrmse = 0.;
		int ntrain = 0;
		int elcnt = 0;
		for (u = 0; u < NUSERS; u++) {
			int base0 = useridx[u][0];
			int d0 = UNTRAIN(u);
			int j;

			/* Set up the Ys again with the updated sY */
			double sumY[NFEATURES];
			ZERO(sumY);
			double lNuSY[NFEATURES];
			ZERO(lNuSY);
			int dall = UNALL(u);
			double NuS = 1.0 / sqrt(dall);
			for (j = 0; j < d0; j++) {
				int mm = userent[base0 + j] & USER_MOVIEMASK;
				for (f = 0; f < NFEATURES; f++)
					sumY[f] += sY[mm][f];
			}
			if (d0 > 0) {
				for (f = 0; f < NFEATURES; f++)
					lNuSY[f] = NuS * sumY[f];
			}

			for (j = 0; j < d0; j++) {
				int movie = userent[base0 + j] & USER_MOVIEMASK;
				double ee = err[base0 + j];
				double e2 = ee;
				for (f = 0; f < NFEATURES; f++) {
					e2 -= (bU[u][f] + bV[movie][f]);
					e2 -= ((sU[u][f] + lNuSY[f]) * sV[movie][f]);
				}

				/* One-off diagnostic dump for the 5001st training entry of
				 * this pass; bounded by nshow so small NFEATURES is safe */
				if (elcnt++ == 5000) {
					int k;
					for (k = 0; k < nshow; k++)
						printf("%d E: %f \t NE: %f\tNuSY: %f\tsV: %f\tsU: %f\tbU: %f\tbV: %f\tsY: %f\tU: %d\tM: %d\n", k, ee, e2, lNuSY[k], sV[movie][k], sU[u][k], bU[u][k], bV[movie][k], sY[movie][k], u, movie);
					fflush(stdout);
				}

				nrmse += e2 * e2;
			}
			ntrain += d0;
		}
		nrmse = sqrt(nrmse / ntrain);
		double prmse = rmseprobe();

		lg("%f\t%f\t%f\n", nrmse, prmse, (clock() - t0) / (double)CLOCKS_PER_SEC);

		/* Decay both learning rates, faster as loopcount grows */
		if (loopcount < 6) {
			Gamma2 *= 0.95;
			Gamma0 *= 0.95;
		} else if (loopcount < 14) {
			Gamma2 *= 0.92;
			Gamma0 *= 0.92;
		} else {
			Gamma2 *= 0.90;
			Gamma0 *= 0.90;
		}
	}

	/*
	 * Hand over to removeUV() (defined elsewhere in the file).
	 * NOTE(review): the previous comment said the errors are "clipped and
	 * stored" here -- verify against removeUV().
	 */
	removeUV();

	return 1;
}