/**
 * Gradient of the quality measure with respect to a single rotation angle.
 *
 * @param theta        current rotation angles; forwarded to gradU(),
 *                     build_Uab() and rotate_givens()
 * @param angle_index  index of the angle the derivative is taken for
 * @return the summed per-entry contributions, scaled by 2 / (mNumData * mNumDims)
 */
double Evrot::evqualitygrad(Eigen::VectorXd& theta, int angle_index) {
    // A = mX * U1 * V * U2. The three factors come back as heap objects owned
    // by the caller, so they are released as soon as the product is formed.
    Eigen::MatrixXd& grad_factor = gradU(theta, angle_index);
    Eigen::MatrixXd& left_factor = build_Uab(theta, 0, angle_index - 1);
    Eigen::MatrixXd& right_factor = build_Uab(theta, angle_index + 1, mNumAngles - 1);
    Eigen::MatrixXd A = mX * left_factor * grad_factor * right_factor;
    delete &grad_factor;
    delete &left_factor;
    delete &right_factor;

    // Data rotated by the current angles (also caller-owned, freed at the end).
    Eigen::MatrixXd& Y = rotate_givens(theta);

    // Per row: the column holding the largest absolute value, and the signed
    // entry found at that column.
    Eigen::VectorXd row_peak(mNumData);
    Eigen::VectorXi row_peak_col(mNumData);
    for (int r = 0; r < mNumData; ++r) {
        int unused_row, peak_col;
        Y.row(r).cwiseAbs().maxCoeff(&unused_row, &peak_col);
        row_peak[r] = Y(r, peak_col);
        row_peak_col[r] = peak_col;
    }

    // Accumulate column by column, visiting every row inside each column.
    double dJ = 0;
    for (int c = 0; c < mNumDims; ++c) {
        for (int r = 0; r < mNumData; ++r) {
            const double peak = row_peak[r];
            const double y = Y(r, c);
            const double direct_term = A(r, c) * y / (peak * peak);
            const double peak_term = A(r, row_peak_col[r]) * (y * y) / (peak * peak * peak);
            dJ += direct_term - peak_term;
        }
    }
    dJ = 2 * dJ / mNumData / mNumDims;

    if (DEBUG) {
        std::cout << "Computed gradient = " << dJ << std::endl;
    }

    delete &Y;
    return dJ;
}
void Evrot::evrot() { // definitions int max_iter = 200; double dQ,Q,Q_new,Q_old1,Q_old2,Q_up,Q_down; double alpha; int iter,d; Eigen::VectorXd theta = Eigen::VectorXd::Zero(mNumAngles); Eigen::VectorXd theta_new = Eigen::VectorXd::Zero(mNumAngles); Q = evqual(mX); // initial quality if( DEBUG ) std::cout << "Q = " << Q << std::endl; Q_old1 = Q; Q_old2 = Q; iter = 0; while( iter < max_iter ){ // iterate to refine quality iter++; for( d = 0; d < mNumAngles; d++ ){ if( mMethod == 2 ){ // descend through numerical drivative alpha = 0.1; { // move up theta_new[d] = theta[d] + alpha; Eigen::MatrixXd& Xrot = rotate_givens(theta_new); Q_up = evqual(Xrot); delete &Xrot; } { // move down theta_new[d] = theta[d] - alpha; Eigen::MatrixXd& Xrot = rotate_givens(theta_new); Q_down = evqual(Xrot); delete &Xrot; } // update only if at least one of them is better if( Q_up > Q || Q_down > Q){ if( Q_up > Q_down ){ theta[d] = theta[d] + alpha; theta_new[d] = theta[d]; Q = Q_up; } else { theta[d] = theta[d] - alpha; theta_new[d] = theta[d]; Q = Q_down; } } } else { // descend through true derivative alpha = 1.0; dQ = evqualitygrad(theta, d); theta_new[d] = theta[d] - alpha * dQ; Eigen::MatrixXd& Xrot = rotate_givens(theta_new); Q_new = evqual(Xrot); delete &Xrot; if( Q_new > Q){ theta[d] = theta_new[d]; Q = Q_new; } else{ theta_new[d] = theta[d]; } } } // stopping criteria if( iter > 2 ){ if( Q - Q_old2 < 1e-3 ){ break; } } Q_old2 = Q_old1; Q_old1 = Q; } if (DEBUG) std::cout << "Done after " << iter << " iterations, Quality is " << Q << std::endl; mXrot = rotate_givens(theta_new); cluster_assign(); //output mQuality = Q; }
double evrot(double* mat,int dim,int ndata,int* out_clusts,double* out_xrot,int method) { int tmpi=0; int tmpk=0; int tmpj=0; int vari=0; int varj=0; int dk=5; int max_iter = 200; double dQ=0; double Q=0; double Q_new=0; double Q_old1=0; double Q_old2=0; double Q_up=0; double Q_down=0; double alpha=0; int iter=0; int d=0; double tmpDouble=0; int angle_num=0; angle_num=(int)(dim*(dim-1)/2); double* theta=new double[angle_num]; double* theta_new=new double[angle_num]; int* jk=new int[angle_num]; int* ik=new int[angle_num]; double* p_theta=theta; double* p_theta_new=theta_new; tmpi=0; for(vari=0;vari<dim-1;vari++) for(varj=vari+1;varj<=dim-1;varj++) { ik[tmpi]=vari; jk[tmpi]=varj; tmpi++; } Q = evqual(mat,dim,ndata); ///getchar(); Q_old1 = Q; Q_old2 = Q; iter = 0; MatrixInitZeros(out_xrot,dim,ndata); MatrixInitZeros(p_theta,1,angle_num); MatrixInitZeros(p_theta_new,1,angle_num); while( iter < max_iter ) {/* iterate to refine quality */ iter++; cout<<"iter "<<iter<<endl; for( d = 0; d < angle_num; d++ ) { if( method == 2 ) { /* descend through numerical drivative */ alpha = 0.1; /* move up */ p_theta_new[d] = p_theta[d] + alpha; rotate_givens(mat,theta_new,ik,jk,angle_num,ndata,dim,out_xrot); Q_up = evqual(out_xrot,dim,ndata); MatrixInitZeros(out_xrot,ndata,dim); /* move down */ p_theta_new[d] = p_theta[d] - alpha; rotate_givens(mat,theta_new,ik,jk,angle_num,ndata,dim,out_xrot); Q_down = evqual(out_xrot,dim,ndata); MatrixInitZeros(out_xrot,ndata,dim); /* update only if at least one of them is better */ if( Q_up > Q || Q_down > Q) { if( Q_up > Q_down ) { p_theta[d] = p_theta[d] + alpha; p_theta_new[d] = p_theta[d]; Q = Q_up; } else { p_theta[d] = p_theta[d] - alpha; p_theta_new[d] = p_theta[d]; Q = Q_down; } } } else { /* descend through true derivative */ alpha = 1.0; dQ = evqualitygrad(mat,theta,ik,jk,angle_num,d,dim,ndata); //cout<<"the dQ is \t "<<dQ<<endl; p_theta_new[d] = p_theta[d] - alpha * dQ; rotate_givens(mat,theta_new,ik,jk,angle_num,ndata,dim,out_xrot); 
Q_new = evqual(out_xrot,dim,ndata); if( Q_new > Q) { p_theta[d] = p_theta_new[d]; Q = Q_new; } else { p_theta_new[d] = p_theta[d]; } MatrixInitZeros(out_xrot,dim,ndata); } } /* stopping criteria */ if( iter > 2 ) { if( Q - Q_old2 < 1e-3 ) { break; } } Q_old2 = Q_old1; Q_old1 = Q; } rotate_givens(mat,theta_new,ik,jk,angle_num,ndata,dim,out_xrot); cluster_assign(out_xrot,out_clusts,dim,ndata); /* free allocated memory */ delete [] theta; delete [] theta_new; delete [] ik; delete [] jk; return Q; };
double evqualitygrad(double *X, double* theta,const int *ik,const int *jk,int angle_num,int angle_index,int dim,int ndata) { /* build V,U,A */ double* matret=new double[dim*dim]; double* U1=new double[dim*dim]; double* U2=new double[dim*dim]; double* A=new double[ndata*dim]; double* matrot=new double[ndata*dim]; double *max_values = new double[ndata]; int *max_index = new int[ndata]; double dJ=0, tmp1, tmp2; double* p_A; double *p_Y; /* find max of each row */ MatrixInitZeros(matret,dim,dim); MatrixInitZeros(U1,dim,dim); MatrixInitZeros(U2,dim,dim); MatrixInitZeros(A,ndata,dim); MatrixInitZeros(matrot,ndata,dim); MatrixInitZeros(max_values,ndata,1); MatrixInitZeros(max_index,ndata,1); int i,j, ind = 0; //getchar(); gradU(theta,angle_index,ik,jk,dim,matret); /**/ //getchar(); build_Uab(theta,0,angle_index-1,ik,jk,dim,U1); /**/ /**/ build_Uab(theta,angle_index+1,angle_num-1,ik,jk,dim,U2); buildA(X,U1,matret,U2,A,ndata,dim); /* rotate vecs according to current angles */ rotate_givens(X,theta,ik,jk,angle_num,ndata,dim,matrot); p_Y=matrot; MaxRowColumnAbsValue(p_Y,max_values,max_index,ndata,dim,1); /* compute gradient */ ind = 0; dJ=0; for( i=0; i<ndata; i++ ) { /* loop over all rows */ p_A=(double*)(A+i*dim); p_Y=(double*)(matrot+i*dim); for( j=0; j<dim; j++ ) { /* loop over all columns */ tmp1 = p_A[j] * p_Y[j] / (max_values[i]*max_values[i]); tmp2 = p_A[max_index[i]]*(p_Y[j]*p_Y[j])/(max_values[i]*max_values[i]*max_values[i]); dJ += tmp1-tmp2; } } dJ = 2*dJ/ndata/dim; delete []max_values; delete []max_index; delete []matrot; delete []matret; delete []U2; delete []U1; delete []A; return dJ; };