/// Report how the reconstruction error changes as principal components are
/// added one at a time.
///
/// The reconstruction starts from the stored column means. Components are then
/// accumulated in order; each time the number of accumulated components reaches
/// an entry of `dims`, rmsd(input, reconstruction) is written to `rmsd_vec`.
///
/// @param input    High-dimensional data; cols() must equal high_dim().
/// @param output   Low-dimensional coordinates; cols() must equal low_dim().
/// @param dims     Component counts to evaluate, each in [1, low_dim()].
///                 NOTE(review): the accumulation never rewinds, so entries
///                 are presumably expected in non-decreasing order - confirm.
/// @param rmsd_vec Set up with dims.numel() entries; entry i holds the error
///                 after dims(i) components.
void YAPCAReduce<eltype>::reconstruct_error(const ya_type1 &input,
                                            const ya_type2 &output,
                                            const ya_type3 &dims,
                                            ya_type4 &rmsd_vec) {
  YA_DEBUG_ERROR(input.cols()==this->high_dim() &&
                 output.cols()==this->low_dim(),
                 "Dimensions in input matrix do not match map");
  YA_DEBUG_ERROR(maximum(dims)<=this->low_dim() && minimum(dims)>0,
                 "A reconstruction dimensionality is greater than map");
  YA_DEBUG_ERROR(eigen_vectors.rows()==this->high_dim() &&
                 eigen_vectors.cols()>=this->low_dim(),
                 "Map does not match high and low dimensions");

  // With zero components the reconstruction is built from the column means
  // alone (rowrep is given input.rows() as the replication count).
  YA_MatT reconstruction=rowrep(column_means,input.rows());
  rmsd_vec.setup(dims.numel());

  const ya_sizet num_targets=dims.numel();
  ya_sizet used=0;  // components already folded into `reconstruction`
  for (ya_sizet t=0; t<num_targets; ++t) {
    // Add the contribution of every component not yet included, up to dims(t).
    for ( ; used<dims(t); ++used)
      reconstruction+=output(":",used)*transpose(eigen_vectors(":",used));
    rmsd_vec(t)=rmsd(input,reconstruction);
  }
}
/// Map low-dimensional coordinates back into the high-dimensional space.
///
/// Computes input * transpose(eigen_vectors) and adds the stored column means
/// back (via rowrep with input.rows() as the replication count).
///
/// @param input  Low-dimensional coordinates; cols() must equal low_dim().
/// @param output Receives the reconstructed high-dimensional matrix.
void YAPCAReduce<eltype>::reverse_map(const ya_type1 &input,
                                      ya_type2 &output) const {
  YA_DEBUG_ERROR(input.cols()==this->low_dim(),
                 "Dimensions in input matrix do not match map");
  YA_DEBUG_ERROR(eigen_vectors.rows()==this->high_dim() &&
                 eigen_vectors.cols()==this->low_dim(),
                 "Map does not match high and low dimensionalities");

  const ya_sizet num_points=input.rows();
  output=input*transpose(eigen_vectors)+rowrep(column_means,num_points);
}
/// Project high-dimensional data onto the stored principal components.
///
/// The stored column means are subtracted from the input, the difference is
/// materialised as a YA_MatT, and that matrix is multiplied by eigen_vectors.
///
/// @param input  High-dimensional data; cols() must equal high_dim().
/// @param output Receives the low-dimensional coordinates.
void YAPCAReduce<eltype>::forward_map(const ya_type1 &input,
                                      ya_type2 &output) const {
  YA_DEBUG_ERROR(input.cols()==this->high_dim(),
                 "Dimensions in input matrix do not match map");
  YA_DEBUG_ERROR(eigen_vectors.rows()==this->high_dim() &&
                 eigen_vectors.cols()==this->low_dim(),
                 "Map does not match high and low dimensionalities");

  const ya_sizet num_points=input.rows();
  output=YA_MatT(input-rowrep(column_means,num_points))*eigen_vectors;
}
void point_pca_ep(const YA_BaseT &input, const eltype epsilon, const ya_type2 &dims, EigenOptions &eigopts, YA_RowD &vmean, YA_RowD &vmin, YA_RowD &vmax, YA_RowD &vstd, const int verbose) { YA_DEBUG_ERROR(epsilon>0, "Epsilon must be greater than 0 for point_pca"); int me, num_procs; ya_mpi_info(me,num_procs); YA_ColI counts, starts; ya_mpi_counts(input.rows(),num_procs,counts,starts); eigopts.dim(input.cols()); YA_MatT output; _point_pca_ep(input,epsilon,eigopts,output,true,verbose,me,num_procs, counts,starts); if (input.cols()!=dims.numel() && counts[me]>0) output=YA_MatT(output(":",dims-1)); #ifdef YA_MPI if (num_procs>1) mpi_vstat(output,dims.numel(),vmin,vmax,vmean,vstd); else #endif vstat(output,vmin,vmax,vmean,vstd); }
void point_pca_ep(const YA_BaseT &input, const eltype epsilon, const ya_type2 &dims, EigenOptions &eigopts, ya_type3 &output, const int verbose_out) { YA_DEBUG_ERROR(epsilon>0, "Epsilon must be greater than 0 for point_pca"); int me, num_procs; ya_mpi_info(me,num_procs); YA_ColI counts, starts; ya_mpi_counts(input.rows(),num_procs,counts,starts); eigopts.dim(input.cols()); _point_pca_ep(input,epsilon,eigopts,output,true,verbose_out,me,num_procs, counts,starts); if (num_procs>1) { YA_MatRMT piece_t; #ifdef YA_MPI if (counts[me]>0) piece_t=output(":",dims-1); ya_mpi_concat_rows(piece_t,output,dims.numel(),me,num_procs,counts,starts); #endif } else if (input.cols()!=dims.numel()) output=YA_MatT(output(":",dims-1)); }
/// Fit the PCA map to `input` and produce the reduced coordinates.
///
/// Stores the column means, forms the product of the transposed centred data
/// with itself (not divided by the sample count), passes it to eigs to fill
/// eigen_vectors / eigen_values, and writes the centred data times
/// eigen_vectors to `output`.
///
/// @param input   Data matrix, one sample per row.
/// @param output  Receives the low-dimensional coordinates.
/// @param dim     Target dimensionality; must not exceed input.cols().
/// @param eigopts Eigen-solver options; its dim is set to `dim`.
/// @return Always 0.
int YAPCAReduce<eltype>::find_map(const ya_type1 &input, ya_type2 &output,
                                  ya_sizet dim, EigenOptions &eigopts) {
  YA_DEBUG_ERROR(dim<=input.cols(), "Not enough dimensions in input matrix");

  // Record the target dimensionality and the shape of the map.
  eigopts.dim(dim);
  this->_high_dim=input.cols();
  this->_low_dim=dim;

  // Centre every column on its mean.
  column_means=sum(input)/static_cast<eltype>(input.rows());
  const ya_sizet num_cols=input.cols();
  YA_MatT centered(input.rows(),num_cols);
  for (ya_sizet c=0; c<num_cols; ++c)
    centered(":",c)=input(":",c)-column_means(c);

#ifdef YA_PROGMETER
  YA_TimeKeeper timer;
  YA_ProgMeter meter;
  if (this->_verbose) {
    meter.start("Computing Forward and Reverse Maps:", 70, 0, false);
    timer.start();
  }
#endif

  // Covariance matrix (unnormalised) and its eigen-decomposition.
  VM_SymMat scatter=centered.T()*centered;
  eigs(scatter,eigen_vectors,this->eigen_values,eigopts);

#ifdef YA_PROGMETER
  if (this->_verbose) {
    meter.finish();
    timer.end();
    cerr << "Done. " << timer << ".\n";
  }
#endif

  // Reduced output: centred data projected onto the eigenvectors.
  output=centered*eigen_vectors;
  return 0;
}
void _point_pca(const YA_BaseT &input, const ya_sizet k, EigenOptions &eigopts, ya_type2 &output, const bool residual, const int verbose_out, const int me, const int num_procs, const YA_ColI &counts, const YA_ColI &starts) { YA_DEBUG_ERROR(k>=0 && k<input.rows(), "More neighbors than datapoints for point_pca"); int verbose=verbose_out; #ifdef YA_MPI if (me!=0) verbose=0; else if (num_procs>1 && verbose>1) verbose=1; #ifdef YA_PROGMETER if (verbose) cout << "Performing point-PCA on " << num_procs << " processors.\n"; #endif #endif YA_MatI neighbors; YA_MatD dists; if (num_procs==1) { kneighbors(input,k,neighbors,dists,verbose); neighbors=YA_MatI(concat(vm_cast<ya_sizet>::sc(vmcount(input.rows()).T()), neighbors)); } else if (counts[me]>0) kneighbors(input(vmcount(starts[me],":",starts[me]+counts[me]-1),":"), input,k+1,neighbors,dists,verbose); #ifdef YA_MPI if (num_procs>1 && verbose_out>1) { #ifdef YA_PROGMETER YA_TimeKeeper mtk; if (verbose>0) { cerr << "Waiting on other procs..."; mtk.start(); } MPI_Barrier(MPI_COMM_WORLD); if (verbose>0) { mtk.end(); cerr << "Done. 
" << mtk << endl; } #endif if (me==0) cerr << "Neighbor Distance Stats:\n"; eltype dmin, dmax, dmean, dstd; mpi_vstat(dists(":",vmcount(k)+1),dmin,dmax,dmean,dstd); if (me==0) cerr << " Min: " << dmin << " Max: " << dmax << endl << " Mean: " << dmean << " Std: " << dstd << endl << endl; } #endif dists.clear(); output.setup(counts[me],input.cols()); #ifdef YA_PROGMETER YA_TimeKeeper tk; YA_ProgMeter pm; if (verbose) { tk.start(); pm.start("Performing Point PCA:",70,counts[0],false); } #endif #pragma omp parallel { YA_RowT column_means; YA_MatT input_cen; VM_SymMat covmat; YA_RowT eigens; #pragma omp for for (ya_sizet i=0; i<counts[me]; i++) { // Column center the matrix column_means=sum(input(neighbors(i,":"),":")/static_cast<eltype>(k+1)); input_cen=input(neighbors(i,":"),":")-rowrep(column_means,k+1); covmat=input_cen.T()*input_cen; eigs(covmat,eigens,eigopts); output(i,":")=eigens; #ifdef YA_PROGMETER if (verbose) pm.iterate(); #endif } } neighbors.clear(); #ifdef YA_PROGMETER if (verbose) { pm.finish(); tk.end(); cerr << "Done. " << tk << ".\n"; } YA_TimeKeeper mtk; if (num_procs>1) if (verbose>0) { cerr << "Waiting on other procs..."; mtk.start(); } #endif #ifdef YA_MPI if (num_procs>1) MPI_Barrier(MPI_COMM_WORLD); #endif #ifdef YA_PROGMETER if (num_procs>1 && verbose>0) { mtk.end(); cerr << "Done. " << mtk << endl; } #endif if (residual && counts[me]>0) { YA_VecT totals=sum(output.T()); output=output.dot_div(colrep(totals,output.cols())); ya_sizet iC=output.cols(); for (ya_sizet i=1; i<iC-1; i++) output(":",i)+=output(":",i-1); output(":",iC-1)=1.0; output=1.0-output; } }
void _point_pca_ep(const YA_BaseT &input, const eltype epsilon, EigenOptions &eigopts, vmtype2 &output, const bool residual, const int verbose_out, const int me, const int num_procs, const YA_ColI &counts, const YA_ColI &starts) { YA_DEBUG_ERROR(epsilon>0, "Epsilon must be greater than 0 for point_pca"); int verbose=verbose_out; #ifdef YA_MPI if (me!=0) verbose=0; else if (num_procs>1 && verbose>1) verbose=1; #ifdef YA_PROGMETER if (verbose) cout << "Performing point-PCA on " << num_procs << " processors.\n"; #endif #endif vector<YA_DynI> neighbors; vector<YA_Dyn<eltype> > dists; if (num_procs==1) { eneighbors(input,epsilon,neighbors,dists,verbose); int iR=input.rows(); for (ya_sizet i=0; i<iR; i++) neighbors[i].push_back(i); } else if (counts[me]>0) eneighbors(input(vmcount(starts[me],":",starts[me]+counts[me]-1),":"), input,epsilon,neighbors,dists,verbose); dists.clear(); #ifdef YA_MPI if (num_procs>1 && verbose_out>1) { #ifdef YA_PROGMETER YA_TimeKeeper mtk; if (verbose>0) { cerr << "Waiting on other procs..."; mtk.start(); } MPI_Barrier(MPI_COMM_WORLD); if (verbose>0) { mtk.end(); cerr << "Done. 
" << mtk << endl; } #endif if (me==0) cerr << "Neighbor Counts Stats:\n"; ya_sizet dmin, dmax; eltype dmean, dstd; ya_sizet n=neighbors.size(); YA_ColI ncounts; if (n>0) ncounts.setup(n); for (ya_sizet i=0; i<n; i++) ncounts(i)=neighbors[i].numel()-1; mpi_vstat(ncounts,dmin,dmax,dmean,dstd); if (me==0) cerr << " Min: " << dmin << " Max: " << dmax << endl << " Mean: " << dmean << " Std: " << dstd << endl << endl; } #endif output.setup(counts[me],input.cols()); #ifdef YA_PROGMETER YA_TimeKeeper tk; YA_ProgMeter pm; if (verbose) { tk.start(); pm.start("Performing Point PCA:",70,counts[0],false); } #endif #pragma omp parallel { YA_RowT column_means; YA_MatT input_cen; VM_SymMat covmat; YA_RowT eigens; ya_sizet NN=counts[me]; #pragma omp for for (ya_sizet i=0; i<NN; i++) { ya_sizet k=neighbors[i].numel(); column_means=sum(input(neighbors[i],":")/static_cast<eltype>(k)); input_cen=input(neighbors[i],":")-rowrep(column_means,k); covmat=input_cen.T()*input_cen; eigs(covmat,eigens,eigopts); output(i,":")=eigens; #ifdef YA_PROGMETER if (verbose) pm.iterate(); #endif } } neighbors.clear(); #ifdef YA_PROGMETER if (verbose) { pm.finish(); tk.end(); cerr << "Done. " << tk << ".\n"; } YA_TimeKeeper mtk; if (num_procs>1 && verbose) { cerr << "Waiting on other procs..."; mtk.start(); } #endif #ifdef YA_MPI if (num_procs>1) MPI_Barrier(MPI_COMM_WORLD); #endif #ifdef YA_PROGMETER if (num_procs>1 && verbose>0) { mtk.end(); cerr << "Done. " << mtk << endl; } #endif if (residual && counts[me]>0) { YA_VecT totals=sum(output.T()); output=output.dot_div(colrep(totals,output.cols())); ya_sizet iC=output.cols(); for (ya_sizet i=1; i<iC-1; i++) output(":",i)+=output(":",i-1); output(":",iC-1)=1.0; output=1.0-output; } }