void print_histogram(std::ostream & os, FC::flow_complex<nt, st> fc) { auto const INF = std::numeric_limits<nt>::infinity(); auto min_incidence = get_min_incidence(fc); auto & a_ptr = std::get<0>(min_incidence); auto & b_ptr = std::get<1>(min_incidence); auto & min_ratio = std::get<2>(min_incidence); using fc_type = FC::flow_complex<nt, st>; auto print_hist = [&os] (nt ratio, fc_type const& fc) { auto hist = compute_hist(fc); os << ratio; for (auto & p : hist) os << " " << p.second; os << std::endl; }; print_hist(1, fc); // print initial fc while (INF != min_ratio) { fc = reduce(std::move(fc), a_ptr, b_ptr); print_hist(min_ratio, fc); // prepare next iteration min_incidence = get_min_incidence(fc); } }
/* Constructor ------------------------------------------------------------- */
// Builds the per-class discretized PDFs stored in this leaf.
// leafFeatures: one 1-D feature array per class (K = number of classes).
// discrete_levels: requested number of histogram bins, presumably a power
//                  of 2 (the splitting loop runs log2(discrete_levels)
//                  rounds) -- TODO confirm callers guarantee this.
// If discrete_levels==0, the leaf is a dummy that cannot discriminate.
LeafNode::LeafNode(const std::vector < MultidimArray<double> > &leafFeatures,
                   int discrete_levels)
{
    __discreteLevels = discrete_levels;
    K = leafFeatures.size();
    if (__discreteLevels==0)
    {
        // This is a dummy node for features that cannot classify:
        // every class gets the same trivial single-bin histogram.
        MultidimArray<int> newBins(1);
        A1D_ELEM(newBins,0)=0;
        Histogram1D hist;
        hist.resize(1);
        A1D_ELEM(hist,0)=1;
        IrregularHistogram1D irregHist;
        for (int k=0; k<K; k++)
        {
            irregHist.init(hist, newBins);
            irregHist.selfNormalize();
            __leafPDF.push_back(irregHist);
        }
    }
    else
    {
        // Compute the minimum and maximum of each class, accumulating the
        // global [minval, maxval] range over all K classes.
        double minval=0., maxval=0.;
        for(int k=0; k<K; k++)
        {
            double minvalk=0., maxvalk=0.;
            leafFeatures[k].computeDoubleMinMax(minvalk, maxvalk);
            if (k==0)
            {
                minval=minvalk;
                maxval=maxvalk;
            }
            else
            {
                minval=std::min(minval,minvalk);
                maxval=std::max(maxval,maxvalk);
            }
        }
        // Degenerate feature (constant over all classes): mark the leaf as
        // non-discriminative and bail out.  NOTE(review): this path leaves
        // __leafPDF empty, unlike the discreteLevels==0 branch above --
        // presumably callers check __discreteLevels first; verify.
        if (minval==maxval)
        {
            __discreteLevels=0;
            return;
        }
        // Compute the PDF of each class over the common range with 100
        // regular bins.
        std::vector<Histogram1D> hist(K);
        for (int k=0; k<K; k++)
        {
            // There is variation of this feature for this class
            compute_hist(leafFeatures[k], hist[k], minval, maxval, 100);
            hist[k] += 1; // Apply Laplace correction
        }
        // Split the histograms into discrete_level (power of 2) bins.
        // Start with one interval covering bins [0,99]; each round splits
        // every interval at the entropy-optimal bin, doubling the count.
        std::queue< Matrix1D<int> > intervals, splittedIntervals;
        Matrix1D<int> limits(2);
        VECTOR_R2(limits,0,99);
        intervals.push(limits);
        int imax=ROUND(log2(__discreteLevels));
        for (int i=0; i<imax; i++)
        {
            // Split all the intervals in the queue
            while (!intervals.empty())
            {
                Matrix1D<int> currentInterval = intervals.front();
                intervals.pop();
                int lsplit = splitHistogramsUsingEntropy(hist,
                             currentInterval(0), currentInterval(1));
                // limits is pushed by value, so it is safe to reuse it.
                VECTOR_R2(limits,currentInterval(0),lsplit);
                splittedIntervals.push(limits);
                VECTOR_R2(limits,lsplit+1, currentInterval(1));
                splittedIntervals.push(limits);
            }
            // Copy the splitted intervals to the interval list
            while (!splittedIntervals.empty())
            {
                intervals.push(splittedIntervals.front());
                splittedIntervals.pop();
            }
        }
        // Compute the bins of the split: keep the upper limit of each
        // interval as the bin boundary.
        MultidimArray<int> newBins(__discreteLevels);
        imax=intervals.size();
        for (int i=0; i<imax; i++)
        {
            A1D_ELEM(newBins,i) = intervals.front()(1);
            intervals.pop();
        }
        // Compute now the irregular histograms (one normalized PDF per
        // class over the entropy-chosen bin boundaries).
        IrregularHistogram1D irregHist;
        for (int k=0; k<K; k++)
        {
            irregHist.init(hist[k], newBins);
            irregHist.selfNormalize();
            __leafPDF.push_back(irregHist);
        }
    }
}
//majorAxis and minorAxis is the estimated particle size in px void ProgSortByStatistics::processInprocessInputPrepareSPTH(MetaData &SF, bool trained) { //#define DEBUG PCAMahalanobisAnalyzer tempPcaAnalyzer0; PCAMahalanobisAnalyzer tempPcaAnalyzer1; PCAMahalanobisAnalyzer tempPcaAnalyzer2; PCAMahalanobisAnalyzer tempPcaAnalyzer3; PCAMahalanobisAnalyzer tempPcaAnalyzer4; //Morphology tempPcaAnalyzer0.clear(); //Signal to noise ratio tempPcaAnalyzer1.clear(); tempPcaAnalyzer2.clear(); tempPcaAnalyzer3.clear(); //Histogram analysis, to detect black points and saturated parts tempPcaAnalyzer4.clear(); double sign = 1;//;-1; int numNorm = 3; int numDescriptors0=numNorm; int numDescriptors2=4; int numDescriptors3=11; int numDescriptors4 = 10; MultidimArray<float> v0(numDescriptors0); MultidimArray<float> v2(numDescriptors2); MultidimArray<float> v3(numDescriptors3); MultidimArray<float> v4(numDescriptors4); if (verbose>0) { std::cout << " Sorting particle set by new xmipp method..." << std::endl; } int nr_imgs = SF.size(); if (verbose>0) init_progress_bar(nr_imgs); int c = XMIPP_MAX(1, nr_imgs / 60); int imgno = 0, imgnoPCA=0; bool thereIsEnable=SF.containsLabel(MDL_ENABLED); bool first=true; // We assume that at least there is one particle size_t Xdim, Ydim, Zdim, Ndim; getImageSize(SF,Xdim,Ydim,Zdim,Ndim); //Initialization: MultidimArray<double> nI, modI, tempI, tempM, ROI; MultidimArray<bool> mask; nI.resizeNoCopy(Ydim,Xdim); modI.resizeNoCopy(Ydim,Xdim); tempI.resizeNoCopy(Ydim,Xdim); tempM.resizeNoCopy(Ydim,Xdim); mask.resizeNoCopy(Ydim,Xdim); mask.initConstant(true); MultidimArray<double> autoCorr(2*Ydim,2*Xdim); MultidimArray<double> smallAutoCorr; Histogram1D hist; Matrix2D<double> U,V,temp; Matrix1D<double> D; MultidimArray<int> radial_count; MultidimArray<double> radial_avg; Matrix1D<int> center(2); MultidimArray<int> distance; int dim; center.initZeros(); v0.initZeros(numDescriptors0); v2.initZeros(numDescriptors2); v3.initZeros(numDescriptors3); 
v4.initZeros(numDescriptors4); ROI.resizeNoCopy(Ydim,Xdim); ROI.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY2D(ROI) { double temp = std::sqrt(i*i+j*j); if ( temp < (Xdim/2)) A2D_ELEM(ROI,i,j)= 1; else A2D_ELEM(ROI,i,j)= 0; } Image<double> img; FourierTransformer transformer(FFTW_BACKWARD); FOR_ALL_OBJECTS_IN_METADATA(SF) { if (thereIsEnable) { int enabled; SF.getValue(MDL_ENABLED,enabled,__iter.objId); if ( (enabled==-1) ) { imgno++; continue; } } img.readApplyGeo(SF,__iter.objId); if (targetXdim!=-1 && targetXdim!=XSIZE(img())) selfScaleToSize(LINEAR,img(),targetXdim,targetXdim,1); MultidimArray<double> &mI=img(); mI.setXmippOrigin(); mI.statisticsAdjust(0,1); mask.setXmippOrigin(); //The size of v1 depends on the image size and must be declared here int numDescriptors1 = XSIZE(mI)/2; //=100; MultidimArray<float> v1(numDescriptors1); v1.initZeros(numDescriptors1); double var = 1; normalize(transformer,mI,tempI,modI,0,var,mask); modI.setXmippOrigin(); tempI.setXmippOrigin(); nI = sign*tempI*(modI*modI); tempM = (modI*modI); A1D_ELEM(v0,0) = (tempM*ROI).sum(); int index = 1; var+=2; while (index < numNorm) { normalize(transformer,mI,tempI,modI,0,var,mask); modI.setXmippOrigin(); tempI.setXmippOrigin(); nI += sign*tempI*(modI*modI); tempM += (modI*modI); A1D_ELEM(v0,index) = (tempM*ROI).sum(); index++; var+=2; } nI /= tempM; tempPcaAnalyzer0.addVector(v0); nI=(nI*ROI); auto_correlation_matrix(mI,autoCorr); if (first) { radialAveragePrecomputeDistance(autoCorr, center, distance, dim); first=false; } fastRadialAverage(autoCorr, distance, dim, radial_avg, radial_count); for (int n = 0; n < numDescriptors1; ++n) A1D_ELEM(v1,n)=(float)DIRECT_A1D_ELEM(radial_avg,n); tempPcaAnalyzer1.addVector(v1); #ifdef DEBUG //String name = "000005@Images/Extracted/run_002/extra/BPV_1386.stk"; String name = "000010@Images/Extracted/run_001/extra/KLH_Dataset_I_Training_0028.stk"; //String name = "001160@Images/Extracted/run_001/DefaultFamily5"; std::cout << img.name() << std::endl; if 
(img.name()==name2) { FileName fpName = "test_1.txt"; mI.write(fpName); fpName = "test_2.txt"; nI.write(fpName); fpName = "test_3.txt"; tempM.write(fpName); fpName = "test_4.txt"; ROI.write(fpName); //exit(1); } #endif nI.binarize(0); int im = labelImage2D(nI,nI,8); compute_hist(nI, hist, 0, im, im+1); size_t l; int k,i,j; hist.maxIndex(l,k,i,j); A1D_ELEM(hist,j)=0; hist.maxIndex(l,k,i,j); nI.binarizeRange(j-1,j+1); double x0=0,y0=0,majorAxis=0,minorAxis=0,ellipAng=0; size_t area=0; fitEllipse(nI,x0,y0,majorAxis,minorAxis,ellipAng,area); A1D_ELEM(v2,0)=majorAxis/((img().xdim) ); A1D_ELEM(v2,1)=minorAxis/((img().xdim) ); A1D_ELEM(v2,2)= (fabs((img().xdim)/2-x0)+fabs((img().ydim)/2-y0))/((img().xdim)/2); A1D_ELEM(v2,3)=area/( (double)((img().xdim)/2)*((img().ydim)/2) ); for (int n=0 ; n < numDescriptors2 ; n++) { if ( std::isnan(std::abs(A1D_ELEM(v2,n)))) A1D_ELEM(v2,n)=0; } tempPcaAnalyzer2.addVector(v2); //mI.setXmippOrigin(); //auto_correlation_matrix(mI*ROI,autoCorr); //auto_correlation_matrix(nI,autoCorr); autoCorr.window(smallAutoCorr,-5,-5, 5, 5); smallAutoCorr.copy(temp); svdcmp(temp,U,D,V); for (int n = 0; n < numDescriptors3; ++n) A1D_ELEM(v3,n)=(float)VEC_ELEM(D,n); //A1D_ELEM(v3,n)=(float)VEC_ELEM(D,n)/VEC_ELEM(D,0); tempPcaAnalyzer3.addVector(v3); double minVal=0.; double maxVal=0.; mI.computeDoubleMinMax(minVal,maxVal); compute_hist(mI, hist, minVal, maxVal, 100); for (int n=0 ; n <= numDescriptors4-1 ; n++) { A1D_ELEM(v4,n)= (hist.percentil((n+1)*10)); } tempPcaAnalyzer4.addVector(v4); #ifdef DEBUG if (img.name()==name1) { FileName fpName = "test.txt"; mI.write(fpName); fpName = "test3.txt"; nI.write(fpName); } #endif imgno++; imgnoPCA++; if (imgno % c == 0 && verbose>0) progress_bar(imgno); } tempPcaAnalyzer0.evaluateZScore(2,20,trained); tempPcaAnalyzer1.evaluateZScore(2,20,trained); tempPcaAnalyzer2.evaluateZScore(2,20,trained); tempPcaAnalyzer3.evaluateZScore(2,20,trained); tempPcaAnalyzer4.evaluateZScore(2,20,trained); 
pcaAnalyzer.push_back(tempPcaAnalyzer0); pcaAnalyzer.push_back(tempPcaAnalyzer1); pcaAnalyzer.push_back(tempPcaAnalyzer1); pcaAnalyzer.push_back(tempPcaAnalyzer3); pcaAnalyzer.push_back(tempPcaAnalyzer4); }
// Builds one PCA/Mahalanobis analyzer over the training set SF and inserts
// it at the FRONT of this->pcaAnalyzer.  The per-image feature vector is
// the concatenation of a 31-bin intensity histogram (over [-4,4] after
// statisticsAdjust(0,1)) and the radial average of the squared image.
void ProgSortByStatistics::processInputPrepare(MetaData &SF)
{
    PCAMahalanobisAnalyzer tempPcaAnalyzer;
    tempPcaAnalyzer.clear();

    Image<double> img;
    MultidimArray<double> img2;
    MultidimArray<int> radial_count;
    MultidimArray<double> radial_avg;
    Matrix1D<int> center(2);
    center.initZeros();

    if (verbose>0)
        std::cout << " Processing training set ..." << std::endl;

    int nr_imgs = SF.size();
    if (verbose>0)
        init_progress_bar(nr_imgs);
    // Update the progress bar roughly 60 times over the whole set.
    int c = XMIPP_MAX(1, nr_imgs / 60);
    int imgno = 0, imgnoPCA=0;

    MultidimArray<float> v;
    MultidimArray<int> distance;
    int dim;
    bool thereIsEnable=SF.containsLabel(MDL_ENABLED);
    bool first=true;
    FOR_ALL_OBJECTS_IN_METADATA(SF)
    {
        // Skip explicitly disabled particles.
        // NOTE(review): imgno is NOT incremented here, unlike the
        // SPTH variant -- presumably intentional; verify if the
        // progress bar matters for partially disabled sets.
        if (thereIsEnable)
        {
            int enabled;
            SF.getValue(MDL_ENABLED,enabled,__iter.objId);
            if (enabled==-1)
                continue;
        }
        img.readApplyGeo(SF,__iter.objId);
        if (targetXdim!=-1 && targetXdim!=XSIZE(img()))
            selfScaleToSize(LINEAR,img(),targetXdim,targetXdim,1);
        MultidimArray<double> &mI=img();
        mI.setXmippOrigin();
        mI.statisticsAdjust(0,1);

        // Overall statistics
        Histogram1D hist;
        compute_hist(mI,hist,-4,4,31);

        // Radial profile of the squared (power) image
        img2.resizeNoCopy(mI);
        FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(img2)
        {
            double val=DIRECT_MULTIDIM_ELEM(mI,n);
            DIRECT_MULTIDIM_ELEM(img2,n)=val*val;
        }
        if (first)
        {
            // Distances depend only on geometry; precompute them once.
            radialAveragePrecomputeDistance(img2, center, distance, dim);
            first=false;
        }
        fastRadialAverage(img2, distance, dim, radial_avg, radial_count);

        // Build vector: histogram bins followed by half the radial profile.
        v.initZeros(XSIZE(hist)+XSIZE(img2)/2);
        int idx=0;
        FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(hist)
        v(idx++)=(float)DIRECT_A1D_ELEM(hist,i);
        for (size_t i=0; i<XSIZE(img2)/2; i++)
            v(idx++)=(float)DIRECT_A1D_ELEM(radial_avg,i);

        tempPcaAnalyzer.addVector(v);

        if (imgno % c == 0 && verbose>0)
            progress_bar(imgno);
        imgno++;
        imgnoPCA++;
    }
    if (verbose>0)
        progress_bar(nr_imgs);

    MultidimArray<double> vavg,vstddev;
    tempPcaAnalyzer.computeStatistics(vavg,vstddev);
    tempPcaAnalyzer.evaluateZScore(2,20,false);
    pcaAnalyzer.insert(pcaAnalyzer.begin(), tempPcaAnalyzer);
}