float GetProgressUnitSize(int NumDataPoints, int num_cfft, long ac_fft_len) { #ifdef USE_MANUAL_CALLSTACK call_stack.enter("GetProgressUnitSize()"); #endif // A ProgressUnit is defined as the computation time of a // detection algorithm on the entire data block at a // given chirpfft pair. // Spike detection takes place for every FFT length and at // any slew rate. PoT detctors, on the other hand, may opt // to not execute if slew rate and/or FFT length limits are // not met. int i, ThisPoTLen, ThisPulsePoTLen, DummyOverlap; double NumProgressUnits; double LastChirpRate = 0.0f; double TotalSpikeProgressUnits = 0.0; double TotalGaussianProgressUnits = 0.0; double TotalPulseProgressUnits = 0.0; double TotalTripletProgressUnits = 0.0; double TotalChirpProgressUnits = 0.0; double rv; NumProgressUnits = 0.0; for (i = 0; i < num_cfft; i++) { // FFTs and spike finding TotalSpikeProgressUnits += SpikeProgressUnits(ChirpFftPairs[i].FftLen); // Autocorr FFTs and finding if ((long)ChirpFftPairs[i].FftLen == ac_fft_len) { TotalSpikeProgressUnits += SpikeProgressUnits(ChirpFftPairs[i].FftLen); } // Chirping if(ChirpFftPairs[i].ChirpRate != LastChirpRate) { TotalChirpProgressUnits += ChirpProgressUnits(); LastChirpRate = ChirpFftPairs[i].ChirpRate; } ThisPoTLen = NumDataPoints / ChirpFftPairs[i].FftLen; // Gaussians.... if (ChirpFftPairs[i].GaussFit) { TotalGaussianProgressUnits += GaussianProgressUnits(); } // Pulses and Triplets.... GetPulsePoTLen(ThisPoTLen, &ThisPulsePoTLen, &DummyOverlap); #ifdef USE_PULSE if(ChirpFftPairs[i].PulseFind) TotalPulseProgressUnits += PulseProgressUnits(ThisPulsePoTLen, ChirpFftPairs[i].FftLen - 1); #endif #ifdef USE_TRIPLET if(ThisPulsePoTLen >= PoTInfo.TripletMin && ThisPulsePoTLen <= PoTInfo.TripletMax) TotalTripletProgressUnits += TripletProgressUnits(); #endif } NumProgressUnits = TotalChirpProgressUnits + TotalSpikeProgressUnits + TotalGaussianProgressUnits + TotalPulseProgressUnits + TotalTripletProgressUnits; /* fprintf(stderr, "%f ChirpProgressUnits (%f\%)\n", TotalChirpProgressUnits, TotalChirpProgressUnits/NumProgressUnits); fprintf(stderr, "%f SpikeProgressUnits (%f\%)\n", TotalSpikeProgressUnits, TotalSpikeProgressUnits/NumProgressUnits); fprintf(stderr, "%f GaussianProgressUnits (%f\%)\n", TotalGaussianProgressUnits, TotalGaussianProgressUnits/NumProgressUnits); fprintf(stderr, "%f TripletProgressUnits (%f\%)\n", TotalTripletProgressUnits, TotalTripletProgressUnits/NumProgressUnits); fprintf(stderr, "%f PulseProgressUnits (%f\%)\n", TotalPulseProgressUnits, TotalPulseProgressUnits/NumProgressUnits); fprintf(stderr, "%f NumProgressUnits\n", NumProgressUnits); */ // Add a fudge factor of 0.01% to make sure we do not hit 100% done too soon rv=(1.0f/(float)(NumProgressUnits + NumProgressUnits * 0.0001)); #ifdef USE_MANUAL_CALLSTACK call_stack.exit(); #endif return rv; }
int analyze_pot(float *PowerSpectrum, int NumDataPoints, ChirpFftPair_t &cfft, int offset) { // This function analyses Power over Time for the current data block. // The PoT array is created by taking an array of power spectra (a // standard row-major 2D array) and extracting the PoT as column-major // data. We essentialy turn the array on its side. int retval = 0, i, FftLength=cfft.FftLen, // Current FFT length ThisPoT, // index of current PoT along the freq axis PoTLen, // complement of FFT length - determines time res PulsePoTLen, // length of PoT segment passed to pulse finders Overlap, // PoT segment overlap in bins TOffset, // index into ThisPoT of current pulse segment PulsePoTNum = 0, // the oridinal position of a pulse PoT w/in a full PoT NumPulsePoTs = 0, // the number of pulse PoTs w/in a full PoT. This is // constant regardless of FFT or PoT length and is // determined by slew rate. AdvanceBy; // the number of bins to advance for the next pulse PoT float ProgressAddFactor = 0.0, // sum of progress adds for ThisPoT ProgressPerPulsePoT = 0.0; // for local progress display bool SkipGauss = false, SkipPulse = false, SkipTriplet = false, TOffsetOK = true; static float *GaussPoT = NULL, *PulsePoT = NULL; #ifdef DEBUG_POT fprintf(stderr, "========= FftLength = %d =========\n", FftLength); #endif PoTLen = NumDataPoints / FftLength; // in bins GetPulsePoTLen(PoTLen, &PulsePoTLen, &Overlap); // in bins AdvanceBy = PulsePoTLen - Overlap; // in bins // Max limits how *slow* the slewrate can be, while Min limits how // *fast* the slewrate can be. Max is limited only by the client // memory budget. if(PulsePoTLen > PoTInfo.TripletMax || PulsePoTLen < PoTInfo.TripletMin) SkipTriplet = true; SkipGauss = !(cfft.GaussFit); SkipPulse = !(cfft.PulseFind); if(!SkipPulse || !SkipTriplet) { // NumPulsePoTs is the number of PoT segments that we pass to the pulse // detectors per frequency bin. ProgressPerPulsePoT is the inverse of // number of pulse detection segments in the entire data block, taking // into account that we skip the first (DC) frequency bin. An assumption // is made here that minimum pulse/triplet PoT length will always be // greater than 1. Otherwise, AdvanceBy can become zero and a divide by // zero can occur. The assumption is also made that FftLength is always // greater than 1! NumPulsePoTs = 1 + (PoTLen-PulsePoTLen)/AdvanceBy + ((PoTLen-PulsePoTLen)%AdvanceBy ? 1 : 0); ProgressPerPulsePoT = (float)1 / ((FftLength - 1) * NumPulsePoTs); } #ifdef DEBUG_POT fprintf(stderr, "SlewRate = %f\n", PoTInfo.SlewRate); fprintf(stderr, "PoTLen = %d\n", PoTLen); fprintf(stderr, "MaxPoTLen = %d\n", PoTInfo.MaxPoTLen); fprintf(stderr, "PoTDuration = %f\n", PoTInfo.WUDuration); fprintf(stderr, "BeamRate = %f\n", PoTInfo.BeamRate); fprintf(stderr, "PulsePoTLen = %d\n", PulsePoTLen); fprintf(stderr, "Overlap = %d\n", Overlap); fprintf(stderr, "AdvanceBy = %d\n", AdvanceBy); fprintf(stderr, "min_slew = %f\n", PoTInfo.min_slew); fprintf(stderr, "max_slew = %f\n", PoTInfo.max_slew); fprintf(stderr, "PulseOverlapFactor = %f\n", PoTInfo.PulseOverlapFactor); fprintf(stderr, "PulseBeams = %f\n", PoTInfo.PulseBeams); fprintf(stderr, "PulseThresh = %f\n", PoTInfo.PulseThresh); fprintf(stderr, "PulseMax = %d\n", PoTInfo.PulseMax); fprintf(stderr, "PulseMin = %d\n", PoTInfo.PulseMin); fprintf(stderr, "PulseFftMax = %d\n", PoTInfo.PulseFftMax); fprintf(stderr, "TripletThresh = %f\n", PoTInfo.TripletThresh); fprintf(stderr, "TripletMax = %d\n", PoTInfo.TripletMax); fprintf(stderr, "TripletMin = %d\n", PoTInfo.TripletMin); #endif #ifndef USE_PULSE SkipPulse = TRUE; static int doneprintnopulsefind = 0; if(!doneprintnopulsefind) { fprintf(stderr,"SkipPulse is set to TRUE: Not doing Pulsefinds.\n"); doneprintnopulsefind = TRUE; } #endif #ifndef USE_TRIPLET SkipTriplet = TRUE; #endif // Get memory fot the PoT arrays. The PoT array for gausian analysis is // of set size. The PoT array for pulse analysis is sized to cover // PulseBeams beams, regardless of whether this violates either the // triplet or pulse limits on array size. if(!GaussPoT) { GaussPoT = (float *)malloc_a(swi.analysis_cfg.gauss_pot_length * sizeof(float), MEM_ALIGN); if(GaussPoT == NULL) { SETIERROR(MALLOC_FAILED, "GaussPoT == NULL"); } } if(!PulsePoT) { PulsePoT = (float *)calloc_a(PoTInfo.MaxPoTLen+3, sizeof(float), MEM_ALIGN); if(PulsePoT == NULL) { SETIERROR(MALLOC_FAILED, "PulsePoT == NULL"); } } bool b_gaussStarted = false; // Look for gaussians --------------------------------------------------- if(!SkipGauss && (analysis_state.PoT_activity == POT_DOING_GAUSS || analysis_state.PoT_activity == POT_INACTIVE)) { #ifdef BOINC_APP_GRAPHICS if (!nographics()) strcpy(sah_graphics->status, "Searching for Gaussians"); #endif // If we are back from being interrupted in the middle of gaussian PoT // analysis, load state and continue. Otherwise start anew, skipping // the DC (0) bin. if(analysis_state.PoT_activity == POT_DOING_GAUSS) { ThisPoT = analysis_state.PoT_freq_bin; } else { ThisPoT = 1; // skip the DC bin on start of new cfft pair } // Initial display of local Progress / CPU time. Assumes that // we start ThisPoT at 1 each time in! #ifdef BOINC_APP_GRAPHICS if (!nographics()) sah_graphics->local_progress = ((float)ThisPoT-1)/(FftLength-1); #endif #ifdef USE_CUDA //#ifndef CUDAACC_EMULATION if(gSetiUseCudaDevice) { //TODO: remove the autocorr_fftlen test when v6 fully deprecated. bool noscore = false; //swi.analysis_cfg.autocorr_fftlen!=0 && gaussian_count!=0; //cudaAcc_Gaussfit(FftLength, best_gauss->score, noscore); //printf("GaussFitStart\r\n"); b_gaussStarted = true; // started earlier //cudaAcc_GaussfitStart(FftLength, best_gauss->score, noscore); if(PoTLen > swi.analysis_cfg.gauss_pot_length) analysis_state.FLOP_counter+=((double)NumDataPoints+swi.analysis_cfg.gauss_pot_length * (FftLength-1)); // GetFixedPoT // There are FftLength - 1 fixed PoTs for a chirp/fft pair, and for each fixed PoT full analysis would do // (1 + (PoTInfo.GaussTOffsetStop - PoTInfo.GaussTOffsetStart)) GetPeak and GetTrueMean operations. // Use (1 - PoTInfo.GaussSigma*0.09) empirically found to represent fraction of PoTs which don't // take either of the two early out paths. double CorrWt = (1.0 - PoTInfo.GaussSigma*0.09) * (FftLength - 1) * (1 + (PoTInfo.GaussTOffsetStop - PoTInfo.GaussTOffsetStart)); analysis_state.FLOP_counter+=6.0*floor(PoTInfo.GaussSigma+0.5) * CorrWt; // GetPeak analysis_state.FLOP_counter+=(double)(floor(PoTInfo.GaussSigma+0.5) * 3.911+5) * CorrWt; // GetTrueMean // Estimate one in twenty fit positions look good enough to be checked further. analysis_state.FLOP_counter+= 0.05 * (20.0*swi.analysis_cfg.gauss_pot_length+5.0) * CorrWt; // GetChiSq / 20 progress += ProgressUnitSize * GaussianProgressUnits(); progress=std::min(progress,1.0); // prevent display of > 100% fraction_done(progress,remaining); } //#endif CUDAACC_EMULATION else { #endif //USE_CUDA // loop through frequencies /* for(; ThisPoT < FftLength; ThisPoT++) { // Create PowerOfTime array for gaussian fit retval = GetFixedPoT( PowerSpectrum, NumDataPoints, FftLength, GaussPoT, swi.analysis_cfg.gauss_pot_length, ThisPoT ); if (retval) continue; retval = GaussFit(GaussPoT, FftLength, ThisPoT); if (retval) SETIERROR(retval,"from GaussFit"); progress += ProgressUnitSize * GaussianProgressUnits() / (float)(FftLength - 1); progress=std::min(progress,1.0); // prevent display of > 100% fraction_done(progress,remaining); // At the end of each frequency bin we update progress and save state. #ifdef BOINC_APP_GRAPHICS if (!nographics()) { sah_graphics->local_progress = ((float)ThisPoT)/(FftLength-1); } #endif analysis_state.PoT_freq_bin = ThisPoT; analysis_state.PoT_activity = POT_DOING_GAUSS; retval = checkpoint(); if (retval) SETIERROR(retval,"from checkpoint()"); } // end loop through frequencies */ #ifdef USE_CUDA } #endif //USE_CUDA analysis_state.PoT_freq_bin = -1; analysis_state.PoT_activity = POT_INACTIVE; } // end looking for gaussians // Look for pulses ------------------------------------------------------- if(!SkipPulse || !SkipTriplet) { #ifdef BOINC_APP_GRAPHICS if (!nographics()) { strcpy(sah_graphics->status, "Searching for Pulses / Triplets"); // init local progress for pulse search sah_graphics->local_progress = 0; } #endif // If we are back from being interrupted in the middle of pulse PoT // analysis, load state and continue. Otherwise start anew, skipping // the DC (0) bin. if(analysis_state.PoT_activity == POT_DOING_PULSE) { ThisPoT = analysis_state.PoT_freq_bin; } else { ThisPoT = 1; // skip the DC bin on start of new cfft pair } PulsePoTNum = 0; #ifdef BOINC_APP_GRAPHICS // Inital display of Local Progress if(!nographics()) { sah_graphics->local_progress = (((ThisPoT-1) * NumPulsePoTs) + PulsePoTNum) * ProgressPerPulsePoT; } #endif #ifdef USE_CUDA if(gSetiUseCudaDevice) { /* if(!SkipTriplet || !SkipPulse) // do beforehand on fftstreamX { // CUDASYNC; //printf("CalculateMean\r\n"); cudaAcc_calculate_mean(PulsePoTLen, 0, AdvanceBy, FftLength); } if(!SkipPulse) { //printf("FindPulses\r\n"); cudaAcc_find_pulses((float) best_pulse->score, PulsePoTLen, AdvanceBy, FftLength); } if(!SkipTriplet) { //printf("FindTriplets\r\n"); cudaAcc_find_triplets(PulsePoTLen, (float)PoTInfo.TripletThresh, AdvanceBy, FftLength); } */ /* timespec t1, t2; t1.tv_sec = 0; t1.tv_nsec = 5000; while(cudaEventQuery(summaxDoneEvent) != cudaSuccess) nanosleep(&t1, &t2); */ int gflags = 0; if(b_gaussStarted) { //printf("fetchGaussFitFlags\r\n"); b_gaussStarted = false; gflags = cudaAcc_fetchGaussfitFlags(FftLength, best_gauss->score); } //printf("fetchTripletAndPulseFlags\r\n"); int has_dataT = 0, has_dataP = 0; if(!SkipTriplet) has_dataT = cudaAcc_fetchTripletFlags(SkipTriplet, PulsePoTLen, AdvanceBy, FftLength, offset); if(gflags > 0) { //printf("ProcessGaussFit\r\n"); cudaAcc_processGaussFit(FftLength, best_gauss->score); } if(!SkipTriplet) { if((has_dataT & 1) && !(has_dataT & 4)) // has triplet data and no error in triplet { //printf("processTripletResults\r\n"); cudaAcc_processTripletResults(PulsePoTLen, AdvanceBy, FftLength); } analysis_state.FLOP_counter+=(10.0 + PulsePoTLen) * NumPulsePoTs * (FftLength - 1); // ! hard to estimate, so be generous and use 9 analysis_state.FLOP_counter+=810.0; // (10.0*numBinsAboveThreshold*numBinsAboveThreshold); progress += ProgressUnitSize * TripletProgressUnits(); } if(!SkipPulse) has_dataP = cudaAcc_fetchPulseFlags(SkipTriplet, PulsePoTLen, AdvanceBy, FftLength, offset); if(!SkipPulse) { if((has_dataP & 2) && !(has_dataP & 8)) // has pulse data and no error in pulse { //printf("processPulseResults\r\n"); cudaAcc_processPulseResults(PulsePoTLen, AdvanceBy, FftLength); } analysis_state.FLOP_counter+=(PulsePoTLen*0.1818181818182+400.0)*PulsePoTLen * NumPulsePoTs * (FftLength - 1); progress += ProgressUnitSize * PulseProgressUnits(PulsePoTLen, FftLength - 1); } //#ifndef CUDAACC_EMULATION //if(!SkipTriplet) // cudaAcc_fetchTripletAndPulseFlags(SkipTriplet, SkipPulse, PulsePoTLen, AdvanceBy, FftLength); progress=std::min(progress,1.0); // prevent display of > 100% fraction_done(progress,remaining); //if(!SkipTriplet) // cudaAcc_processTripletResults(PulsePoTLen, AdvanceBy, FftLength); //#endif //CUDAACC_EMULATION } // else // { #endif //USE_CUDA // loop through frequencies /* for(; ThisPoT < FftLength; ThisPoT++) { // loop through time for each frequency. PulsePoTNum is // used only for progress calculation. for(TOffset = 0, PulsePoTNum = 1, TOffsetOK = true; TOffsetOK; PulsePoTNum++, TOffset += AdvanceBy) { // Create PowerOfTime array for pulse detection. If there // are not enough points left in this PoT, adjust TOffset // to get the latest possible pulse PoT. if(TOffset + PulsePoTLen >= PoTLen) { TOffsetOK = false; TOffset = PoTLen - PulsePoTLen; } if (use_transposed_pot) { memcpy(PulsePoT, &PowerSpectrum[ThisPoT * PoTLen + TOffset], PulsePoTLen*sizeof(float)); } else { for(i = 0; i < PulsePoTLen; i++) { PulsePoT[i] = PowerSpectrum[ThisPoT + (TOffset+i) * FftLength]; } } if(!SkipTriplet) { retval = find_triplets(PulsePoT, PulsePoTLen, (float)PoTInfo.TripletThresh, TOffset, ThisPoT); if (retval) SETIERROR(retval,"from find_triplets()"); } //#ifndef CUDAACC_EMULATION if(!SkipPulse) { retval = find_pulse(PulsePoT, PulsePoTLen, (float)PoTInfo.PulseThresh, TOffset, ThisPoT ); if (retval) SETIERROR(retval,"from find_pulse()"); } //#endif //CUDAACC_EMULATION // At the end of each pulse PoT we update progress. Progress // will thus get updted several times per frequency bin. #ifdef BOINC_APP_GRAPHICS if (!nographics()) { sah_graphics->local_progress = (((ThisPoT-1) * NumPulsePoTs) + PulsePoTNum) * ProgressPerPulsePoT; } #endif if(!SkipTriplet) { progress += (ProgressUnitSize * TripletProgressUnits()) / (float)(FftLength - 1) / NumPulsePoTs; } if(!SkipPulse) { progress += (ProgressUnitSize * PulseProgressUnits(PulsePoTLen, FftLength - 1)) / (float)(FftLength - 1) / NumPulsePoTs; } progress=std::min(progress,1.0); // prevent display of > 100% fraction_done(progress,remaining); } // end loop through time for each frequency // At the end of each frequency bin we save state. analysis_state.PoT_activity = POT_DOING_PULSE; analysis_state.PoT_freq_bin = ThisPoT; retval = checkpoint(); if (retval) SETIERROR(retval,"from checkpoint()"); } // end loop through frequencies analysis_state.PoT_freq_bin = -1; analysis_state.PoT_activity = POT_INACTIVE; #ifdef USE_CUDA } #endif //USE_CUDA */ } // end looking for pulses if(b_gaussStarted) // process results late { //printf("late GaussBlock\r\n"); int flags = cudaAcc_fetchGaussfitFlags(FftLength, best_gauss->score); if(flags>0) cudaAcc_processGaussFit(FftLength, best_gauss->score); } return (retval); // no error return point }