// Make diffs from last texts std::vector<std::pair<diff_type, std::string> > make_diff() { return compute_diffs(compute_snake(textA, textB)); }
// dataset - the channel data for a particular time step // lowchan - the lowest channel number to do computations for // highchan - the highest channel number to do computations for // numSigma - the number of sigma away from the mean the difference should be rejected // ignore?_[low|high] - two ranges of channels to ignore from an RFI perspective void rfi_detection(SpecRecord dataset[], int size, int lowchan, int highchan, float numSigma, float numSigmaThresh, int ignoreA_low, int ignoreA_high, int ignoreB_low, int ignoreB_high) { FILE * finalmeanfile; FILE * finalsigmafile; FILE * sigmathreshfile; PolStatistics sigmaStat; PolStatistics * stats; PolDifferences * diffs; int i; float sigmaThreshOffXX, sigmaThreshOffYY; float sigmaThreshOnXX, sigmaThreshOnYY; finalmeanfile = fopen("finalmean.dat", "w"); fprintf(finalmeanfile, "# date offXX offYY onXX onYY\n"); finalsigmafile = fopen("finalsigma.dat", "w"); fprintf(finalsigmafile, "# date offXX offYY onXX onYY\n"); sigmathreshfile = fopen("sigmathresh.dat", "w"); fprintf(sigmathreshfile, "# offXX offYY onXX onYY\n"); printf("Requesting malloc for %lu bytes of memory\n",sizeof(PolStatistics)*size); stats = (PolStatistics *)malloc(sizeof(PolStatistics) * size); if (stats == NULL) { printf("ERROR: malloc failed in rfi_detection() !\n"); } printf("Requesting malloc for %lu bytes of memory\n",sizeof(PolDifferences)*size); diffs = (PolDifferences *)malloc(sizeof(PolDifferences) * size); if (diffs == NULL) { printf("ERROR: malloc failed in rfi_detection() !\n"); } //iterate over each time step and calculate a final sigma over the //frequency domain for (i=0; i<size; i++) { int chan; char outlierFound; SpecRecord * pRec = &(dataset[i]); PolStatistics * pStat = &(stats[i]); PolDifferences * pDiff = &(diffs[i]); //skip any timesteps flagged as bad if (pRec->flagBAD) continue; compute_diffs(pRec, pDiff, lowchan, highchan); //clear the channel flags for (chan=lowchan; chan<highchan; chan++) { pRec->flagRFI[chan] = RFI_NONE; } //now iterate over the channels //compute sigma for channels not flagged as excluded //apply sigma to exclude any channels //repeat while we still find outliers do { //calculate means and sigmas from not flagged channels compute_stats_on_diffs(pRec, pDiff, pStat, lowchan, highchan); //we now have sigma values for a particular timestep //now iterate over the frequencies and mark outliers outlierFound = FALSE; for (chan=lowchan; chan<highchan; chan++) { if ((chan >= ignoreA_low && chan <= ignoreA_high) || (chan >= ignoreB_low && chan <= ignoreB_high)) { continue; } if (pRec->flagRFI[chan] == RFI_NONE) { if ( (fabs(pStat->meanOffXX - pDiff->OffXX[chan]) > (numSigma * pStat->sigmaOffXX)) || (fabs(pStat->meanOffYY - pDiff->OffYY[chan]) > (numSigma * pStat->sigmaOffYY)) || (fabs(pStat->meanOnXX - pDiff->OnXX[chan]) > (numSigma * pStat->sigmaOnXX)) || (fabs(pStat->meanOnYY - pDiff->OnYY[chan]) > (numSigma * pStat->sigmaOnYY)) ) { pRec->flagRFI[chan] = 1; pRec->flagRFI[chan+1] = 1; outlierFound = TRUE; } } } } while (outlierFound); } //we now have a final sigma //determine the distribution of the sigmas and set thresholds compute_stats_on_stats(stats, size, &sigmaStat); sigmaThreshOffXX = sigmaStat.sigmaOffXX * numSigmaThresh + sigmaStat.meanOffXX; sigmaThreshOffYY = sigmaStat.sigmaOffYY * numSigmaThresh + sigmaStat.meanOffYY; sigmaThreshOnXX = sigmaStat.sigmaOnXX * numSigmaThresh + sigmaStat.meanOnXX; sigmaThreshOnYY = sigmaStat.sigmaOnYY * numSigmaThresh + sigmaStat.meanOnYY; fprintf(stdout, "Sigma of Sigmas: %8.6f %8.6f %8.6f %8.6f \n", sigmaStat.sigmaOffXX, sigmaStat.sigmaOffYY, sigmaStat.sigmaOnXX, sigmaStat.sigmaOnYY); fprintf(stdout, "Mean of Sigmas: %8.6f %8.6f %8.6f %8.6f \n", sigmaStat.meanOffXX, sigmaStat.meanOffYY, sigmaStat.meanOnXX, sigmaStat.meanOnYY); fprintf(sigmathreshfile, "%8.6f %8.6f %8.6f %8.6f \n", sigmaThreshOffXX, sigmaThreshOffYY, sigmaThreshOnXX, sigmaThreshOnYY); //iterate over each time step and mark RFI in the channels for (i=0; i<size; i++) { int chan; SpecRecord * pRec = &(dataset[i]); PolStatistics * pStat = &(stats[i]); PolDifferences * pDiff = &(diffs[i]); //skip any timesteps flagged as bad if (pRec->flagBAD) continue; //clear the channel flags for (chan=lowchan; chan<highchan; chan++) { pRec->flagRFI[chan] = RFI_NONE; } //do one last pass over the channels and mark the flags using the //final values of sigma and the means for (chan=lowchan; chan<highchan; chan++) { //skip over channels that are to be excluded if ((chan >= ignoreA_low && chan <= ignoreA_high) || (chan >= ignoreB_low && chan <= ignoreB_high)) { continue; } //if the sigma is very large, then the sigma clipping will not work //so check for the sigma tolerance, and mark all channels as RFI //if the threshold is exceeded. if ((pStat->sigmaOffXX > sigmaThreshOffXX) || (pStat->sigmaOffYY > sigmaThreshOffYY) || (pStat->sigmaOnXX > sigmaThreshOnXX) || (pStat->sigmaOnYY > sigmaThreshOnYY)) { pRec->flagRFI[chan] = RFI_SIGMA_EXCEEDED; continue; } //mark the channel and the next one as RFI if the difference is an outlier //flagRFI bitmask is updates with all reasons why it was RFI if (fabs(pStat->meanOffXX - pDiff->OffXX[chan]) > (numSigma * pStat->sigmaOffXX)) { pRec->flagRFI[chan] |= RFI_CALOFF_XX; pRec->flagRFI[chan+1] |= RFI_CALOFF_XX; } if (fabs(pStat->meanOffYY - pDiff->OffYY[chan]) > (numSigma * pStat->sigmaOffYY)) { pRec->flagRFI[chan] |= RFI_CALOFF_YY; pRec->flagRFI[chan+1] |= RFI_CALOFF_YY; } if (fabs(pStat->meanOnYY - pDiff->OnYY[chan]) > (numSigma * pStat->sigmaOnYY)) { pRec->flagRFI[chan] |= RFI_CALON_YY; pRec->flagRFI[chan+1] |= RFI_CALON_YY; } if (fabs(pStat->meanOnXX - pDiff->OnXX[chan]) > (numSigma * pStat->sigmaOnXX)) { pRec->flagRFI[chan] |= RFI_CALON_XX; pRec->flagRFI[chan+1] |= RFI_CALON_XX; } } //print the final sigmas and means for this time step print_sigmas(finalsigmafile, pRec, pStat); print_means(finalmeanfile, pRec, pStat); } //endfor timestep free(stats); free(diffs); fclose(finalmeanfile); fclose(finalsigmafile); fclose(sigmathreshfile); }
// Returns an array of diff chunks. // Each chunk has a type (equal, insert or erase) and the string chunk std::vector<std::pair<diff_type, std::string> > make_diff(std::string A, std::string B) { textA = A; textB = B; return compute_diffs(compute_snake(textA, textB)); }