void dsp::LoadToFold::construct () try { SingleThread::construct (); #if HAVE_CUDA bool run_on_gpu = thread_id < config->get_cuda_ndevice(); cudaStream_t stream = reinterpret_cast<cudaStream_t>( gpu_stream ); #endif if (manager->get_info()->get_detected()) { Unpacker* unpacker = manager->get_unpacker(); // detected data is handled much more efficiently in TFP order if ( config->optimal_order && unpacker->get_order_supported (TimeSeries::OrderTFP) ) { unpacker->set_output_order (TimeSeries::OrderTFP); } #if HAVE_CFITSIO #if HAVE_fits // Use callback to handle scales/offsets for read-in if (manager->get_info()->get_machine() == "FITS") { if (Operation::verbose) cerr << "Using callback to read PSRFITS file." << endl; // connect a callback bool success = false; FITSUnpacker* funp = dynamic_cast<FITSUnpacker*> ( manager->get_unpacker()); FITSFile* ffile = dynamic_cast<FITSFile*> (manager->get_input()); if (funp && ffile) { ffile->update.connect ( funp, &FITSUnpacker::set_parameters ); success = true; } else { MultiFile* mfile = dynamic_cast<MultiFile*> (manager->get_input()); if (mfile) { for (unsigned i=0; i < mfile->nfiles(); ++i) { ffile = dynamic_cast<FITSFile*> (mfile->get_files()[i].get()); if (funp && ffile) { ffile->update.connect ( funp, &FITSUnpacker::set_parameters ); success = true; } } } } if (not success) cerr << "dspsr: WARNING: FITS input input but unable to apply scales and offsets." << endl; } #endif #endif config->coherent_dedispersion = false; prepare_interchan (unpacked); build_fold (unpacked); return; } // record the number of operations in signal path unsigned noperations = operations.size(); bool report_vitals = thread_id==0 && config->report_vitals; if (manager->get_info()->get_type() != Signal::Pulsar) { // the kernel gets messed up by DM=0 sources, like PolnCal if (report_vitals) cerr << "Disabling coherent dedispersion of non-pulsar signal" << endl; config->coherent_dedispersion = false; } // the data are not detected, so set up phase coherent reduction path // NB that this does not necessarily mean coherent dedispersion. unsigned frequency_resolution = config->filterbank.get_freq_res (); if (config->coherent_dedispersion) { if (!kernel) kernel = new Dedispersion; if (frequency_resolution) { if (report_vitals) cerr << "dspsr: setting filter length to " << frequency_resolution << endl; kernel->set_frequency_resolution (frequency_resolution); } if (config->times_minimum_nfft) { if (report_vitals) cerr << "dspsr: setting filter length to minimum times " << config->times_minimum_nfft << endl; kernel->set_times_minimum_nfft (config->times_minimum_nfft); } if (config->nsmear) { if (report_vitals) cerr << "dspsr: setting smearing to " << config->nsmear << endl; kernel->set_smearing_samples (config->nsmear); } if (config->use_fft_bench) { if (report_vitals) cerr << "dspsr: using benchmarks to choose optimal FFT length" << endl; #if HAVE_CUDA if (run_on_gpu) kernel->set_optimal_fft( new OptimalFilterbank("CUDA") ); else #endif kernel->set_optimal_fft( new OptimalFFT ); } } else kernel = 0; if (!config->integration_turns && !passband) passband = new Response; Response* response = kernel.ptr(); if (config->zap_rfi) { if (!rfi_filter) rfi_filter = new RFIFilter; rfi_filter->set_input (manager); response = rfi_filter; if (kernel) { if (!response_product) response_product = new ResponseProduct; response_product->add_response (kernel); response_product->add_response (rfi_filter); response = response_product; } } if (!config->calibrator_database_filename.empty()) { dsp::PolnCalibration* polcal = new PolnCalibration; polcal-> set_database_filename (config->calibrator_database_filename); if (kernel) { if (!response_product) response_product = new ResponseProduct; response_product->add_response (polcal); response_product->add_response (kernel); response_product->set_copy_index (0); response_product->set_match_index (1); response = response_product; } } // convolved and filterbank are out of place TimeSeries* filterbanked = unpacked; // filterbank is performing channelisation if (config->filterbank.get_nchan() > 1) { // new storage for filterbank output (must be out-of-place) filterbanked = new_time_series (); #if HAVE_CUDA if (run_on_gpu) filterbanked->set_memory (device_memory); #endif config->filterbank.set_device( device_memory.ptr() ); config->filterbank.set_stream( gpu_stream ); // software filterbank constructor if (!filterbank) filterbank = config->filterbank.create(); if (!config->input_buffering) filterbank->set_buffering_policy (NULL); filterbank->set_input (unpacked); filterbank->set_output (filterbanked); if (config->filterbank.get_convolve_when() == Filterbank::Config::During) { filterbank->set_response (response); if (!config->integration_turns) filterbank->set_passband (passband); } // Get order of operations correct if (!config->filterbank.get_convolve_when() == Filterbank::Config::Before) operations.push_back (filterbank.get()); } // output of convolved will be filterbanked|unpacked TimeSeries* convolved = filterbanked; bool filterbank_after_dedisp = config->filterbank.get_convolve_when() == Filterbank::Config::Before; if (config->coherent_dedispersion && config->filterbank.get_convolve_when() != Filterbank::Config::During) { if (!convolution) convolution = new Convolution; if (!config->input_buffering) convolution->set_buffering_policy (NULL); convolution->set_response (response); if (!config->integration_turns) convolution->set_passband (passband); convolved = new_time_series(); if (filterbank_after_dedisp) { convolution->set_input (filterbanked); convolution->set_output (convolved); // out of place } else { convolution->set_input (filterbanked); convolution->set_output (convolved); // out of place } #if HAVE_CUDA if (run_on_gpu) { convolved->set_memory (device_memory); convolution->set_device (device_memory.ptr()); unsigned nchan = manager->get_info()->get_nchan() * config->filterbank.get_nchan(); if (nchan >= 16) convolution->set_engine (new CUDA::ConvolutionEngineSpectral (stream)); else convolution->set_engine (new CUDA::ConvolutionEngine (stream)); } #endif operations.push_back (convolution.get()); } if (filterbank_after_dedisp) prepare_interchan (convolved); else prepare_interchan (convolved); if (filterbank_after_dedisp && filterbank) operations.push_back (filterbank.get()); if (config->plfb_nbin) { // Set up output Archiver* archiver = new Archiver; unloader.resize(1); unloader[0] = archiver; prepare_archiver( archiver ); if (!phased_filterbank) { if (output_subints()) { Subint<PhaseLockedFilterbank> *sub_plfb = new Subint<PhaseLockedFilterbank>; if (config->integration_length) { sub_plfb->set_subint_seconds (config->integration_length); } else if (config->integration_turns) { sub_plfb->set_subint_turns (config->integration_turns); sub_plfb->set_fractional_pulses (config->fractional_pulses); } sub_plfb->set_unloader (unloader[0]); phased_filterbank = sub_plfb; } else { phased_filterbank = new PhaseLockedFilterbank; } } phased_filterbank->set_nbin (config->plfb_nbin); phased_filterbank->set_npol (config->npol); if (config->plfb_nchan) phased_filterbank->set_nchan (config->plfb_nchan); phased_filterbank->set_input (convolved); if (!phased_filterbank->has_output()) phased_filterbank->set_output (new PhaseSeries); phased_filterbank->bin_divider.set_reference_phase(config->reference_phase); // Make dummy fold instance so that polycos get created fold.resize(1); fold[0] = new dsp::Fold; if (config->folding_period) fold[0]->set_folding_period (config->folding_period); if (config->ephemerides.size() > 0) fold[0]->set_pulsar_ephemeris ( config->ephemerides[0] ); else if (config->predictors.size() > 0) fold[0]->set_folding_predictor ( config->predictors[0] ); fold[0]->set_output ( phased_filterbank->get_output() ); fold[0]->prepare ( manager->get_info() ); operations.push_back (phased_filterbank.get()); path.resize(1); path[0] = new SignalPath (operations); return; // the phase-locked filterbank does its own detection and folding } Reference::To<Fold> presk_fold; Reference::To<Archiver> presk_unload; TimeSeries * cleaned = convolved; // peform zapping based on the results of the SKFilterbank if (config->sk_zap) { if (config->nosk_too) { Detection* presk_detect = new Detection; // set up an out-of-place detection to effect a fork in the signal path TimeSeries* presk_detected = new_time_series(); #if HAVE_CUDA if (run_on_gpu) presk_detected->set_memory (device_memory); #endif presk_detect->set_input (convolved); presk_detect->set_output (presk_detected); configure_detection (presk_detect, 0); operations.push_back (presk_detect); presk_unload = new Archiver; presk_unload->set_extension( ".nosk" ); prepare_archiver( presk_unload ); build_fold (presk_fold, presk_unload); presk_fold->set_input( presk_detected ); #if HAVE_CUDA if (run_on_gpu) presk_fold->set_engine (new CUDA::FoldEngine(stream, false)); #endif presk_fold->prepare( manager->get_info() ); presk_fold->reset(); operations.push_back (presk_fold.get()); } cleaned = new_time_series(); if (!skestimator) skestimator = new SpectralKurtosis(); if (!config->input_buffering) skestimator->set_buffering_policy (NULL); skestimator->set_input (convolved); skestimator->set_output (cleaned); skestimator->set_M (config->sk_m); #if HAVE_CUDA if (run_on_gpu) { // for input buffering convolved->set_engine (new CUDA::TimeSeriesEngine (device_memory)); cleaned->set_memory (device_memory); skestimator->set_engine (new CUDA::SpectralKurtosisEngine (device_memory)); } #endif skestimator->set_thresholds (config->sk_m, config->sk_std_devs); if (config->sk_chan_start > 0 && config->sk_chan_end < config->filterbank.get_nchan()) skestimator->set_channel_range (config->sk_chan_start, config->sk_chan_end); skestimator->set_options (config->sk_no_fscr, config->sk_no_tscr, config->sk_no_ft); operations.push_back (skestimator.get()); } // Cyclic spectrum also detects and folds if (config->cyclic_nchan) { build_fold(cleaned); return; } if (!detect) detect = new Detection; TimeSeries* detected = cleaned; detect->set_input (cleaned); detect->set_output (cleaned); configure_detection (detect, noperations); operations.push_back (detect.get()); if (config->npol == 3) { detected = new_time_series (); detect->set_output (detected); } else if (config->fourth_moment) { if (Operation::verbose) cerr << "LoadToFold::construct fourth order moments" << endl; FourthMoment* fourth = new FourthMoment; operations.push_back (fourth); fourth->set_input (detected); detected = new_time_series (); fourth->set_output (detected); } build_fold (detected); if (presk_fold) { // presk fold and unload are pushed back after the primary ones are built fold.push_back( presk_fold ); unloader.push_back( presk_unload.get() ); } if (config->sk_fold) { PhaseSeriesUnloader* unload = get_unloader( get_nfold() ); unload->set_extension( ".sk" ); Reference::To<Fold> skfold; build_fold (skfold, unload); skfold->set_input( cleaned); skfold->prepare( manager->get_info() ); skfold->reset(); fold.push_back( skfold ); operations.push_back( skfold.get() ); } } catch (Error& error) { throw error += "dsp::LoadToFold::construct"; }
void dsp::LoadToFITS::construct () try { // sets operations to zero length then adds IOManger/unpack SingleThread::construct (); bool run_on_gpu = false; #if HAVE_CUDA run_on_gpu = thread_id < config->get_cuda_ndevice(); cudaStream_t stream = reinterpret_cast<cudaStream_t>( gpu_stream ); #endif /* The following lines "wire up" the signal path, using containers to communicate the data between operations. */ // set up for optimal memory usage pattern Unpacker* unpacker = manager->get_unpacker(); if (!config->dedisperse && unpacker->get_order_supported (config->order)) unpacker->set_output_order (config->order); // get basic information about the observation Observation* obs = manager->get_info(); const unsigned nchan = obs->get_nchan (); const unsigned npol = obs->get_npol (); const unsigned ndim = obs->get_ndim (); const double rate = obs->get_rate () ; if (verbose) { cerr << "Source = " << obs->get_source() << endl; cerr << "Frequency = " << obs->get_centre_frequency() << endl; cerr << "Bandwidth = " << obs->get_bandwidth() << endl; cerr << "Channels = " << nchan << endl; cerr << "Sampling rate = " << rate << endl; cerr << "State = " << tostring(obs->get_state()) <<endl; } obs->set_dispersion_measure( config->dispersion_measure ); unsigned fb_nchan = config->filterbank.get_nchan(); unsigned nsample; double tsamp, samp_per_fb; unsigned tres_factor; double factor = obs->get_state() == Signal::Nyquist? 0.5 : 1.0; if (fb_nchan > 0) { // Strategy will be to tscrunch from Nyquist resolution to desired reso. // voltage samples per filterbank sample samp_per_fb = config->tsamp * rate; if (verbose) cerr << "voltage samples per filterbank sample="<<samp_per_fb << endl; // correction for number of samples per filterbank channel tres_factor = round(factor*samp_per_fb/fb_nchan); tsamp = tres_factor/factor*fb_nchan/rate; // voltage samples per output block nsample = round(samp_per_fb * config->nsblk); } else { samp_per_fb = 1.0; tres_factor = round(rate * config->tsamp); tsamp = tres_factor/factor * 1/rate; nsample = config->nsblk * tres_factor; } cerr << "digifits: requested tsamp=" << config->tsamp << " rate=" << rate << endl << " actual tsamp=" << tsamp << " (tscrunch=" << tres_factor << ")" << endl; if (verbose) cerr << "digifits: nsblk=" << config->nsblk << endl; // the unpacked input will occupy nbytes_per_sample double nbytes_per_sample = sizeof(float) * nchan * npol * ndim; double MB = 1024.0 * 1024.0; // ideally, block size would be a full output block, but this is too large // pick a nice fraction that will divide evently into maximum RAM // NB this doesn't account for copies (yet) if (verbose) cerr << "digifits: nsample * nbytes_per_sample=" << nsample * nbytes_per_sample << " config->maximum_RAM=" << config->maximum_RAM << endl; while (nsample * nbytes_per_sample > config->maximum_RAM) nsample /= 2; if (verbose) cerr << "digifits: block_size=" << (nbytes_per_sample*nsample)/MB << " MB " << "(" << nsample << " samp)" << endl; manager->set_block_size ( nsample ); // if running on multiple GPUs, make nsblk such that no buffering is // required if ((run_on_gpu) and (config->get_total_nthread() > 1)) { config->nsblk = nsample / samp_per_fb; if (verbose) cerr << "digifits: due to GPU multi-threading, setting nsblk="<<config->nsblk << endl; } TimeSeries* timeseries = unpacked; #if HAVE_CUDA if (run_on_gpu) { timeseries->set_memory (device_memory); timeseries->set_engine (new CUDA::TimeSeriesEngine (device_memory)); } #endif if (!obs->get_detected()) { cerr << "digifits: input data not detected" << endl; // if no filterbank specified if ((fb_nchan == 0) && (nchan == 1)) { throw Error(InvalidParam,"dsp::LoadToFITS::construct", "must specify filterbank scheme if single channel data"); } if ((config->coherent_dedisp) && (config->dispersion_measure != 0.0)) { cerr << "digifits: performing coherent dedispersion" << endl; kernel = new Dedispersion; kernel->set_dispersion_measure( config->dispersion_measure ); unsigned frequency_resolution = config->filterbank.get_freq_res (); cerr << "digifits: config->filterbank.get_freq_res= " << frequency_resolution << endl; if (frequency_resolution) { cerr << "digifits: setting filter length to " << frequency_resolution << endl; //kernel->set_frequency_resolution (frequency_resolution); kernel -> set_times_minimum_nfft (frequency_resolution); } } else { if (config->dispersion_measure != 0.0) cerr << "digifits: performing incoherent dedispersion" << endl; config->coherent_dedisp = false; } // filterbank is performing channelisation if (config->filterbank.get_nchan() > 1) { // If user specifies -FN:D, enable coherent dedispersion if (config->filterbank.get_convolve_when() == Filterbank::Config::During) { // during is the only option for filterbank config->filterbank.set_convolve_when( Filterbank::Config::During ); } else { config->coherent_dedisp = false; } #if HAVE_CUDA if (run_on_gpu) { config->filterbank.set_device ( device_memory.ptr() ); config->filterbank.set_stream ( gpu_stream ); } #endif filterbank = config->filterbank.create (); filterbank->set_nchan( config->filterbank.get_nchan() ); filterbank->set_input( timeseries ); filterbank->set_output( timeseries = new_TimeSeries() ); #if HAVE_CUDA if (run_on_gpu) timeseries->set_memory (device_memory); #endif if (config->coherent_dedisp && kernel) filterbank->set_response( kernel ); if ( !config->coherent_dedisp ) { unsigned freq_res = config->filterbank.get_freq_res(); if (freq_res > 1) filterbank->set_frequency_resolution ( freq_res ); } operations.push_back( filterbank.get() ); } // if convolution does not happen during filterbanking if (config->coherent_dedisp && config->filterbank.get_convolve_when() != Filterbank::Config::During) { cerr << "digifits: creating convolution operation" << endl; if (!convolution) convolution = new Convolution; if (!config->input_buffering) convolution->set_buffering_policy (NULL); convolution->set_response (kernel); //if (!config->integration_turns) // convolution->set_passband (passband); convolution->set_input (timeseries); convolution->set_output (timeseries = new_TimeSeries() ); // out of place #if HAVE_CUDA if (run_on_gpu) { timeseries->set_memory (device_memory); convolution->set_device (device_memory.ptr()); unsigned nchan = manager->get_info()->get_nchan(); if (fb_nchan) nchan *= fb_nchan; if (nchan >= 16) convolution->set_engine (new CUDA::ConvolutionEngineSpectral (stream)); else convolution->set_engine (new CUDA::ConvolutionEngine (stream)); } #endif operations.push_back (convolution.get()); } if (verbose) cerr << "digifits: creating detection operation" << endl; Detection* detection = new Detection; detection->set_input ( timeseries ); // always use coherence for GPU, pscrunch later if needed if (run_on_gpu) { #ifdef HAVE_CUDA if (npol == 2) { detection->set_output_state (Signal::Coherence); detection->set_engine (new CUDA::DetectionEngine(stream) ); detection->set_output_ndim (2); detection->set_output (timeseries); } else { detection->set_output_state (Signal::Intensity); detection->set_engine (new CUDA::DetectionEngine(stream) ); detection->set_output (timeseries = new_TimeSeries()); cerr << "detection->set_output(timeseries = newTimeSeries())" << endl; detection->set_output_ndim (1); timeseries->set_memory (device_memory); } #endif } else { switch (config->npol) { case 1: detection->set_output_state (Signal::Intensity); //detected = new_TimeSeries(); break; case 2: detection->set_output_state (Signal::PPQQ); //detected = new_TimeSeries(); break; case 4: detection->set_output_state (Signal::Coherence); // use this to avoid copies -- seem to segfault in multi-threaded //detection->set_output_ndim (2); break; default: throw Error(InvalidParam,"dsp::LoadToFITS::construct", "invalid polarization specified"); } detection->set_output (timeseries); } operations.push_back ( detection ); } #if HAVE_CUDA if (run_on_gpu) { // to support input buffering timeseries->set_engine (new CUDA::TimeSeriesEngine (device_memory)); } #endif TScrunch* tscrunch = new TScrunch; tscrunch->set_factor ( tres_factor ); tscrunch->set_input ( timeseries ); tscrunch->set_output ( timeseries = new_TimeSeries() ); #if HAVE_CUDA if ( run_on_gpu ) { tscrunch->set_engine ( new CUDA::TScrunchEngine(stream) ); timeseries->set_memory (device_memory); } #endif operations.push_back( tscrunch ); #if HAVE_CUDA if (run_on_gpu) { TransferCUDA* transfer = new TransferCUDA (stream); transfer->set_kind (cudaMemcpyDeviceToHost); transfer->set_input( timeseries ); transfer->set_output( timeseries = new_TimeSeries() ); operations.push_back (transfer); } #endif // need to do PolnReshape if have done on GPU (because uses the // hybrid npol=2, ndim=2 for the Stokes parameters) if (run_on_gpu) { PolnReshape* reshape = new PolnReshape; switch (config->npol) { case 4: reshape->set_state ( Signal::Coherence ); break; case 2: reshape->set_state ( Signal::PPQQ ); break; case 1: reshape->set_state ( Signal::Intensity ); break; default: throw Error(InvalidParam,"dsp::LoadToFITS::construct", "invalid polarization specified"); } reshape->set_input (timeseries ); reshape->set_output ( timeseries = new_TimeSeries() ); operations.push_back(reshape); } //else if (config->npol == 4) else if (false) { PolnReshape* reshape = new PolnReshape; reshape->set_state ( Signal::Coherence ); reshape->set_input (timeseries ); reshape->set_output ( timeseries = new_TimeSeries() ); operations.push_back (reshape); } if ( config->dedisperse ) { //if (verbose) cerr << "digifits: removing dispersion delays" << endl; SampleDelay* delay = new SampleDelay; delay->set_input (timeseries); delay->set_output (timeseries); delay->set_function (new Dedispersion::SampleDelay); operations.push_back( delay ); } // only do pscrunch for detected data -- NB always goes to Intensity bool do_pscrunch = (obs->get_npol() > 1) && (config->npol==1) && (obs->get_detected()); if (do_pscrunch) { //if (verbose) cerr << "digifits: creating pscrunch transformation" << endl; PScrunch* pscrunch = new PScrunch; pscrunch->set_input (timeseries); pscrunch->set_output (timeseries); operations.push_back( pscrunch ); } if (verbose) cerr << "digifits: creating output bitseries container" << endl; BitSeries* bitseries = new BitSeries; if (verbose) cerr << "digifits: creating PSRFITS digitizer with nbit=" << config->nbits << endl; FITSDigitizer* digitizer = new FITSDigitizer (config->nbits); digitizer->set_input (timeseries); digitizer->set_output (bitseries); // PSRFITS allows us to save the reference spectrum in each output block // "subint", so we can take advantage of this to store the exect // reference spectrum for later use. By default, we will rescale the // spectrum using values for exactly one block (nsblk samples). This // potentially improves the dynamic range, but makes the observaiton more // subject to transiennts. By calling set_rescale_nblock(N), the path // will keep a running mean/scale for N sample blocks. This is presented // to the user through rescale_seconds, which will choose the appropriate // block length to approximate the requested time interval. digitizer->set_rescale_samples (config->nsblk); if (config->rescale_constant) { cerr << "digifits: holding scales and offsets constant" << endl; digitizer->set_rescale_constant (true); } else if (config->rescale_seconds > 0) { double tblock = config->tsamp * config->nsblk; unsigned nblock = unsigned ( config->rescale_seconds/tblock + 0.5 ); if (nblock < 1) nblock = 1; digitizer->set_rescale_nblock (nblock); cerr << "digifits: using "<<nblock<<" blocks running mean for scales and constant ("<<tblock*nblock<<") seconds"<<endl; } operations.push_back( digitizer ); if (verbose) cerr << "digifits: creating PSRFITS output file" << endl; const char* output_filename = 0; if (!config->output_filename.empty()) output_filename = config->output_filename.c_str(); FITSOutputFile* outputfile = new FITSOutputFile (output_filename); outputfile->set_nsblk (config->nsblk); outputfile->set_nbit (config->nbits); outputfile->set_max_length (config->integration_length); outputFile = outputfile; outputFile->set_input (bitseries); operations.push_back( outputFile.get() ); // add a callback for the PSRFITS reference spectrum digitizer->update.connect ( dynamic_cast<FITSOutputFile*> (outputFile.get()), &FITSOutputFile::set_reference_spectrum); } catch (Error& error) { throw error += "dsp::LoadToFITS::construct"; }