void Pulsar::PolnProfileFit::solve () try { RealTimer clock; clock.start(); equation->solve (); clock.stop(); if (verbose) { float chisq = equation->get_solver()->get_chisq(); float nfree = equation->get_solver()->get_nfree(); cerr << "Pulsar::PolnProfileFit::solve solved in " << clock << "." " chisq=" << chisq/nfree << endl; } } catch (Error& error) { throw error += "Pulsar::PolnProfileFit::solve"; }
void dsp::Archiver::unload (const PhaseSeries* _profiles) try { if (!single_archive && archive_class_name.size() == 0) throw Error (InvalidState, "dsp::Archiver::unload", "neither Archive nor class name specified"); if (!_profiles) throw Error (InvalidState, "dsp::Archiver::unload", "Profile data not provided"); if (verbose > 2) cerr << "dsp::Archiver::unload profiles=" << _profiles << endl; if (_profiles->get_nbin() == 0 || _profiles->get_ndat_folded() == 0) { if (verbose > 2) cerr << "dsp::Archiver::unload ignoring empty sub-integration" << endl; return; } on_host( _profiles, profiles ); uint64_t ndat_folded = profiles->get_ndat_folded(); uint64_t ndat_total = profiles->get_ndat_total(); double percent = double(ndat_folded)/double(ndat_total) * 100.0; if (verbose > 2) cerr << "dsp::Archiver::unload folded " << ndat_folded << " out of " << ndat_total << " total samples: " << percent << "%" << endl; uint64_t ndat_expected = profiles->get_ndat_expected(); if (ndat_expected && ndat_expected < 0.9 * ndat_total) { /* ndat_expected is the number of samples expected to be totalled in the sub-integration. This number can possibly differ from ndat_total due to different rounding in different thread (an untested assertion). 
*/ if (verbose > 2) cerr << "dsp::Archiver::unload ignoring incomplete sub-integration \n\t" "expected=" << ndat_expected << " total=" << ndat_total << endl; return; } if (profiles->get_integration_length() < minimum_integration_length) { cerr << "dsp::Archiver::unload ignoring " << profiles->get_integration_length() << " seconds of data" << endl; return; } if (use_single_archive) { // Generate new archive if needed if (!single_archive) { if (verbose > 2) cerr << "dsp::Archiver::unload creating new single Archive" << endl; single_archive = new_Archive(); } // refer to the single archive to which all sub-integration will be written archive = single_archive; // add the profile data add (archive, profiles); // See if we've reached max subints per file, if not, we're done for now if (subints_per_file == 0 || (archive->get_nsubint() < subints_per_file)) return; // Max subint limit reached so we need to unload this file and // start a new one next time. finish(); single_archive = 0; return; } if (!archive) archive = new_Archive(); if (verbose > 2) cerr << "dsp::Archiver::unload set Pulsar::Archive" << endl; set (archive, profiles); if (script.size()) try { if (verbose > 2) cerr << "dsp::Archiver::unload post-processing" << endl; if (!interpreter) interpreter = standard_shell(); interpreter->set( archive ); interpreter->script( script ); } catch (Error& error) { if (verbose) cerr << "dsp::Archiver::unload post-processing " << archive->get_filename() << " failed:\n" << error.get_message() << endl; return; } if (verbose > 2) cerr << "dsp::Archiver::unload archive '" << archive->get_filename() << "'" << endl; RealTimer timer; if (dsp::Operation::record_time) timer.start(); archive -> unload(); if (dsp::Operation::record_time) { timer.stop(); cerr << "dsp::Archiver::unload in " << timer << endl; } } catch (Error& error) { throw error += "dsp::Archiver::unload"; }
void Speed::runTest () { unsigned nfloat = nchan * nfft; if (!real_to_complex) nfloat *= 2; unsigned size = sizeof(float) * nfloat; if (!nloop) { nloop = (1024*1024*256) / size; if (nloop > 2000) nloop = 2000; cerr << "Speed::runTest nloop=" << nloop << endl; } dsp::Filterbank::Engine* engine = 0; dsp::Memory* memory = 0; #if HAVE_CUFFT cudaStream_t stream = 0; // cudaStreamCreate( &stream ); engine = new CUDA::FilterbankEngine (stream); memory = new CUDA::DeviceMemory; #endif if (!memory) memory = new dsp::Memory; if (!engine) throw Error (InvalidState, "Speed::runTest", "engine not set"); float* in = NULL; if (do_fwd_fft) { in = (float*) memory->do_allocate (size); memory->do_zero (in, size); } engine->scratch = (float*) memory->do_allocate (size + 4*sizeof(float)); dsp::TimeSeries ts; ts.set_state( Signal::Analytic ); dsp::Filterbank temp; temp.set_nchan (nchan); temp.set_frequency_resolution (nfft); temp.set_input (&ts); engine->setup (&temp); cerr << "entering loop" << endl; double total_time = 0; for (unsigned j=0; j<niter; j++) { RealTimer timer; timer.start (); for (unsigned i=0; i<nloop; i++) engine->perform (in); engine->finish (); timer.stop (); total_time += timer.get_elapsed(); } double time_us = total_time * 1e6 / (nloop*niter); // cerr << "time=" << time << endl; if (in) memory->do_free (in); memory->do_free (engine->scratch); double log2_nfft = log2(nfft); double log2_nchan = log2(nchan); double bwd = 2; if (nchan == 1) bwd = 1; double mflops = 5.0 * nfft * nchan * (bwd*log2_nfft + log2_nchan) / time_us; cerr << "nchan=" << nchan << " nfft=" << nfft << " time=" << time_us << "us" " log2(nfft)=" << log2_nfft << " log2(nchan)=" << log2_nchan << " mflops=" << mflops << endl; cout << nchan << " " << nfft << " " << time_us << " " << log2_nchan << " " << log2_nfft << " " << mflops << endl; }
/*
  Python-callable entry point: assemble the hierarchical matrix for the
  current set of vectors, solve the linear system with GMRes and store
  the result in the file-level `solution` array.

  Neither argument is read.  All inputs (vectors, nVectors, bmin, eta,
  eps_matgen, rankmax, eps_gmres, steps_gmres) and the callbacks
  (callback_kernel, callback_vectorb) are names defined outside this
  function.

  Returns a new reference to None.

  NOTE(review): blclTreeVec and any array previously held by `solution`
  are not released here.  Whether A or other code still refers to them
  cannot be told from this function, so that is left unchanged -- confirm.
*/
static PyObject *
solve_slae(PyObject *dummy, PyObject *args)
{
  // cluster tree over the vectors; vecPerm receives the permutations
  Perm vecPerm(nVectors);
  bemcluster<vec3d>* clTreeVec =
    new bemcluster<vec3d>(vectors, vecPerm.op_perm, 0, nVectors);
  clTreeVec->createClusterTree(bmin, vecPerm.op_perm, vecPerm.po_perm);

  const unsigned nClustersPan = clTreeVec->getncl();
  std::cout << "done, " << nClustersPan << " clusters -- ";
  std::cout << inMB(nClustersPan * sizeof(bemcluster<vec3d>)) << " MB."
            << std::endl;

  // block cluster tree of the cluster tree against itself
  bemblcluster<vec3d,vec3d>* blclTreeVec =
    new bemblcluster<vec3d,vec3d>(clTreeVec, clTreeVec);
  unsigned nblcksVec = 0;
  blclTreeVec->subdivide(clTreeVec, clTreeVec, eta * eta, nblcksVec);

  std::cout << "done, " << nblcksVec << " blocks -- ";
  std::cout << inMB(blclTreeVec->size()) << " MB." << std::endl;

  RealTimer timer;

  // generate the matrix blocks from the kernel callback
  kernel = &callback_kernel;
  MATGENKERNEL MatGen(vectors, vecPerm.op_perm, kernel);
  BEMMatrix<vec3d,vec3d> A(nVectors, blclTreeVec);
  matgenGeH_omp(MatGen, nblcksVec, A.blclTree, eps_matgen, rankmax, A.blcks);

  {
    const double allmem = sizeH(A.blclTree, A.blcks);
    io::displayInfo(allmem, nVectors, timer.current(), sizeof(dcomp));
  }

  // std::cout << "Agglomerating matrix ... " << std::flush;
  // timer.restart();
  // agglH(A.blclTree, A.blcks, eps_aggl, rankmax);
  // std::cout << "done." << std::endl;
  // {
  //   const double allmem = sizeH(A.blclTree, A.blcks);
  //   io::displayInfo(allmem, nVectors, timer.current(), sizeof(dcomp));
  // }

  // right-hand side in permuted order; the callback is given index + 1
  // (a plain for loop, so that nothing is written when nVectors is zero)
  dcomp *b = new dcomp[nVectors];
  for (unsigned i = 0; i < nVectors; i++)
    b[i] = callback_vectorb(vecPerm.op_perm[i] + 1);

  // initial guess: zero
  dcomp *x = new dcomp[nVectors];
  std::fill_n(x, nVectors, dcomp(0.,0.));

  // acc and steps are passed to GMRes and printed afterwards
  // NOTE(review): presumably GMRes updates them with the achieved
  // accuracy and step count -- confirm against its declaration
  double acc = eps_gmres;
  unsigned steps = steps_gmres;
  if (GMRes(A, b, x, acc, 100, steps))
    std::cout << "GMRes: iteration did not converge.";
  else
    std::cout << "GMRes converged to " << acc << " in " << steps << " steps.";

  std::cout << " Solution took " << timer << "." << std::endl;

  // copy the result into `solution`, indexing x through po_perm
  solution = new dcomp[nVectors];
  for (unsigned i = 0; i < nVectors; i++)
    solution[i] = x[vecPerm.po_perm[i]];

  // the work arrays are no longer needed once the solution is copied
  delete[] x;
  delete[] b;

  delete clTreeVec;

  // returning Py_None without taking a reference corrupts its
  // reference count; Py_RETURN_NONE takes the reference first
  Py_RETURN_NONE;
}