Beispiel #1
0
void Pulsar::PolnProfileFit::solve () try
{
  RealTimer clock;

  clock.start();

  equation->solve ();

  clock.stop();

  if (verbose)
  {
    float chisq = equation->get_solver()->get_chisq();
    float nfree = equation->get_solver()->get_nfree();
    cerr << "Pulsar::PolnProfileFit::solve solved in " << clock << "."
      " chisq=" << chisq/nfree << endl;
  }
}
catch (Error& error) {
  throw error += "Pulsar::PolnProfileFit::solve";
}
Beispiel #2
0
void dsp::Archiver::unload (const PhaseSeries* _profiles) try
{
  if (!single_archive && archive_class_name.size() == 0)
    throw Error (InvalidState, "dsp::Archiver::unload", 
        	 "neither Archive nor class name specified");

  if (!_profiles)
    throw Error (InvalidState, "dsp::Archiver::unload",
        	 "Profile data not provided");

  if (verbose > 2)
    cerr << "dsp::Archiver::unload profiles=" << _profiles << endl;
  
  if (_profiles->get_nbin() == 0 || _profiles->get_ndat_folded() == 0)
  {
    if (verbose > 2)
      cerr << "dsp::Archiver::unload ignoring empty sub-integration" << endl;
    return;
  }

  on_host( _profiles, profiles );

  uint64_t ndat_folded = profiles->get_ndat_folded();
  uint64_t ndat_total = profiles->get_ndat_total();
  double percent = double(ndat_folded)/double(ndat_total) * 100.0;

  if (verbose > 2)
    cerr << "dsp::Archiver::unload folded " << ndat_folded << " out of "
         << ndat_total << " total samples: " << percent << "%" << endl;

  uint64_t ndat_expected = profiles->get_ndat_expected();
  if (ndat_expected && ndat_expected < 0.9 * ndat_total)
  {
    /*
      ndat_expected is the number of samples expected to be totalled in
      the sub-integration.  This number can possibly differ from ndat_total
      due to different rounding in different thread (an untested assertion).
    */

    if (verbose > 2)
      cerr << "dsp::Archiver::unload ignoring incomplete sub-integration \n\t"
        "expected=" << ndat_expected << " total=" << ndat_total << endl;

    return;
  }

  if (profiles->get_integration_length() < minimum_integration_length)
  {
    cerr << "dsp::Archiver::unload ignoring " 
         << profiles->get_integration_length() << " seconds of data" << endl;

    return;
  }

  if (use_single_archive)
  {
    // Generate new archive if needed
    if (!single_archive)
    {
      if (verbose > 2)
	cerr << "dsp::Archiver::unload creating new single Archive" << endl;
      single_archive = new_Archive();
    }

    // refer to the single archive to which all sub-integration will be written
    archive = single_archive;

    // add the profile data
    add (archive, profiles);

    // See if we've reached max subints per file, if not, we're done for now
    if (subints_per_file == 0 || (archive->get_nsubint() < subints_per_file))
      return;

    // Max subint limit reached so we need to unload this file and
    // start a new one next time.
    finish();
    single_archive = 0;

    return;
  }

  if (!archive)
    archive = new_Archive();

  if (verbose > 2)
    cerr << "dsp::Archiver::unload set Pulsar::Archive" << endl;

  set (archive, profiles);

  if (script.size()) try
  {
    if (verbose > 2)
      cerr << "dsp::Archiver::unload post-processing" << endl;

    if (!interpreter)
      interpreter = standard_shell();

    interpreter->set( archive );
    interpreter->script( script );
  }
  catch (Error& error)
  {
    if (verbose)
      cerr << "dsp::Archiver::unload post-processing "
           << archive->get_filename() << " failed:\n"
           << error.get_message() << endl;
    return;
  }

  if (verbose > 2)
    cerr << "dsp::Archiver::unload archive '"
         << archive->get_filename() << "'" << endl;
    
  RealTimer timer;
  if (dsp::Operation::record_time)
    timer.start();
    
  archive -> unload();
  
  if (dsp::Operation::record_time)
  {
    timer.stop();
    cerr << "dsp::Archiver::unload in " << timer << endl;
  }

}
catch (Error& error)
{
  throw error += "dsp::Archiver::unload";
}
Beispiel #3
0
/*!
  Times repeated calls to the Filterbank engine's perform method.

  The work buffer holds nchan*nfft floats, doubled unless real_to_complex
  is set.  When nloop is zero it is chosen so that roughly 256 MB of data
  are processed per iteration, limited to the range [1, 2000].

  The average time per call in microseconds and a derived mflops figure
  are written to cerr (labelled) and to cout (space-separated columns).

  \throw Error (InvalidState) if the buffer size is zero, or if no engine
         is available (i.e. when built without HAVE_CUFFT)

  \note neither the engine nor the memory manager is deleted here;
        NOTE(review): confirm whether either is reference counted or
        owned elsewhere before adding a delete.
*/
void Speed::runTest ()
{
  unsigned nfloat = nchan * nfft;
  if (!real_to_complex)
    nfloat *= 2;

  const unsigned size = sizeof(float) * nfloat;

  // size is used as a divisor below; an empty buffer cannot be benchmarked
  if (size == 0)
    throw Error (InvalidState, "Speed::runTest",
                 "buffer size is zero (nchan and nfft must be non-zero)");

  if (!nloop)
  {
    nloop = (1024*1024*256) / size;
    if (nloop > 2000)
      nloop = 2000;

    // buffers larger than 256 MB would otherwise yield zero loops, so
    // nothing would be timed and the average below would divide by zero
    if (nloop == 0)
      nloop = 1;

    cerr << "Speed::runTest nloop=" << nloop << endl;
  }

  dsp::Filterbank::Engine* engine = 0;
  dsp::Memory* memory = 0;

#if HAVE_CUFFT
  cudaStream_t stream = 0;
  // cudaStreamCreate( &stream );
  engine = new CUDA::FilterbankEngine (stream);
  memory = new CUDA::DeviceMemory;
#endif

  // verify the engine before allocating anything else, so that nothing
  // is leaked when the exception is thrown
  if (!engine)
    throw Error (InvalidState, "Speed::runTest",
                 "engine not set");

  if (!memory)
    memory = new dsp::Memory;

  // the input buffer is allocated only for the forward FFT case;
  // otherwise the engine is handed a null input pointer
  float* in = NULL;

  if (do_fwd_fft)
  {
    in = (float*) memory->do_allocate (size);
    memory->do_zero (in, size);
  }

  engine->scratch = (float*) memory->do_allocate (size + 4*sizeof(float));

  dsp::TimeSeries ts;
  ts.set_state( Signal::Analytic );

  dsp::Filterbank temp;
  temp.set_nchan (nchan);
  temp.set_frequency_resolution (nfft);
  temp.set_input (&ts);
  engine->setup (&temp);

  cerr << "entering loop" << endl;

  double total_time = 0;

  for (unsigned j=0; j<niter; j++)
  {
    RealTimer timer;
    timer.start ();

    for (unsigned i=0; i<nloop; i++)
      engine->perform (in);

    engine->finish ();

    timer.stop ();

    total_time += timer.get_elapsed();
  }

  // count the calls in double precision so that a large nloop*niter
  // cannot wrap around in unsigned arithmetic
  const double ncall = double(nloop) * double(niter);
  const double time_us = total_time * 1e6 / ncall;

  if (in)
    memory->do_free (in);

  // do not leave the engine pointing at released memory
  memory->do_free (engine->scratch);
  engine->scratch = 0;

  const double log2_nfft = log2(nfft);
  const double log2_nchan = log2(nchan);

  // weight applied to the log2(nfft) term; it is 1 for a single channel
  double bwd = 2;
  if (nchan == 1)
    bwd = 1;

  const double mflops
    = 5.0 * nfft * nchan * (bwd*log2_nfft + log2_nchan) / time_us;

  cerr << "nchan=" << nchan << " nfft=" << nfft << " time=" << time_us << "us"
    " log2(nfft)=" << log2_nfft << " log2(nchan)=" << log2_nchan << 
    " mflops=" << mflops << endl;

  cout << nchan << " " << nfft << " " << time_us << " "
       << log2_nchan << " " << log2_nfft << " " << mflops << endl;
}
Beispiel #4
0
/*
  Python entry point: assembles the hierarchical matrix from the Python
  kernel callback, solves the linear system with GMRes and stores the
  result, in the original (un-permuted) ordering, in the global `solution`.

  Both arguments are unused; all input is taken from file-level state
  (vectors, nVectors, bmin, eta, eps_matgen, rankmax, eps_gmres,
  steps_gmres and the callback_* functions).

  Returns a new reference to None.
*/
static PyObject *
solve_slae(PyObject *dummy, PyObject *args)
{
  Perm vecPerm(nVectors);

  // cluster tree over the vectors; this also fills the permutations
  bemcluster<vec3d>* clTreeVec =
    new bemcluster<vec3d>(vectors, vecPerm.op_perm, 0, nVectors);

  clTreeVec->createClusterTree(bmin, vecPerm.op_perm, vecPerm.po_perm);
  const unsigned nClustersPan = clTreeVec->getncl();

  std::cout << "done, " << nClustersPan << " clusters -- ";
  std::cout << inMB(nClustersPan * sizeof(bemcluster<vec3d>)) << " MB." << std::endl;

  // block cluster tree; NOTE(review): never deleted in this function --
  // confirm whether BEMMatrix takes ownership before adding a delete
  bemblcluster<vec3d,vec3d>* blclTreeVec =
    new bemblcluster<vec3d,vec3d>(clTreeVec, clTreeVec);

  unsigned nblcksVec = 0;
  blclTreeVec->subdivide(clTreeVec,clTreeVec, eta * eta, nblcksVec);
  std::cout << "done, " << nblcksVec << " blocks -- ";
  std::cout << inMB(blclTreeVec->size()) << " MB." << std::endl;


  RealTimer timer;
  kernel = &callback_kernel;
  MATGENKERNEL MatGen(vectors, vecPerm.op_perm, kernel);
  BEMMatrix<vec3d,vec3d> A(nVectors, blclTreeVec);

  matgenGeH_omp(MatGen, nblcksVec, A.blclTree, eps_matgen, rankmax, A.blcks);
  {
    const double allmem = sizeH(A.blclTree, A.blcks);
    io::displayInfo(allmem, nVectors, timer.current(), sizeof(dcomp));
  }

  // std::cout << "Agglomerating matrix ... " << std::flush;
  // timer.restart();

  // agglH(A.blclTree, A.blcks, eps_aggl, rankmax);
  // std::cout << "done." << std::endl;

  // {
  //   const double allmem = sizeH(A.blclTree, A.blcks);
  //   io::displayInfo(allmem, nVectors, timer.current(), sizeof(dcomp));
  // }

  // right-hand side in permuted order; the callback is given the original
  // index plus one (presumably it expects 1-based indices -- confirm)
  dcomp *b = new dcomp[nVectors];

  // a for loop, unlike do/while, touches nothing when nVectors is zero
  for (unsigned i = 0; i < nVectors; i++)
    b[i] = callback_vectorb(vecPerm.op_perm[i] + 1);

  // initial guess: the zero vector
  dcomp *x = new dcomp[nVectors];
  std::fill_n(x, nVectors, dcomp(0.,0.));

  // both are passed to GMRes and printed afterwards as the achieved
  // accuracy and the number of steps taken
  double acc = eps_gmres;
  unsigned steps = steps_gmres;

  if (GMRes(A, b, x, acc, 100, steps))
    std::cout << "GMRes: iteration did not converge.";
  else
    std::cout << "GMRes converged to " << acc << " in "
              << steps << " steps.";

  std::cout << " Solution took " << timer << "." << std::endl;

  // undo the permutation while copying into the global result array;
  // NOTE(review): a previous `solution` array is not released here --
  // confirm where (or whether) it is freed
  solution = new dcomp[nVectors];
  for (unsigned i = 0; i < nVectors; i++)
    solution[i] = x[vecPerm.po_perm[i]];

  // the work arrays are no longer needed once the solution is copied
  delete [] x;
  delete [] b;

  delete clTreeVec;

  // None must be returned as a new (incremented) reference
  Py_RETURN_NONE;
}