Esempio n. 1
0
File: tri2d.c Progetto: H2Lib/H2Lib
/* Read the next line that does not start with '#' into buf (at most
 * size-1 characters).  Returns buf on success, or NULL when the end of
 * the file or a read error is reached before such a line is found. */
static char *
next_tri2d_line(char *buf, int size, FILE * in)
{
  char     *res;

  do {
    res = fgets(buf, size, in);
  } while (res != NULL && buf[0] == '#');

  return res;
}

/* Read a two-dimensional triangular mesh from the text file "name".
 *
 * Layout expected by the parser below (lines starting with '#' are
 * skipped as comments):
 *   - one line "vertices edges triangles" giving the three counts,
 *   - one line per vertex:   "x y boundary-flag",
 *   - one line per edge:     "start end boundary-flag",
 *   - one line per triangle: three unsigned indices.
 *
 * Returns the new mesh, or 0 after printing a message to stderr if the
 * file cannot be opened, ends prematurely, or contains a malformed line.
 * A mesh allocated before the error is released with del_tri2d. */
ptri2d
read_tri2d(const char *name)
{

  FILE     *in;
  ptri2d    t2;
  real(*x)[2];
  uint(*e)[2];
  uint(*t)[3];
  uint     *xb;
  uint     *eb;
  uint      i;

  uint      vertices, edges, triangles;
  int       items;
  char      buf[80];

  in = fopen(name, "r");
  if (!in) {
    (void) fprintf(stderr, "Could not open file \"%s\" for reading\n", name);
    return 0;
  }

  /* Header line; the buffer size is always taken from buf itself so the
   * read can never exceed the array. */
  items = 0;
  if (next_tri2d_line(buf, (int) sizeof(buf), in) != NULL)
    items = sscanf(buf, "%u %u %u", &vertices, &edges, &triangles);
  if (items != 3) {
    (void) fprintf(stderr, "Could not get sizes from file \"%s\"\n", name);
    (void) fclose(in);
    return 0;
  }

  t2 = new_tri2d(vertices, edges, triangles);
  x = t2->x;
  e = t2->e;
  t = t2->t;
  xb = t2->xb;
  eb = t2->eb;

  /*vertices */
  for (i = 0; i < vertices; i++) {
    items = 0;
    if (next_tri2d_line(buf, (int) sizeof(buf), in) != NULL)
      items = sscanf(buf, "%" SCANF_PREFIX "f %" SCANF_PREFIX "f %u", x[i],
		     x[i] + 1, xb + i);
    if (items != 3) {
      (void) fprintf(stderr, "Could not read vertex %u from file \"%s\"\n", i,
		     name);
      del_tri2d(t2);
      (void) fclose(in);
      return 0;
    }
  }
  /*edges */
  for (i = 0; i < edges; i++) {
    items = 0;
    if (next_tri2d_line(buf, (int) sizeof(buf), in) != NULL)
      items = sscanf(buf, "%u %u %u", e[i], e[i] + 1, eb + i);
    if (items != 3) {
      (void) fprintf(stderr, "Could not read edge %u from file \"%s\"\n", i,
		     name);
      del_tri2d(t2);
      (void) fclose(in);
      return 0;
    }
  }
  /*triangles */
  for (i = 0; i < triangles; i++) {
    items = 0;
    if (next_tri2d_line(buf, (int) sizeof(buf), in) != NULL)
      items = sscanf(buf, "%u %u %u", t[i], t[i] + 1, t[i] + 2);
    if (items != 3) {
      (void) fprintf(stderr, "Could not read triangle %u from file \"%s\"\n",
		     i, name);
      del_tri2d(t2);
      (void) fclose(in);
      return 0;
    }
  }

  (void) fclose(in);

  return t2;
}
// Power spectrum: run an nfft-point FFT on `mat` and return, element by
// element, the squared magnitude (re^2 + im^2) of the transform.
arma::mat powerFFT(arma::mat mat, int nfft)
{
	const arma::cx_mat spectrum = fft(mat, nfft);
	return square(real(spectrum)) + square(imag(spectrum));
}
Esempio n. 3
0
// Time-dependent complex-valued example: integrates the problem described by
// CustomWeakFormGPRK from t = 0 to T_FINAL with a Runge-Kutta method chosen
// through a Butcher's table, showing the real and imaginary parts of the
// solution after every step.
int main(int argc, char* argv[])
{
  // Choose a Butcher's table or define your own.
  ButcherTable bt(butcher_table_type);
  if (bt.is_explicit()) Hermes::Mixins::Loggable::Static::info("Using a %d-stage explicit R-K method.", bt.get_size());
  if (bt.is_diagonally_implicit()) Hermes::Mixins::Loggable::Static::info("Using a %d-stage diagonally implicit R-K method.", bt.get_size());
  if (bt.is_fully_implicit()) Hermes::Mixins::Loggable::Static::info("Using a %d-stage fully implicit R-K method.", bt.get_size());

  // Load the mesh.
  Mesh mesh;
  MeshReaderH2D mloader;
  mloader.load("square.mesh", &mesh);

  // Initial mesh refinements.
  for(int i = 0; i < INIT_REF_NUM; i++) mesh.refine_all_elements();

  // Convert initial condition into a Solution<std::complex<double> >.
  CustomInitialCondition psi_time_prev(&mesh);
  Solution<std::complex<double> > psi_time_new(&mesh);

  // Initialize the weak formulation.
  double current_time = 0;

  CustomWeakFormGPRK wf(h, m, g, omega);
  
  // Initialize boundary conditions.
  DefaultEssentialBCConst<std::complex<double> > bc_essential("Bdy", 0.0);
  EssentialBCs<std::complex<double> > bcs(&bc_essential);

  // Create an H1 space with default shapeset.
  H1Space<std::complex<double> > space(&mesh, &bcs, P_INIT);
  int ndof = space.get_num_dofs();
  Hermes::Mixins::Loggable::Static::info("ndof = %d", ndof);
 
  // Initialize the FE problem.
  DiscreteProblem<std::complex<double> > dp(&wf, &space);

  // Initialize views.
  ScalarView sview_real("Solution - real part", new WinGeom(0, 0, 600, 500));
  ScalarView sview_imag("Solution - imaginary part", new WinGeom(610, 0, 600, 500));
  sview_real.fix_scale_width(80);
  sview_imag.fix_scale_width(80);

  // Initialize Runge-Kutta time stepping.
  RungeKutta<std::complex<double> > runge_kutta(&wf, &space, &bt);
  
  // Time stepping: the number of steps is T_FINAL / time_step, rounded to
  // the nearest integer. The loop header is the only place that advances
  // the step counter, so exactly nstep steps are performed.
  const int nstep = (int)(T_FINAL/time_step + 0.5);
  for(int ts = 1; ts <= nstep; ts++)
  {
    // Perform one Runge-Kutta time step according to the selected Butcher's table.
    Hermes::Mixins::Loggable::Static::info("Runge-Kutta time step (t = %g s, time step = %g s, stages: %d).", 
         current_time, time_step, bt.get_size());
    
    try
    {
      runge_kutta.setTime(current_time);
      runge_kutta.setTimeStep(time_step);
      runge_kutta.rk_time_step_newton(&psi_time_prev, &psi_time_new);
    }
    catch(Exceptions::Exception& e)
    {
      e.printMsg();
      throw Hermes::Exceptions::Exception("Runge-Kutta time step failed");
    }

    // Show the new time level solution. The title buffer is written with a
    // bounded formatter so an unexpectedly long time value cannot overrun it.
    char title[100];
    snprintf(title, sizeof(title), "Solution - real part, Time %3.2f s", current_time);
    sview_real.set_title(title);
    snprintf(title, sizeof(title), "Solution - imaginary part, Time %3.2f s", current_time);
    sview_imag.set_title(title);
    RealFilter real(&psi_time_new);
    ImagFilter imag(&psi_time_new);
    sview_real.show(&real);
    sview_imag.show(&imag);

    // Copy solution for the new time step.
    psi_time_prev.copy(&psi_time_new);

    // Increase current time.
    current_time += time_step;
  }

  // Wait for the view to be closed.
  View::wait();
  return 0;
}
Esempio n. 4
0
//
//	Demodulate one complex sample. The sample is first scaled to unit
//	magnitude (or replaced by a small constant when it is nearly zero),
//	then the decoder chosen by selectedDecoder turns it into a value
//	from which a slowly tracking average (fm_afc) is subtracted before
//	scaling with fm_cvt and K_FM. The normalized sample is remembered
//	in Imin1/Qmin1 for the next call.
DSPFLOAT	fm_Demodulator::demodulate (DSPCOMPLEX z) {
#define	DCAlpha	0.0001
//#define	DCAlpha	0.000001
DSPFLOAT	demodulated;
DSPFLOAT	inPhase, quadrature;
const auto	magnitude	= abs (z);

	if (magnitude <= 0.001) {
//	do not make these 0 too often
	   quadrature	= 0.001;
	   inPhase	= quadrature;
	}
	else { 
	   inPhase	= real (z) / magnitude;
	   quadrature	= imag (z) / magnitude;
	}

	z	= DSPCOMPLEX (inPhase, quadrature);
	switch (selectedDecoder) {
	   case FM4DECODER:
	      myfm_pll	-> do_pll (z);
//	lowpass the NCO frequency term to get a DC offset
	      fm_afc	= (1 - DCAlpha) * fm_afc +
	                   DCAlpha * myfm_pll -> getPhaseIncr ();
	      demodulated	= (myfm_pll -> getPhaseIncr () - fm_afc) * fm_cvt;
	      demodulated	/= K_FM;
	      break;

	   case FM5DECODER:
//	NOTE(review): the table index reaches ArcsineSize when the
//	expression below evaluates to exactly 1.0 - confirm the table
//	has room for that entry.
	      demodulated	= (Imin1 * quadrature - Qmin1 * inPhase + 1.0) / 2.0;
	      demodulated	= Arcsine [(int)(demodulated * ArcsineSize)];
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated	= (demodulated - fm_afc) * fm_cvt;
	      demodulated	/= K_FM;
	      break;

	   case FM3DECODER:
	      demodulated	= myAtan. atan2 (quadrature * Imin1 - inPhase * Qmin1,
	                                 inPhase * Imin1 + quadrature * Qmin1);
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated	= (demodulated - fm_afc) * fm_cvt;
	      demodulated	/= K_FM;
	      break;

	   case FM2DECODER:
	      demodulated	= arg (z * DSPCOMPLEX (Imin1, - Qmin1));
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated	= (demodulated - fm_afc) * fm_cvt;
	      demodulated	/= K_FM;
	      break;

//	any unknown selection is handled like FM1DECODER
	   case FM1DECODER:
	   default:
	      demodulated	= Imin1 * (quadrature - Qmin2) - Qmin1 * (inPhase - Imin2);
	      demodulated	/= Imin1 * Imin1 + Qmin1 * Qmin1;
	      Imin2	= Imin1;
	      Qmin2	= Qmin1;
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated	= (demodulated - fm_afc) * fm_cvt;
	      demodulated	/= K_FM;
	      break;
	}
//
//	and shift ...
	Imin1	= inPhase;
	Qmin1	= quadrature;
	return demodulated;
}
Esempio n. 5
0
/* ////////////////////////////////////////////////////////////////////////////
   -- benchmark of single-precision multi-vector dot products

   For num_vecs = 1..32 and a fixed vector length n = 500000, this driver
   times six ways of forming the dot products of num_vecs vectors with one
   vector (repeated magma_sdot, magma_sgemv, magmablas_sgemv, magma_smdotc,
   magma_sgemvmdot and magma_sgemvmdot_shfl) and prints one table row per
   configuration with the average runtime per iteration and a throughput
   figure for every variant.
*/
int main(  int argc, char** argv )
{
    magma_int_t info = 0;
    magma_queue_t queue=NULL;
    magma_queue_create( 0, &queue );

    const float one  = MAGMA_S_MAKE(1.0, 0.0);
    const float zero = MAGMA_S_MAKE(0.0, 0.0);
    float alpha;

    TESTING_INIT();

    // a: the num_vecs vectors, b: the single vector they are dotted with,
    // x/y: additional buffers handed to the merged kernels, skp: results.
    magma_s_matrix a={Magma_CSR}, b={Magma_CSR}, x={Magma_CSR}, y={Magma_CSR}, skp={Magma_CSR};

    // Table header ("%%" prints a literal '%').
    printf("%%=======================================================================================================================================================================\n");
    printf("\n");
    printf("            |                            runtime                                            |                              GFLOPS\n");
    printf("%% n num_vecs |  CUDOT       CUGEMV       MAGMAGEMV       MDOT       MDGM    MDGM_SHFL      |      CUDOT       CUGEMV      MAGMAGEMV       MDOT       MDGM      MDGM_SHFL\n");
    printf("%%------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
    printf("\n");

    for( magma_int_t num_vecs=1; num_vecs <= 32; num_vecs += 1 ) {
        // The bounds make this loop run exactly once, with n = 500000.
        for( magma_int_t n=500000; n < 500001; n += 10000 ) {
            int iters = 10;
            // Operation count used for the throughput columns:
            // 2*n per dot product, num_vecs products, iters repetitions.
            float computations = (2.* n * iters * num_vecs);

            #define ENABLE_TIMER
            #ifdef ENABLE_TIMER
            real_Double_t mdot1, mdot2, mdgm1, mdgm2, magmagemv1, magmagemv2, cugemv1, cugemv2, cudot1, cudot2;
            real_Double_t mdot_time, mdgm_time, mdgmshf_time, magmagemv_time, cugemv_time, cudot_time;
            #endif

            // NOTE(review): CHECK presumably jumps to the cleanup label on
            // failure; confirm against the macro definition.
            CHECK( magma_svinit( &a, Magma_DEV, n, num_vecs, one, queue ));
            CHECK( magma_svinit( &b, Magma_DEV, n, 1, one, queue ));
            CHECK( magma_svinit( &x, Magma_DEV, n, 8, one, queue ));
            CHECK( magma_svinit( &y, Magma_DEV, n, 8, one, queue ));
            CHECK( magma_svinit( &skp, Magma_DEV, 1, num_vecs, zero, queue ));

            // warm up
            CHECK( magma_sgemvmdot( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));

            // CUDOT: one magma_sdot call per vector
            #ifdef ENABLE_TIMER
            cudot1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                for( int l=0; l<num_vecs; l++){
                    alpha = magma_sdot( n, a.dval+l*a.num_rows, 1, b.dval, 1, queue );
                    //cudaDeviceSynchronize();    
                }
                //cudaDeviceSynchronize();   
            }
            #ifdef ENABLE_TIMER
            cudot2 = magma_sync_wtime( queue );
            cudot_time=cudot2-cudot1;
            #endif
            // CUGeMV: all products at once as a transposed matrix-vector product
            #ifdef ENABLE_TIMER
            cugemv1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                magma_sgemv( MagmaTrans, n, num_vecs, one, a.dval, n, b.dval, 1, zero, skp.dval, 1, queue );
            }
            #ifdef ENABLE_TIMER
            cugemv2 = magma_sync_wtime( queue );
            cugemv_time=cugemv2-cugemv1;
            #endif
            // MAGMAGeMV: same product through magmablas_sgemv
            #ifdef ENABLE_TIMER
            magmagemv1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                magmablas_sgemv( MagmaTrans, n, num_vecs, one, a.dval, n, b.dval, 1, zero, skp.dval, 1, queue );
            }
            #ifdef ENABLE_TIMER
            magmagemv2 = magma_sync_wtime( queue );
            magmagemv_time=magmagemv2-magmagemv1;
            #endif
            // MDOT: num_vecs/2 calls handling two vectors each, plus one
            // single-vector call when num_vecs is odd. Every call is given
            // the same base pointers, so only the amount of work is modelled.
            #ifdef ENABLE_TIMER
            mdot1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                for( int c = 0; c<num_vecs/2; c++ ){
                    CHECK( magma_smdotc( n, 2, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
                }
                for( int c = 0; c<num_vecs%2; c++ ){
                    CHECK( magma_smdotc( n, 1, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
                }
                //h++;
            }
            #ifdef ENABLE_TIMER
            mdot2 = magma_sync_wtime( queue );
            mdot_time=mdot2-mdot1;
            #endif
            // MDGM: magma_sgemvmdot, one call per iteration
            #ifdef ENABLE_TIMER
            mdgm1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                CHECK( magma_sgemvmdot( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
                //h++;
            }
            #ifdef ENABLE_TIMER
            mdgm2 = magma_sync_wtime( queue );
            mdgm_time=mdgm2-mdgm1;
            #endif
            // MDGM_shfl: magma_sgemvmdot_shfl; reuses the mdgm1/mdgm2 time
            // stamps, the result goes to mdgmshf_time
            
            #ifdef ENABLE_TIMER
            mdgm1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                CHECK( magma_sgemvmdot_shfl( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
            }
            #ifdef ENABLE_TIMER
            mdgm2 = magma_sync_wtime( queue );
            mdgmshf_time=mdgm2-mdgm1;
            #endif
                
                
            //magma_sprint_gpu(num_vecs,1,skp.dval,num_vecs);

            //Chronometry: first six columns are the average time per
            //iteration, the last six are computations / (total time * 1e9)
            #ifdef ENABLE_TIMER
            printf("%d  %d  %e  %e  %e  %e  %e  %e  || %e  %e  %e  %e  %e  %e\n",
                    int(n), int(num_vecs),
                    cudot_time/iters,
                    (cugemv_time)/iters,
                    (magmagemv_time)/iters,
                    (mdot_time)/iters,
                    (mdgm_time)/iters,
                    (mdgmshf_time)/iters,
                    computations/(cudot_time*1e9),
                    computations/(cugemv_time*1e9),
                    computations/(magmagemv_time*1e9),
                    computations/(mdot_time*1e9),
                    computations/(mdgm_time*1e9),
                    computations/(mdgmshf_time*1e9) );
            #endif

            // Release the buffers before the next configuration.
            magma_smfree(&a, queue );
            magma_smfree(&b, queue );
            magma_smfree(&x, queue );
            magma_smfree(&y, queue );
            magma_smfree(&skp, queue );
        }

        //printf("%%================================================================================================================================================\n");
        //printf("\n");
        //printf("\n");
    }
    
    // use alpha to silence compiler warnings
    if ( isnan( real( alpha ))) {
        info = -1;
    }

cleanup:
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
Esempio n. 6
0
/* Fills this sweep with a logarithmically stepped sequence: the sweep
   is resized to hold `points` entries and entry i receives the real
   part of the i-th value produced by logspace (start, stop, points). */
void logsweep::create (nr_double_t start, nr_double_t stop, int points) {
  vector samples = logspace (start, stop, points);
  setSize (points);
  int idx = 0;
  while (idx < points) {
    set (idx, real (samples.get (idx)));
    ++idx;
  }
}
Esempio n. 7
0
// Returns 1 when the real part of every one of the rows * cols stored
// entries is finite, and 0 as soon as one is not.
int tmatrix<nr_type_t>::isFinite (void) {
  int idx = 0;
  while (idx < rows * cols) {
    const bool finite = std::isfinite (real (data[idx]));
    if (!finite)
      return 0;
    ++idx;
  }
  return 1;
}
Esempio n. 8
0
// Driver: advances the grid (regular or multiresolution, selected by the
// REGULAR macro) until five periods g_span[dimX]/g_velocity have passed,
// reports timing and node usage on stderr, and writes one "x y phi" line
// per grid point to /tmp/output.txt.
// Returns 0 on success and 1 when the output file cannot be opened.
int main()
{
    #ifdef _OPENMP
    const u_char num_procs = omp_get_num_procs(); //!< number of available processors
    #else
    const u_char num_procs = 1;
    #endif
    std::cerr << "processors in use: " << short(num_procs) << std::endl;


    // generation of childrens, e.g.: only root = 0, grand-children = 2
    // total number of nodes, including (childsbyDimension) boundary elements

    real simulationTime = g_span[dimX]/g_velocity*5; // 5 periods

#ifdef REGULAR
    monores_grid_t grid(g_level);
#else
    multires_grid_t grid(g_level);
#endif

    auto start = std::chrono::steady_clock::now();

    do {
        grid.timeStep();
    } while(grid.getTime() < simulationTime);

    auto done = std::chrono::steady_clock::now();

    std::cerr << "simulation time passed: " << grid.getTime() << std::endl;
    double elapsed_time = std::chrono::duration_cast<std::chrono::duration<double>>(done - start).count();
    std::cerr << "calculation time: " << elapsed_time << std::endl;

    // Node count of the grid compared with (2^g_level)^g_dimension.
    size_t size = grid.size();
    size_t NN = pow(1 << g_level, g_dimension);
    std::cerr << "used nodes: " << size << "/" << NN << "=" << real(size)/NN << std::endl;


    // output file
#ifndef REGULAR
    grid.unfold(g_level);
#endif
    std::cerr << "after unfold: size = " << grid.size() << std::endl;
    std::ofstream file("/tmp/output.txt");
    if (!file) {
        // Without this check a failed open would silently discard all results.
        std::cerr << "could not open /tmp/output.txt for writing" << std::endl;
        return 1;
    }
    file << "# x y phi" << '\n';
    // Iterate by const reference so that points are not copied one by one.
    for(const point_t& point: grid) {
        // std::cerr << point.m_x[dimX] << " : " << point.m_phi << std::endl;
        file << boost::format("%e %e %e\n")
                % point.m_x[dimX]
                % point.m_x[dimY]
                // % point.m_index[dimX]
                // % point.m_index[dimY]
                % point.m_phi;
        /*
        Alternative output, one row of phi values per line:
        file << boost::format("%e ") % point.m_phi;
        static size_t count = 0;
        const size_t N = (1 << g_level);
        if (++count % N == 0 ) file << std::endl;
        */
    }
    file.close();

    return 0;
}
Esempio n. 9
0
/*
 * Dispatches one CoreInputDevice call: `method` selects the operation,
 * `ptr` points at its argument structure and `ret_ptr` at the structure
 * receiving the result. The operation's own result code is stored in the
 * return structure and *ret_length is set to that structure's size.
 *
 * Returns DFB_OK when the method is known (regardless of the operation's
 * result) and DFB_NOSUCHMETHOD otherwise.
 */
static DFBResult
__CoreInputDeviceDispatch__Dispatch( CoreInputDevice *obj,
                                FusionID      caller,
                                int           method,
                                void         *ptr,
                                unsigned int  length,
                                void         *ret_ptr,
                                unsigned int  ret_size,
                                unsigned int *ret_length )
{
    D_UNUSED
    DFBResult ret;

    /* Implementation object every recognised method is forwarded to. */
    DirectFB::IInputDevice_Real impl( core_dfb, obj );

    switch (method) {
        case CoreInputDevice_SetKeymapEntry: {
            D_UNUSED
            CoreInputDeviceSetKeymapEntry       *request = static_cast<CoreInputDeviceSetKeymapEntry *>( ptr );
            CoreInputDeviceSetKeymapEntryReturn *reply   = static_cast<CoreInputDeviceSetKeymapEntryReturn *>( ret_ptr );

            D_DEBUG_AT( DirectFB_CoreInputDevice, "=-> CoreInputDevice_SetKeymapEntry\n" );

            reply->result = impl.SetKeymapEntry( request->key_code, &request->entry );
            *ret_length   = sizeof(CoreInputDeviceSetKeymapEntryReturn);

            return DFB_OK;
        }

        case CoreInputDevice_ReloadKeymap: {
            D_UNUSED
            CoreInputDeviceReloadKeymap       *request = static_cast<CoreInputDeviceReloadKeymap *>( ptr );
            CoreInputDeviceReloadKeymapReturn *reply   = static_cast<CoreInputDeviceReloadKeymapReturn *>( ret_ptr );

            D_DEBUG_AT( DirectFB_CoreInputDevice, "=-> CoreInputDevice_ReloadKeymap\n" );

            reply->result = impl.ReloadKeymap(  );
            *ret_length   = sizeof(CoreInputDeviceReloadKeymapReturn);

            return DFB_OK;
        }

        case CoreInputDevice_SetConfiguration: {
            D_UNUSED
            CoreInputDeviceSetConfiguration       *request = static_cast<CoreInputDeviceSetConfiguration *>( ptr );
            CoreInputDeviceSetConfigurationReturn *reply   = static_cast<CoreInputDeviceSetConfigurationReturn *>( ret_ptr );

            D_DEBUG_AT( DirectFB_CoreInputDevice, "=-> CoreInputDevice_SetConfiguration\n" );

            reply->result = impl.SetConfiguration( &request->config );
            *ret_length   = sizeof(CoreInputDeviceSetConfigurationReturn);

            return DFB_OK;
        }

    }

    /* No case matched the requested method id. */
    return DFB_NOSUCHMETHOD;
}
Esempio n. 10
0
 /** \brief rescale this value by its magnitude so that it stays a valid rotation */
 void normalize() {
   const auto squaredNorm = real()*real() + imag()*imag();
   (*this) /= std::sqrt( squaredNorm );
 }
Esempio n. 11
0
 inline rot_complex operator*(const rot_complex<T2> &r) const {
   // Complex product: (a + bi)(c + di) = (ac - bd) + (bc + ad)i.
   const auto re = real() * r.real() - imag() * r.imag();
   const auto im = imag() * r.real() + real() * r.imag();
   return rot_complex( re, im );
 }
Esempio n. 12
0
 inline void invert(rot_complex<T2> &out) const {
   // Multiplicative inverse of a + bi is (a - bi) / (a^2 + b^2): the
   // imaginary part changes sign, so that (*this) * out is the identity
   // (1, 0) under this class's operator*.
   // The squared magnitude is read before out is written, which keeps the
   // result correct even when out refers to this object.
   T denom = real()*real() + imag()*imag();
   out.real() =  real() / denom;
   out.imag() = -imag() / denom;
 }
Esempio n. 13
0
  /**
     Conjugate-gradient solve of A x = b with an optional preconditioner K,
     a lower-precision ("sloppy") inner iteration and reliable updates.

     @param x On entry the initial guess, on exit the solution.
     @param b Source vector. If its norm is zero, x is set to b, the
              residuals in param are set to zero and the solve returns
              immediately.

     The iteration count, timing, flop rate and true residual are stored in
     param. When K is null the unpreconditioned recurrences are used and
     none of the preconditioner work vectors are allocated.
  */
  void PreconCG::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b)
  {

    profile.Start(QUDA_PROFILE_INIT);
    // Check to see that we're not trying to invert on a zero-field source
    const double b2 = norm2(b);
    if(b2 == 0){
      profile.Stop(QUDA_PROFILE_INIT);
      printfQuda("Warning: inverting on zero-field source\n");
      x=b;
      param.true_res = 0.0;
      param.true_res_hq = 0.0;
      // Nothing left to solve; continuing would stop the INIT profile a
      // second time and later divide by b2 == 0.
      return;
    }

    int k=0;
    int rUpdate=0;

    // Preconditioner work vectors; they stay null when K is not set.
    cudaColorSpinorField* minvrPre = nullptr;
    cudaColorSpinorField* rPre = nullptr;
    cudaColorSpinorField* minvr = nullptr;
    cudaColorSpinorField* minvrSloppy = nullptr;
    cudaColorSpinorField* p = nullptr;


    ColorSpinorParam csParam(b);
    cudaColorSpinorField r(b);
    if(K) minvr = new cudaColorSpinorField(b);
    csParam.create = QUDA_ZERO_FIELD_CREATE;
    cudaColorSpinorField y(b,csParam);

    mat(r, x, y); // => r = A*x;
    double r2 = xmyNormCuda(b,r);

    csParam.setPrecision(param.precision_sloppy);
    cudaColorSpinorField tmpSloppy(x,csParam);
    cudaColorSpinorField Ap(x,csParam);

    // Sloppy residual: alias the full-precision field when the precisions
    // agree, otherwise work on a converted copy.
    cudaColorSpinorField *r_sloppy;
    if(param.precision_sloppy == x.Precision())
    {
      r_sloppy = &r;
      minvrSloppy = minvr;
    }else{
      csParam.create = QUDA_COPY_FIELD_CREATE;
      r_sloppy = new cudaColorSpinorField(r,csParam);
      if(K) minvrSloppy = new cudaColorSpinorField(*minvr,csParam);
    }
  

    cudaColorSpinorField *x_sloppy;
    if(param.precision_sloppy == x.Precision() ||
        !param.use_sloppy_partial_accumulator) {
      csParam.create = QUDA_REFERENCE_FIELD_CREATE;
      x_sloppy = &x;
    }else{
      csParam.create = QUDA_COPY_FIELD_CREATE;
      x_sloppy = new cudaColorSpinorField(x,csParam);
    }


    cudaColorSpinorField &xSloppy = *x_sloppy;
    cudaColorSpinorField &rSloppy = *r_sloppy;

    if(&x != &xSloppy){
      copyCuda(y, x); // copy x to y
      zeroCuda(xSloppy);
    }else{
      zeroCuda(y); // no reliable updates // NB: check this
    }

    const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;

    if(K){
      csParam.create = QUDA_COPY_FIELD_CREATE;
      csParam.setPrecision(param.precision_precondition);
      rPre = new cudaColorSpinorField(rSloppy,csParam);
      // Create minvrPre 
      minvrPre = new cudaColorSpinorField(*rPre);
      globalReduce = false;
      (*K)(*minvrPre, *rPre);  
      globalReduce = true;
      *minvrSloppy = *minvrPre;
      p = new cudaColorSpinorField(*minvrSloppy);
    }else{
      p = new cudaColorSpinorField(rSloppy);
    }

  
    profile.Stop(QUDA_PROFILE_INIT);


    profile.Start(QUDA_PROFILE_PREAMBLE);



    double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
    double heavy_quark_res = 0.0; // heavy quark residual 
    if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z);
    int heavy_quark_check = 10; // how often to check the heavy quark residual


    double alpha = 0.0, beta=0.0;
    double pAp;
    double rMinvr  = 0;
    double rMinvr_old = 0.0;
    double r_new_Minvr_old = 0.0;
    double r2_old = 0;
    r2 = norm2(r);

    double rNorm = sqrt(r2);
    double r0Norm = rNorm;
    double maxrx = rNorm;
    double maxrr = rNorm;
    double delta = param.delta;


    if(K) rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);

    profile.Stop(QUDA_PROFILE_PREAMBLE);
    profile.Start(QUDA_PROFILE_COMPUTE);


    quda::blas_flops = 0;

    const int maxResIncrease = 0;
    // Consecutive reliable updates whose residual norm grew. Kept per solve
    // (not function-static) so that separate solves cannot influence each
    // other.
    int resIncrease = 0;

    while(!convergence(r2, heavy_quark_res, stop, param.tol_hq) && k < param.maxiter){

      matSloppy(Ap, *p, tmpSloppy);

      double sigma;
      pAp   = reDotProductCuda(*p,Ap);

      alpha = (K) ? rMinvr/pAp : r2/pAp;
      Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy); 
      // r --> r - alpha*A*p
      r2_old = r2;
      r2 = real(cg_norm);
  
      sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2; // use r2 if (r_k+1, r_k-1 - r_k) breaks

      if(K) rMinvr_old = rMinvr;

      rNorm = sqrt(r2);
      if(rNorm > maxrx) maxrx = rNorm;
      if(rNorm > maxrr) maxrr = rNorm;


      int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
      int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;

  
      // force a reliable update if we are within target tolerance (only if doing reliable updates)
      if( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1;
    

      if( !(updateR || updateX) ){

        if(K){
          r_new_Minvr_old = reDotProductCuda(rSloppy,*minvrSloppy);
          *rPre = rSloppy;
          globalReduce = false;
          (*K)(*minvrPre, *rPre);
          globalReduce = true;
      

          *minvrSloppy = *minvrPre;

          rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
          beta = (rMinvr - r_new_Minvr_old)/rMinvr_old; 
          axpyZpbxCuda(alpha, *p, xSloppy, *minvrSloppy, beta);
        }else{
          beta = sigma/r2_old; // use the alternative beta computation
          axpyZpbxCuda(alpha, *p, xSloppy, rSloppy, beta);
        }
      } else { // reliable update

        axpyCuda(alpha, *p, xSloppy); // xSloppy += alpha*p
        copyCuda(x, xSloppy);
        xpyCuda(x, y); // y += x
        // Now compute r 
        mat(r, y, x); // x is just a temporary here
        r2 = xmyNormCuda(b, r);
        copyCuda(rSloppy, r); // copy r to rSloppy
        zeroCuda(xSloppy);


        // break-out check if we have reached the limit of the precision
        if(sqrt(r2) > r0Norm && updateX) { // reuse r0Norm for this 
          warningQuda("PCG: new reliable residual norm %e is greater than previous reliable residual norm %e", sqrt(r2), r0Norm);

          k++;
          rUpdate++;
          if(++resIncrease > maxResIncrease) break;
        }else{
          resIncrease = 0;
        }

        rNorm = sqrt(r2);
        maxrr = rNorm;
        maxrx = rNorm;
        r0Norm = rNorm;
        ++rUpdate;

        if(K){
          *rPre = rSloppy;
          globalReduce = false;
          (*K)(*minvrPre, *rPre);
          globalReduce = true;

          *minvrSloppy = *minvrPre;

          rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
          beta = rMinvr/rMinvr_old;        

          xpayCuda(*minvrSloppy, beta, *p); // p = minvrSloppy + beta*p
        }else{ // standard CG - no preconditioning

          // explicitly restore the orthogonality of the gradient vector
          double rp = reDotProductCuda(rSloppy, *p)/(r2);
          axpyCuda(-rp, rSloppy, *p);

          beta = r2/r2_old;
          xpayCuda(rSloppy, beta, *p);
        }
      }      
      ++k;
      PrintStats("PCG", k, r2, b2, heavy_quark_res);
    }


    profile.Stop(QUDA_PROFILE_COMPUTE);

    profile.Start(QUDA_PROFILE_EPILOGUE);

    if(x.Precision() != param.precision_sloppy) copyCuda(x, xSloppy);
    xpyCuda(y, x); // x += y


    param.secs = profile.Last(QUDA_PROFILE_COMPUTE);
    double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops() + matPrecon.flops())*1e-9;
    reduceDouble(gflops);
    param.gflops = gflops;
    param.iter += k;

    if (k==param.maxiter)
      warningQuda("Exceeded maximum iterations %d", param.maxiter);

    if (getVerbosity() >= QUDA_VERBOSE)
      printfQuda("CG: Reliable updates = %d\n", rUpdate);





    // compute the true residual 
    mat(r, x, y);
    double true_res = xmyNormCuda(b, r);
    param.true_res = sqrt(true_res / b2);

    // reset the flops counters
    quda::blas_flops = 0;
    mat.flops();
    matSloppy.flops();
    matPrecon.flops();

    profile.Stop(QUDA_PROFILE_EPILOGUE);
    profile.Start(QUDA_PROFILE_FREE);

    if(K){ // These are only needed if preconditioning is used
      delete minvrPre;
      delete rPre;
      delete minvr;
      // minvrSloppy aliases minvr when the precisions agree
      if(x.Precision() != param.precision_sloppy)  delete minvrSloppy;
    }
    delete p;

    // NOTE(review): x_sloppy aliases x (and must not be deleted) when
    // use_sloppy_partial_accumulator is off, even if the precisions differ -
    // confirm this pre-existing condition against the allocation above.
    if(x.Precision() != param.precision_sloppy){
      delete x_sloppy;
      delete r_sloppy;
    }

    profile.Stop(QUDA_PROFILE_FREE);
    return;
  }
Esempio n. 14
0
// Rebuilds a sparse matrix from the stored node `dataset`.
// The node provides the matrix dimensions and four child datasets:
// "__nnz__" (non-zero count), "__inner__" and "__outer__" (integer index
// vectors) and "__data__" (values, real or complex).
// Returns the new matrix, or nullptr when one of the index/value reads
// fails. The node is closed with closeList6 on every path.
static types::InternalType* import_sparse(int dataset)
{
    // Closes the node and reports failure to the caller.
    auto abortImport = [dataset]() -> types::InternalType*
    {
        closeList6(dataset);
        return nullptr;
    };

    // matrix dimensions; the complex flag is refreshed by every info query below
    int isComplex = 0;
    std::vector<int> dims;
    getDimsNode(dataset, &isComplex, dims);

    // number of non-zero entries
    int nonZeros = 0;
    const int nnzId = getDataSetIdFromName(dataset, "__nnz__");
    readInteger32Matrix(nnzId, &nonZeros);

    if (nonZeros == 0)
    {
        // empty matrix: only the dimensions matter
        closeList6(dataset);
        return new types::Sparse(dims[0], dims[1]);
    }

    // inner index vector: the first query yields the rank, the second the extents
    const int innerId = getDataSetIdFromName(dataset, "__inner__");
    int innerRank = 0;
    getDatasetInfo(innerId, &isComplex, &innerRank, NULL);
    std::vector<int> innerDims(innerRank);
    const int innerCount = getDatasetInfo(innerId, &isComplex, &innerRank, innerDims.data());

    std::vector<int> inner(innerCount);
    if (readInteger32Matrix(innerId, inner.data()) < 0)
    {
        return abortImport();
    }

    // outer index vector
    const int outerId = getDataSetIdFromName(dataset, "__outer__");
    int outerRank = 0;
    getDatasetInfo(outerId, &isComplex, &outerRank, NULL);
    std::vector<int> outerDims(outerRank);
    const int outerCount = getDatasetInfo(outerId, &isComplex, &outerRank, outerDims.data());

    std::vector<int> outer(outerCount);
    if (readInteger32Matrix(outerId, outer.data()) < 0)
    {
        return abortImport();
    }

    // values
    const int valuesId = getDataSetIdFromName(dataset, "__data__");
    int valuesRank = 0;
    getDatasetInfo(valuesId, &isComplex, &valuesRank, NULL);
    std::vector<int> valuesDims(valuesRank);
    const int valuesCount = getDatasetInfo(valuesId, &isComplex, &valuesRank, valuesDims.data());

    std::vector<double> realPart(valuesCount);
    types::Sparse* result = nullptr;

    if (isComplex)
    {
        std::vector<double> imagPart(valuesCount);
        if (readDoubleComplexMatrix(valuesId, realPart.data(), imagPart.data()) < 0)
        {
            return abortImport();
        }

        result = new types::Sparse(dims[0], dims[1], nonZeros, inner.data(), outer.data(), realPart.data(), imagPart.data());
    }
    else
    {
        if (readDoubleMatrix(valuesId, realPart.data()) < 0)
        {
            return abortImport();
        }

        result = new types::Sparse(dims[0], dims[1], nonZeros, inner.data(), outer.data(), realPart.data(), nullptr);
    }

    closeList6(dataset);
    return result;
}
Esempio n. 15
0
/*
 * Multipole-to-local style translation evaluated for a batch of N
 * displacement vectors at once.
 *
 * CiL  : output; for every coefficient index c in [0, FMM_P*FMM_P) the N
 *        values CiL[N*c + i] are OVERWRITTEN (not accumulated).
 * CjM  : input coefficients, indexed by n*n+n+-|m| (one value per
 *        coefficient, shared by all i).
 *        NOTE(review): taken by value, so the vector is copied on each call.
 * d    : d[0][i], d[1][i], d[2][i] are the Cartesian components of the
 *        i-th displacement.
 * N    : number of displacements processed.
 * L_r, L_i : scratch accumulators, at least N entries each.
 * Ynm  : scratch table written by the first loop and read by the second;
 *        laid out as Ynm[index * Nynm + i], where Nynm is N rounded up to
 *        a multiple of 64.
 *
 * Storage convention visible below: the cosine (real) part for (n, m) is
 * kept at index n*n+n+m and the sine (imaginary) part at index n*n+n-m.
 * COMPLEX_MULT / COMPLEX_MULT_ADD / SGN, prefactor, Cnm_r and Cnm_i are
 * defined elsewhere; presumably complex multiply / multiply-accumulate and
 * a sign function -- confirm against their definitions.
 */
void exafmm_kernel::M2L(std::vector<real>& CiL, const std::vector<real> CjM,
		const std::array<std::vector<real>, NDIM>& d, integer N, std::vector<real>& L_r, std::vector<real>& L_i, std::vector<real>& Ynm) {
	integer Nynm;
	// Row stride of Ynm: N rounded up to the next multiple of 64.
	Nynm = (((N - 1) / 64) + 1) * 64;
	// Phase 1: for every displacement i, tabulate rho^(-n-1) * prefactor * P_n^m
	// times cos(m*phi) and sin(m*phi) into Ynm.
#pragma vector aligned
#pragma simd
	for (integer i = 0; i != N; ++i) {
		// Cartesian -> spherical. rho == 0 is not guarded against here.
		real rho = std::sqrt(d[0][i] * d[0][i] + d[1][i] * d[1][i] + d[2][i] * d[2][i]);
		real theta = std::acos(d[2][i] / rho);
		real phi = std::atan2(d[1][i], d[0][i]);
		real x = std::cos(theta);                              // x = cos(theta)
		real y = std::sin(theta);                              // y = sin(theta)
		real fact = 1;                                   // Initialize 2 * m + 1
		real pn = 1;                        // Initialize Legendre polynomial Pn
		real rhom = real(1.0) / rho;                          // Initialize rho^(-m-1)
#pragma novector
		for (int m = 0; m != FMM_P; ++m) {                     // Loop over m in Ynm
			real eim_r = std::cos(real(m) * phi);
			real eim_i = std::sin(real(m) * phi);
			real p = pn;                  //  Associated Legendre polynomial Pnm
			int npn = m * m + 2 * m;                  //  Index of Ynm for m > 0
			int nmn = m * m;                          //  Index of Ynm for m < 0
			Ynm[npn * Nynm + i] = rhom * p * prefactor[npn] * eim_r; //  rho^(-m-1) * Ynm for m > 0
			if (npn != nmn) {
				// Sine part goes to the negative-m slot; skipped for m == 0 where both indices coincide.
				Ynm[nmn * Nynm + i] = rhom * p * prefactor[npn] * eim_i; //  rho^(-m-1) * Ynm for m < 0
			}
			real p1 = p;                                              //  Pnm-1
			p = x * real(2 * m + 1) * p1;          //  Pnm using recurrence relation
			rhom /= rho;                                          //  rho^(-m-1)
			real rhon = rhom;                                     //  rho^(-n-1)
#pragma novector
			for (int n = m + 1; n != FMM_P; ++n) {            //  Loop over n in Ynm
				int npm = n * n + n + m;             //   Index of Ynm for m > 0
				int nmm = n * n + n - m;             //   Index of Ynm for m < 0
				Ynm[npm * Nynm + i] = rhon * p * prefactor[npm] * eim_r; //   rho^(-n-1) * Ynm for m > 0
				if (npm != nmm) {
					Ynm[nmm * Nynm + i] = rhon * p * prefactor[npm] * eim_i; //   rho^(-n-1) * Ynm for m < 0
				}
				real p2 = p1;                                         //   Pnm-2
				p1 = p;                                               //   Pnm-1
				p = (x * real(2 * n + 1) * p1 - real(n + m) * p2) / real(n - m + 1); //   Pnm using recurrence relation
				rhon /= rho;                                     //   rho^(-n-1)
			}                                         //  End loop over n in Ynm
			pn = -pn * fact * y;                                      //  Pn
			fact += real(2);                                             //  2 * m + 1
		}                                              // End loop over m in Ynm
	}

	// Phase 2: for every output coefficient (j, k) with 0 <= k <= j, sum the
	// contributions of all source coefficients (n, m) with n + j < FMM_P.
	for (integer j = 0; j != FMM_P; ++j) {
		for (integer k = 0; k <= j; ++k) {
			const integer jkp = j * j + j + k;   // slot receiving the real part
			const integer jkm = j * j + j - k;   // slot receiving the imaginary part
			// Reset the per-displacement accumulators for this (j, k).
#pragma vector aligned
#pragma simd
			for (integer i = 0; i != N; ++i) {
				L_r[i] = L_i[i] = real(0.0);
			}
			for (integer n = 0; n != FMM_P - j; ++n) {
				for (integer m = -n; m <= +n; ++m) {
					const integer nn = n * n + n;
					const integer nj = (n + j) * ((n + j) + 1);
					// Index into the Cnm_r / Cnm_i tables for the (j, k, n, m) combination.
					const integer jknm = jkp * FMM_P * FMM_P + n * n + n + m;
					const integer nmp = nn + std::abs(m);
					const integer nmm = nn - std::abs(m);
					const integer jnkmp = nj + std::abs(m - k);
					const integer jnkmm = nj - std::abs(m - k);
					real tmp_r, tmp_i;
					const real sgn = SGN(m-k);
					// tmp = (CjM[nmp] + i*SGN(m)*CjM[nmm]) * Cnm[jknm]; independent of i, so computed once.
					COMPLEX_MULT(tmp_r, tmp_i, CjM[nmp], SGN(m) * CjM[nmm], Cnm_r[jknm], Cnm_i[jknm]);
					const auto Yp = Ynm.data() + Nynm * jnkmp;
					const auto Ym = Ynm.data() + Nynm * jnkmm;
#pragma vector aligned
#pragma simd
					for (integer i = 0; i != N; ++i) {
						COMPLEX_MULT_ADD(L_r[i], L_i[i], tmp_r, tmp_i, Yp[i], sgn * Ym[i]);
					}
				}
			}
			// CiL uses stride N (not the padded Nynm), hence the aligned pragma is disabled below.
			auto Cp = CiL.data() + N * jkp;
			auto Cm = CiL.data() + N * jkm;
//#pragma vector aligned
#pragma simd
			for (integer i = 0; i != N; ++i) {
				Cp[i] = L_r[i];
				// For k == 0 Cp and Cm alias the same slot, so the real part is written again.
				Cm[i] = (k == 0) ? L_r[i] : L_i[i];
			}
		}
	}

}
Esempio n. 16
0
// Returns true when either the real part or the imaginary part of this
// value is greater than or equal to the corresponding part of `a`.
bool Complex::operator>=(const Complex & a) const {
  return (real() >= a.real()) || (imag() >= a.imag());
}
Esempio n. 17
0
/* EAM July 2004  (revised to dynamic buffer July 2005)
 * There are probably an infinite number of things that can
 * go wrong if the user mis-matches arguments and format strings
 * in the call to sprintf, but I hope none will do worse than
 * result in a garbage output string.
 *
 * Stack protocol as used below: the parameter count is popped first, then
 * that many values. The format string ends up in args[nargs-1] and the
 * values to be formatted are consumed from args[nargs-2] down to args[0].
 * The format is processed one "%..." fragment at a time: each fragment
 * (conversion spec plus the literal text up to the next '%') is temporarily
 * NUL-terminated in place and handed to snprintf/sprintf together with one
 * parameter. The resulting string is pushed back on the stack.
 *
 * NOTE(review): nargs == 0 would index args[-1] below; presumably the
 * caller guarantees at least the format string -- confirm.
 * NOTE(review): if int_error() does not return, args/buffer allocated here
 * are not released on the error paths -- confirm how int_error unwinds.
 */
void
f_sprintf(union argument *arg)
{
    struct value a[10], *args;
    struct value num_params;
    struct value result;
    char *buffer;
    int bufsize;
    char *next_start, *outpos, tempchar;
    int next_length;
    char *prev_start;
    int prev_pos;
    int i, remaining;
    int nargs = 0;
    int save_errno;
    enum DATA_TYPES spec_type;

    /* Retrieve number of parameters from top of stack */
    pop(&num_params);
    nargs = num_params.v.int_val;
    if (nargs > 10) {	/* Fall back to slow but sure allocation */
	args = gp_alloc(sizeof(struct value)*nargs, "sprintf args");
    } else
	args = a;

    for (i=0; i<nargs; i++)
	pop(&args[i]);  /* pop next argument */

    /* Make sure we got a format string of some sort */
    if (args[nargs-1].type != STRING)
	int_error(NO_CARET,"First parameter to sprintf must be a format string");

    /* Allocate space for the output string. If this isn't */
    /* long enough we can reallocate a larger space later. */
    bufsize = 80 + strlen(args[nargs-1].v.string_val);
    buffer = gp_alloc(bufsize, "f_sprintf");

    /* Copy leading fragment of format into output buffer */
    /* strncpy() copies exactly next_length characters and does not append */
    /* a NUL here; termination comes from the later snprintf()/sprintf()   */
    /* calls or from the final "*outpos = '\0'" below.                     */
    outpos = buffer;
    next_start  = args[nargs-1].v.string_val;
    next_length = strcspn(next_start,"%");
    strncpy(outpos, next_start, next_length);

    next_start += next_length;
    outpos += next_length;

    /* Format the remaining sprintf() parameters one by one */
    /* prev_start/prev_pos remember where the current fragment began in the */
    /* format and in the output, so it can be redone after a reallocation.  */
    prev_start = next_start;
    prev_pos = next_length;
    remaining = nargs - 1;

    /* If the user has set an explicit LC_NUMERIC locale, apply it */
    /* to sprintf calls during expression evaluation.              */
    set_numeric_locale();

    /* Each time we start this loop we are pointing to a % character */
    /* The post-decrement means next_param walks args[nargs-2] ... args[0]. */
    while (remaining-->0 && next_start[0] && next_start[1]) {
	struct value *next_param = &args[remaining];

	/* Check for %%; print as literal and don't consume a parameter */
	/* NOTE(review): this copy loop does not check the remaining buffer space. */
	if (!strncmp(next_start,"%%",2)) {
	    next_start++;
	    do {
		*outpos++ = *next_start++;
	    } while(*next_start && *next_start != '%');
	    remaining++;	/* undo the decrement done by the loop condition */
	    continue;
	}

	/* Isolate this fragment by temporarily overwriting the character */
	/* that follows it (the next '%' or the final NUL) with a NUL.     */
	next_length = strcspn(next_start+1,"%") + 1;
	tempchar = next_start[next_length];
	next_start[next_length] = '\0';

	spec_type = sprintf_specifier(next_start);

	/* string value <-> numerical value check */
	if ( spec_type == STRING && next_param->type != STRING )
	    int_error(NO_CARET,"f_sprintf: attempt to print numeric value with string format");
	if ( spec_type != STRING && next_param->type == STRING )
	    int_error(NO_CARET,"f_sprintf: attempt to print string value with numeric format");

#ifdef HAVE_SNPRINTF
	/* Use the format to print next arg */
	save_errno = errno;
	switch(spec_type) {
	case INTGR:
	    snprintf(outpos,bufsize-(outpos-buffer),
		     next_start, (int)real(next_param));
	    break;
	case CMPLX:
	    snprintf(outpos,bufsize-(outpos-buffer),
		     next_start, real(next_param));
	    break;
	case STRING:
	    snprintf(outpos,bufsize-(outpos-buffer),
		next_start, next_param->v.string_val);
	    break;
	default:
	    int_error(NO_CARET,"internal error: invalid spec_type");
	}
#if _MSC_VER
       buffer[bufsize-1] = '\0';	/* VC++ is not ANSI-compliant */
       if (errno == ERANGE) errno = save_errno;
#endif 

#else
	/* FIXME - this is bad; we should dummy up an snprintf equivalent */
	/* Without snprintf() the output length is not bounded by bufsize. */
	switch(spec_type) {
	case INTGR:
	    sprintf(outpos, next_start, (int)real(next_param));
	    break;
	case CMPLX:
	    sprintf(outpos, next_start, real(next_param));
	    break;
	case STRING:
	    sprintf(outpos, next_start, next_param->v.string_val);
	    break;
	default:
	    int_error(NO_CARET,"internal error: invalid spec_type");
	}
#endif

	/* Restore the character that was overwritten to isolate the fragment */
	next_start[next_length] = tempchar;
	next_start += next_length;
	outpos = &buffer[strlen(buffer)];

	/* Check whether previous parameter output hit the end of the buffer */
	/* If so, reallocate a larger buffer, go back and try it again.      */
	if (strlen(buffer) >= bufsize-2) {
	    bufsize *= 2;
	    buffer = gp_realloc(buffer, bufsize, "f_sprintf");
	    /* Rewind both the format and the output to the saved positions */
	    /* and hand the same parameter back for another attempt.        */
	    next_start = prev_start;
	    outpos = buffer + prev_pos;
	    remaining++;
	    continue;
	} else {
	    prev_start = next_start;
	    prev_pos = outpos - buffer;
	}

    }

    /* Copy the trailing portion of the format, if any */
    /* We could just call snprintf(), but it doesn't check for */
    /* whether there really are more variables to handle.      */
    /* A "%%" pair in the trailing text is collapsed to a single '%'; */
    /* i bounds the copy to the space left in the buffer.             */
    i = bufsize - (outpos-buffer);
    while (*next_start && --i > 0) {
	if (*next_start == '%' && *(next_start+1) == '%')
	    next_start++;
	*outpos++ = *next_start++;
    }
    *outpos = '\0';

    FPRINTF((stderr," snprintf result = \"%s\"\n",buffer));
    push(Gstring(&result, buffer));
    free(buffer);

    /* Free any strings from parameters we have now used */
    for (i=0; i<nargs; i++)
	gpfree_string(&args[i]);

    /* Only the heap-allocated argument array (nargs > 10) needs freeing */
    if (args != a)
	free(args);

    /* Return to C locale for internal use */
    reset_numeric_locale();

}
Esempio n. 18
0
// Modulus: square root of the sum of the squared real and imaginary parts.
double Complex::mod() const {
  const auto sumOfSquares = sqr(real()) + sqr(imag());
  return sqrt(sumOfSquares);
}
Esempio n. 19
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zunmbr

   For every problem size given on the command line and every combination of
   vect (Q/P), side (Left/Right) and trans (ConjTrans/NoTrans), this driver
   builds a random matrix A, bidiagonalizes it with magma_zgebrd, applies the
   resulting unitary factor to a random C with both lapackf77_zunmbr and
   magma_zunmbr, and compares the two results.

   Returns the number of (size, vect, side, trans, iter) cases whose relative
   error was not below the tolerance, so 0 means every case passed.
*/
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double Cnorm, error, dwork[1];
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magma_int_t ione = 1;
    magma_int_t m, n, k, mi, ni, mm, nn, nq, size, info;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t nb, ldc, lda, lwork, lwork_max;
    magmaDoubleComplex *C, *R, *A, *work, *tau, *tauq, *taup;
    double *d, *e;
    magma_int_t status = 0;   // running count of failed cases; becomes the exit code
    
    magma_opts opts;
    opts.parse_opts( argc, argv );
    
    // need slightly looser bound (60*eps instead of 30*eps) for some tests
    opts.tolerance = max( 60., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    // test all combinations of input parameters
    magma_vect_t  vect [] = { MagmaQ,          MagmaP       };
    magma_side_t  side [] = { MagmaLeft,       MagmaRight   };
    magma_trans_t trans[] = { Magma_ConjTrans, MagmaNoTrans };

    printf("%%   M     N     K   vect side   trans   CPU Gflop/s (sec)   GPU Gflop/s (sec)   ||R||_F / ||QC||_F\n");
    printf("%%==============================================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
      for( int ivect = 0; ivect < 2; ++ivect ) {
      for( int iside = 0; iside < 2; ++iside ) {
      for( int itran = 0; itran < 2; ++itran ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            m = opts.msize[itest];
            n = opts.nsize[itest];
            k = opts.ksize[itest];
            nb  = magma_get_zgebrd_nb( m, n );
            ldc = m;
            // A is nq x k (vect=Q) or k x nq (vect=P)
            // where nq=m (left) or nq=n (right)
            nq  = (side[iside] == MagmaLeft ? m  : n );
            mm  = (vect[ivect] == MagmaQ    ? nq : k );
            nn  = (vect[ivect] == MagmaQ    ? k  : nq);
            lda = mm;
            
            // MBR calls either MQR or MLQ in various ways
            // (the flop count below mirrors that choice; note the Q branch tests
            // nq >= k while the P branch tests nq > k)
            if ( vect[ivect] == MagmaQ ) {
                if ( nq >= k ) {
                    gflops = FLOPS_ZUNMQR( m, n, k, side[iside] ) / 1e9;
                }
                else {
                    if ( side[iside] == MagmaLeft ) {
                        mi = m - 1;
                        ni = n;
                    }
                    else {
                        mi = m;
                        ni = n - 1;
                    }
                    gflops = FLOPS_ZUNMQR( mi, ni, nq-1, side[iside] ) / 1e9;
                }
            }
            else {
                if ( nq > k ) {
                    gflops = FLOPS_ZUNMLQ( m, n, k, side[iside] ) / 1e9;
                }
                else {
                    if ( side[iside] == MagmaLeft ) {
                        mi = m - 1;
                        ni = n;
                    }
                    else {
                        mi = m;
                        ni = n - 1;
                    }
                    gflops = FLOPS_ZUNMLQ( mi, ni, nq-1, side[iside] ) / 1e9;
                }
            }
            
            // workspace for gebrd is (mm + nn)*nb
            // workspace for unmbr is m*nb or n*nb, depending on side
            lwork_max = max( (mm + nn)*nb, max( m*nb, n*nb ));
            // this rounds it up slightly if needed to agree with lwork query below
            lwork_max = int( real( magma_zmake_lwork( lwork_max )));
            
            TESTING_MALLOC_CPU( C,    magmaDoubleComplex, ldc*n );
            TESTING_MALLOC_CPU( R,    magmaDoubleComplex, ldc*n );
            TESTING_MALLOC_CPU( A,    magmaDoubleComplex, lda*nn );
            TESTING_MALLOC_CPU( work, magmaDoubleComplex, lwork_max );
            TESTING_MALLOC_CPU( d,    double,             min(mm,nn) );
            TESTING_MALLOC_CPU( e,    double,             min(mm,nn) );
            TESTING_MALLOC_CPU( tauq, magmaDoubleComplex, min(mm,nn) );
            TESTING_MALLOC_CPU( taup, magmaDoubleComplex, min(mm,nn) );
            
            // C is full, m x n
            // R starts as a copy of C: LAPACK works on C, MAGMA works on R.
            size = ldc*n;
            lapackf77_zlarnv( &ione, ISEED, &size, C );
            lapackf77_zlacpy( "Full", &m, &n, C, &ldc, R, &ldc );
            
            size = lda*nn;
            lapackf77_zlarnv( &ione, ISEED, &size, A );
            
            // compute BRD factorization to get Householder vectors in A, tauq, taup
            //lapackf77_zgebrd( &mm, &nn, A, &lda, d, e, tauq, taup, work, &lwork_max, &info );
            magma_zgebrd( mm, nn, A, lda, d, e, tauq, taup, work, lwork_max, &info );
            // A failure here is only reported; the test case still runs.
            if (info != 0) {
                printf("magma_zgebrd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            // Pick the scalar factors that belong to the requested factor (Q or P).
            if ( vect[ivect] == MagmaQ ) {
                tau = tauq;
            } else {
                tau = taup;
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            cpu_time = magma_wtime();
            lapackf77_zunmbr( lapack_vect_const( vect[ivect] ),
                              lapack_side_const( side[iside] ),
                              lapack_trans_const( trans[itran] ),
                              &m, &n, &k,
                              A, &lda, tau, C, &ldc, work, &lwork_max, &info );
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gflops / cpu_time;
            if (info != 0) {
                printf("lapackf77_zunmbr returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            // query for workspace size
            lwork = -1;
            magma_zunmbr( vect[ivect], side[iside], trans[itran],
                          m, n, k,
                          A, lda, tau, R, ldc, work, lwork, &info );
            if (info != 0) {
                printf("magma_zunmbr (lwork query) returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            // The query leaves the requested size in work[0]; clamp it to what was
            // actually allocated. The warning is also printed when lwork < 0.
            lwork = (magma_int_t) MAGMA_Z_REAL( work[0] );
            if ( lwork < 0 || lwork > lwork_max ) {
                printf("Warning: optimal lwork %d > allocated lwork_max %d\n", (int) lwork, (int) lwork_max );
                lwork = lwork_max;
            }
            
            gpu_time = magma_wtime();
            magma_zunmbr( vect[ivect], side[iside], trans[itran],
                          m, n, k,
                          A, lda, tau, R, ldc, work, lwork, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0) {
                printf("magma_zunmbr returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            /* =====================================================================
               compute relative error |QC_magma - QC_lapack| / |QC_lapack|
               =================================================================== */
            // R := R - C, then the Frobenius norm of R is scaled by sqrt(m*n) * ||C||_F.
            size = ldc*n;
            blasf77_zaxpy( &size, &c_neg_one, C, &ione, R, &ione );
            Cnorm = lapackf77_zlange( "Fro", &m, &n, C, &ldc, dwork );
            error = lapackf77_zlange( "Fro", &m, &n, R, &ldc, dwork ) / (magma_dsqrt(m*n) * Cnorm);
            
            printf( "%5d %5d %5d   %c   %4c   %5c   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %s\n",
                    (int) m, (int) n, (int) k,
                    lapacke_vect_const( vect[ivect] ),
                    lapacke_side_const( side[iside] ),
                    lapacke_trans_const( trans[itran] ),
                    cpu_perf, cpu_time, gpu_perf, gpu_time,
                    error, (error < tol ? "ok" : "failed") );
            // Written as !(error < tol) so that a NaN error also counts as a failure.
            status += ! (error < tol);
            
            TESTING_FREE_CPU( C );
            TESTING_FREE_CPU( R );
            TESTING_FREE_CPU( A );
            TESTING_FREE_CPU( work );
            TESTING_FREE_CPU( d );
            TESTING_FREE_CPU( e );
            TESTING_FREE_CPU( taup );
            TESTING_FREE_CPU( tauq );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
      }}}  // end ivect, iside, itran
      printf( "\n" );
    }
    
    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}
Esempio n. 20
0
// Scales both the real and the imaginary part by the scalar b.
Complex Complex::operator*(const double & b) const {
  const auto scaledReal = b * real();
  const auto scaledImag = b * imag();
  return Complex(scaledReal, scaledImag);
}
Esempio n. 21
0
/*! \brief Poles, zeros and elliptic cells coefficients computation.
 *
 * Inputs are:
 * \arg eps : Oscillations in working bandwidth
 * \arg A :  Weakening of attenuated band
 * \arg fa : Low frequency transition edge  [Hz]
 * \arg fb : High frequency transition edge [Hz]
 * \arg fe : Sampling frequency  [Hz]
 * \arg NCellMax : Maximum number of cells
 *
 * Outputs are :
 * \arg NCells : number of cells N
 * \arg poles : poles of the cells, N entries written
 * \arg zeros : zeros of the cells, N entries written
 * \arg CoefA : coefficient A of the cells
 * \arg CoefB : coefficient B of the cells
 * \arg CoefC : coefficient C of the cells
 * \arg CoefD : coefficient D of the cells
 *
 * If N is negative or larger than NCellMax a message is printed on stderr
 * and the program is terminated with exit(1).
 *
 * Computations (indices are 1-based, as in the code) : \n
 * \f$  \omega_c = fb \cdot 2 \cdot \pi \f$ \n
 * \f$  \omega_r = fa \cdot 2 \cdot \pi \f$ \n
 * \f$  T = 1/fe \f$ \n
 * \f$  dk1 = \frac{eps}{\sqrt{A^2-1}} \f$ \n
 * \f$  dk = \frac{tan(\omega_c \cdot \frac{T}{2})}{tan(\omega_r \cdot \frac{T}{2})} \f$ \n
 * \f$  dkp = \sqrt{1-dk^2} \f$ \n
 * \f$  ak1 = ak(dk) \textrm{ using ak function} \f$ \n
 * \f$  ak2 = ak(dk1) \textrm{ using ak function} \f$ \n
 * \f$  ak3 = ak(dkp) \textrm{ using ak function} \f$ \n
 * \f$  ak4 = cak(dk1^2) \textrm{ using cak function} \f$ \n
 *
 * \f$ N = trunc \big( \frac{ak4 \cdot ak1}{ak2 \cdot ak3} \big) / 2 + 1 \f$ (integer division) \n
 * N is checked : \f$ 0 \le N \le NCellMax \f$ \n
 *
 * \f$  U_0 = -\frac{ak3}{ak4} \cdot alog \big( \frac{1+\sqrt{1+eps^2}}{eps} \big) \f$ \n
 * \arg for  \f$ i=1,\dots,N \f$ \n
 * \f$ xmag = (2 \cdot i - 1) \cdot \frac{ak1}{2 \cdot N} \f$ \n
 * \f$ zeros[i-1] = -ak3 + I \cdot xmag \f$ \n
 * \f$ poles[i-1] = U_0 + I \cdot xmag \f$ \n
 * \arg for  \f$ i=1,\dots,2 \cdot N \f$ \n
 * \f$ (Q,R) = (real,imag) \f$ of \f$ zeros[i-1] \f$ if \f$ i \le N \f$, of \f$ poles[i-N-1] \f$ otherwise \n
 * \f$ a1 = sn(Q, dkp, ak3, ak1) \textrm{ using sn function} \f$ \n
 * \f$ b1 = sn(R, dk,  ak1, ak3) \textrm{ using sn function} \f$ \n
 * \f$ dn = \sqrt{1-{(dk \cdot b1)}^2} \f$ \n
 * \f$ de = 1-{(a1 \cdot dn)}^2 \f$ \n
 * \f$ \sigma= \left\{ \begin{array}{ll} 0 & \textrm{if } i \le N \\
 a1 \cdot \sqrt{(1-a1^2)*(1-b1^2)} \cdot \frac{dn}{de} & else \end{array} \right. \f$ \n
 * \f$ \omega = b1 \cdot \frac{\sqrt{(1-(dkp \cdot a1)^2)}}{de} \f$ \n
 * \f$ \sigma = \sigma \cdot tan(\omega_c \cdot \frac{T}{2}) \f$ \n
 * \f$ \omega = \omega \cdot tan(\omega_c \cdot \frac{T}{2}) \f$ \n
 * \f$ \left\{ \begin{array}{ll}
 \textrm{if } i \le N & zeros[i-1] = \sigma + I \cdot \omega \\
 \textrm{else} & poles[i-N-1] = \sigma + I \cdot \omega \end{array} \right. \f$ \n
 * \arg for  \f$ i=2 \cdot N,\dots,1 \f$ \n
 * \f$ \left\{ \begin{array}{ll} \textrm{if } i \le N & (X,Y)=(0,imag(zeros[i-1])) \\
 else & (X,Y)=(real(poles[i-N-1]),imag(poles[i-N-1])) \end{array} \right. \f$ \n
 * \f$    Re = \frac{1-X^2-Y^2}{(1-X)^2+Y^2} \f$ \n
 * \f$    V = \frac{2 \cdot Y}{(1-X)^2+Y^2} \f$ \n
 * \f$    c1 = -2 \cdot Re \f$ \n
 * \f$    d1 = Re^2 + V^2 \f$ \n
 * \f$ \left\{ \begin{array}{ll}
 \textrm{if } i \le N & \left\{ \begin{array}{l}
zeros[i-1]=Re+I \cdot V \\ CoefB[i-1]=c1 \\ CoefA[i-1]=d1  \end{array} \right. \\
 else & \left\{ \begin{array}{l}
 poles[i-N-1]=Re+I \cdot V  \\  CoefD[i-N-1]=c1 \\ CoefC[i-N-1]=d1 \end{array} \right.
 \end{array} \right. \f$ \n
 */
void elli(double eps,      // Oscillations in working bandwidth
	  double A,        // Weakening of attenuated band
	  double fa,       // Low frequency transition edge  [Hz]
	  double fb,       // High frequency transition edge [Hz]
	  double fe,       // Sampling frequency  [Hz]
	  int NCellMax,         // Maximum number of cells
	  int *NCells,          // Output number of cells
	  std::complex<double> poles[], // Poles of the cells (imaginary part >= 0) 
	  std::complex<double> zeros[], // Zeros of the cells (imaginary part >= 0)
	  double CoefA[],  // A coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+CZ-2)
	  double CoefB[],  // B coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+CZ-2)
	  double CoefC[],  // C coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+CZ-2)
	  double CoefD[]   // D coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+CZ-2)
	  ) {

  double T, dk1, dk, dkp, ak1, ak2, ak3, ak4;
  double U0, xmag, Q, R, sigma;
  double a1, b1, dn, de, omega;
  double X, Y, Re, V, c1, d1;
  double wr, wc, tanHalfWc;
  int i, N, NDeu;
  
  wc = fb*2*M_PI;
  wr = fa*2*M_PI;

  T = 1/fe;
  // tan(wc*T/2) is needed for dk and again for every pole/zero below.
  tanHalfWc = tan(wc*T/2.);

  dk1 = eps/sqrt(A*A-1);
  dk = tanHalfWc / tan(wr*T/2.);
  dkp = sqrt(1-dk*dk);
  ak1 = ak(dk);
  ak2 = ak(dk1);
  ak3 = ak(dkp);
  //ak4 = ak(sqrt(1-dk1*dk1));
  ak4 = cak(dk1*dk1);

  // Number of cells: truncated ratio, halved with integer division, plus one.
  N = (int)(ak4*ak1/(ak2*ak3));
  N = (N/2) + 1;
  NDeu = 2*N;

  // The cell count is reported to the caller even when the check below fails.
  *NCells = N;
  if ( N<0 || N>NCellMax) {
    fprintf(stderr,"\n\n ***  Maximum number of cells (%d) reached. %d cells needed. Exiting ... ***\n\n",NCellMax,N);
    exit(1);
  }

  // Initial placement of the zeros and poles.
  U0 = (-ak3/ak4)*alog((1+sqrtl(1+eps*eps))/eps);
  for (i=1;i<=N;i++) {
    xmag = (2*i-1)*ak1/NDeu;
    zeros[i-1] = -ak3 + Im*xmag;
    poles[i-1] =   U0 + Im*xmag;
  }

  // Map every zero (i <= N) and pole (i > N) through sn() and scale the
  // result by tan(wc*T/2).
  for (i=1; i<=NDeu; i++) {
    if (i<=N) {
      Q = real(zeros[i-1]);
      R = imag(zeros[i-1]);
    }
    else {
      Q = real(poles[i-N-1]);
      R = imag(poles[i-N-1]);
    }

    a1 = sn(Q, dkp, ak3, ak1);
    b1 = sn(R, dk,  ak1, ak3);

    dn = sqrt(1.-(dk*b1)*(dk*b1));
    de = 1-(a1*dn)*(a1*dn);
    
    // Zeros keep a null real part; only the poles get a non-zero sigma.
    sigma = 0.;
    if ( i > N ) {
      sigma = a1*sqrt((1-a1*a1)*(1-b1*b1))*dn/de;
    }
    
    omega = b1*sqrt(1-(dkp*a1)*(dkp*a1))/de;

    sigma = sigma*tanHalfWc;
    omega = omega*tanHalfWc;
    
    if ( i <= N ) {
      zeros[i-1] = sigma + Im*omega;
    }
    else {
      poles[i-N-1] = sigma + Im*omega;
    }
  }

  // Transform each zero/pole (X,Y) into (Re,V) and derive the second-order
  // cell coefficients from it.
  for (i=NDeu; i>=1; i--) {
    if ( i<=N ) {
      // The real part of a zero is forced to 0 here, as in the original code.
      X = 0;
      Y = imag(zeros[i-1]);
    }
    else {
      X = real(poles[i-N-1]);
      Y = imag(poles[i-N-1]);
    }
    
    Re = (1-X*X-Y*Y)/((1-X)*(1-X)+Y*Y);
    V = 2*Y/((1-X)*(1-X)+Y*Y);
    c1 = -2*Re;
    d1 = Re*Re + V*V;
    if ( i <= N ) {
      zeros[i-1] = Re + Im*V;
      CoefB[i-1] = c1;
      CoefA[i-1] = d1;
    }
    else {
      poles[i-N-1] = Re + Im*V;
      CoefD[i-N-1] = c1;
      CoefC[i-N-1] = d1;
    }

  }
}
Esempio n. 22
0
// Divides both the real and the imaginary part by the scalar b.
Complex Complex::operator/(double b) const {
  const auto quotientReal = real() / b;
  const auto quotientImag = imag() / b;
  return Complex(quotientReal, quotientImag);
}
Esempio n. 23
0
/* Compute analytic dynamics.
 *
 * Evaluates, for numOutputSteps+1 equally spaced times in [0, tout], the
 * coefficient of each of the Nc electron-accepting states as a sum over the
 * Nk bulk states, and the corresponding populations |c|^2.  When the
 * "analytic_tcprob.out" output is requested, one line per time step is
 * written: the time followed by population, real and imaginary coefficient
 * for every accepting state.
 *
 * outs : map of requested output files, queried through isOutput().
 * p    : simulation parameters; read only.
 */
void computeAnalyticOutputs(std::map<const std::string, bool> &outs,
    struct PARAMETERS * p) {
  typedef std::complex <double> cplx;

  // energy spacing in bulk
  const cplx dE ((p->kBandTop-p->kBandEdge)/(p->Nk-1), 0);
  // bulk-QD coupling
  const cplx Vee (p->Vnobridge[0], 0);
  // rate constant (can be defined also as K/2)
  const cplx K = cplx (3.1415926535,0)*pow(Vee,2)/dE;
  // complex constants used in the exponents below
  const cplx NEGC1 (-1.0, 0.0);
  const cplx CI (0.0, 1.0);
  const cplx NEGCI (0.0, -1.0);

  // unpack params a bit
  const int Nk = p->Nk;
  const int Nc = p->Nc;
  const int Ik = p->Ik;
  const int Ic = p->Ic;
  const int N = p->NEQ;
  const double * energies = &(p->energies[0]);
  const double * startWfn = &(p->startWfn[0]);

  // Time rows run from 0 to numOutputSteps inclusive, so every per-time
  // array needs numOutputSteps+1 rows.
  const int numSteps = p->numOutputSteps;
  const int numRows = numSteps + 1;
  const double dt = (numSteps > 0) ? p->tout/numSteps : 0.0;

  // Create matrix of energy differences
  std::vector<cplx> Elr (Nk*Nc, cplx (0.0, 0.0));
  for (int ii = 0; ii < Nk; ii++) {
    for (int jj = 0; jj < Nc; jj++) {
      // array follows convention that first index is for QC state
      // e.g. Elr[i*Nc + j] = E_{ij}
      Elr[ii*Nc + jj] = cplx (energies[Ik + ii] - energies[Ic + jj], 0);
    }
  }
#ifdef DEBUG_ANALYTIC
  std::cout << std::endl;
  std::cout << "Energy gaps:" << std::endl;
  for (int ii = 0; ii < Nc*Nk; ii++) {
    std::cout << Elr[ii] << " ";
  }
  std::cout << std::endl;
  std::cout << std::endl;
#endif

  // Create matrix of prefactors for each QC (n) state
  std::vector<cplx> prefQC (Nk*Nc, cplx (0.0, 0.0));
  for (int ii = 0; ii < Nk; ii++) {
    // V*c_l/(E_{lr} + i\kappa)
    // real part of the starting coefficient is at Ik+ii, imaginary at Ik+N+ii
    const cplx pref = Vee*(cplx (startWfn[Ik + ii], startWfn[Ik + N + ii]));
#ifdef DEBUG_ANALYTIC
    std::cout << startWfn[Ik + ii] << "," << pref << " ";
#endif
    for (int jj = 0; jj < Nc; jj++) {
      prefQC[ii*Nc + jj] = pref/(Elr[ii*Nc + jj] + CI*K);
    }
  }
#ifdef DEBUG_ANALYTIC
  std::cout << std::endl;
  for (int ii = 0; ii < Nc*Nk; ii++) {
    std::cout << prefQC[ii] << " ";
  }
  std::cout << std::endl;
  std::cout << std::endl;
#endif

  // calculate wavefunction coefficients on electron-accepting side over time
  std::vector<cplx> crt (Nc*numRows, cplx (0.0, 0.0));

  // Integer time index: the number of rows no longer depends on how the
  // floating-point accumulation of t happens to round.
  for (int timeIndex = 0; timeIndex < numRows; timeIndex++) {
    const cplx t (timeIndex*dt, 0.0);
    // decay term is the same for every pair of states at a given time
    const cplx decay = exp(NEGC1*K*t);
    for (int ii = 0; ii < Nc; ii++) {
      // TODO add bit for multiple state terms
      cplx coeff (0.0, 0.0);
      for (int jj = 0; jj < Nk; jj++) {
	// prefQC and Elr share the [bulk*Nc + acceptor] layout
	coeff += prefQC[jj*Nc + ii]*(exp(NEGCI*Elr[jj*Nc + ii]*t) - decay);
      }
      crt[timeIndex*Nc + ii] = coeff;
    }
  }
  
  // calculate populations on electron-accepting side over time
  std::vector<double> Prt (Nc*numRows, 0.0);
  for (int ii = 0; ii < numRows; ii++) {
    for (int jj = 0; jj < Nc; jj++) {
      const double re = real(crt[ii*Nc + jj]);
      const double im = imag(crt[ii*Nc + jj]);
      Prt[ii*Nc + jj] = re*re + im*im;
    }
  }

  if (isOutput(outs, "analytic_tcprob.out")) {
    std::ofstream output("analytic_tcprob.out");
    for (int ii = 0; ii < numRows; ii++) {
      output << p->times[ii];
      for (int jj = 0; jj < Nc; jj++) {
	output << " " << Prt[ii*Nc + jj];
	output << " " << real(crt[ii*Nc + jj]) << " " << imag(crt[ii*Nc + jj]);
      }
      output << "\n";
    }
    output.close();
  }

  return;
}
Esempio n. 24
0
// Complex conjugate: same real part, negated imaginary part.
Complex Complex::cc() const {
  const auto realPart = real();
  const auto negatedImag = -imag();
  return Complex(realPart, negatedImag);
}
inline void Density::add_k_point_contribution_rg(K_point* kp__)
{
    PROFILE("sirius::Density::add_k_point_contribution_rg");

    int nfv = ctx_.num_fv_states();
    double omega = unit_cell_.omega();

    auto& fft = ctx_.fft_coarse();
    
    /* get preallocated memory */
    double* ptr = static_cast<double*>(ctx_.memory_buffer(fft.local_size() * (ctx_.num_mag_dims() + 1) * sizeof(double)));

    mdarray<double, 2> density_rg(ptr, fft.local_size(), ctx_.num_mag_dims() + 1, "density_rg");
    density_rg.zero();

    if (fft.pu() == GPU) {
        density_rg.allocate(memory_t::device);
        density_rg.zero<memory_t::device>();
    }

    fft.prepare(kp__->gkvec().partition());

    /* non-magnetic or collinear case */
    if (ctx_.num_mag_dims() != 3) {
        /* loop over pure spinor components */
        for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) {
            /* trivial case */
            if (!kp__->spinor_wave_functions(ispn).pw_coeffs().spl_num_col().global_index_size()) {
                continue;
            }

            for (int i = 0; i < kp__->spinor_wave_functions(ispn).pw_coeffs().spl_num_col().local_size(); i++) {
                int j = kp__->spinor_wave_functions(ispn).pw_coeffs().spl_num_col()[i];
                double w = kp__->band_occupancy(j + ispn * nfv) * kp__->weight() / omega;

                ///* transform to real space; in case of GPU wave-function stays in GPU memory */
                fft.transform<1>(kp__->gkvec().partition(),
                                 kp__->spinor_wave_functions(ispn).pw_coeffs().extra().template at<CPU>(0, i));
                //switch (fft.pu()) {
                //    case CPU: {
                //        fft.transform<1>(kp__->gkvec().partition(),
                //                         kp__->spinor_wave_functions(ispn).pw_coeffs().extra().template at<CPU>(0, i));
                //        break;
                //    }
                //    case GPU: {
                //        fft.transform<1, GPU>(kp__->gkvec().partition(),
                //                              kp__->spinor_wave_functions(ispn).pw_coeffs().extra().template at<GPU>(0, i));
                //        break;
                //    }
                //}
                
                /* add to density */
                switch (fft.pu()) {
                    case CPU: {
                        #pragma omp parallel for schedule(static)
                        for (int ir = 0; ir < fft.local_size(); ir++) {
                            auto z = fft.buffer(ir);
                            density_rg(ir, ispn) += w * (std::pow(z.real(), 2) + std::pow(z.imag(), 2));
                        }
                        break;
                    }
                    case GPU: {
                        #ifdef __GPU
                        update_density_rg_1_gpu(fft.local_size(), fft.buffer().at<GPU>(), w, density_rg.at<GPU>(0, ispn));
                        #else
                        TERMINATE_NO_GPU
                        #endif
                        break;
                    }
                }
            }
        }
    } else { /* non-collinear case */
Esempio n. 26
0
// Component-wise difference of this value and `a`.
Complex Complex::operator-(const Complex & a) const {
  const auto realDiff = real() - a.real();
  const auto imagDiff = imag() - a.imag();
  return Complex(realDiff, imagDiff);
}
Esempio n. 27
0
void digisource::initTR (void) {
  vector * values = getPropertyVector ("times");
  T = real (sum (*values));
  initDC ();
}
Esempio n. 28
0
// Constructs the flakes background.
//
// Loads backgrounds/flakes/textures.ini (relative to media_path()) through
// the texture manager, then creates "flakes-count" flake sprites. Every flake
// gets a random position inside the screen rectangle and one random value v
// in [0,1] that interpolates both its size (between "flakes-size-min" and
// "flakes-size-max", each scaled by the screen width) and its speed (between
// "flakes-speed-min" and "flakes-speed-max"). Finally the renderer is set to
// clear the back buffer to black.
//
// _program_options: parsed options providing the "flakes-*" values above
// _renderer:        device used for the sprite system, the screen size and
//                   the clear state
// _texture_manager: stored by the object and used to look up the "flake"
//                   texture
sgetris::backgrounds::flakes::object::object(
	boost::program_options::variables_map &_program_options,
	sge::renderer::device &_renderer,
	// The flakes get the loader because in a later version more than one flake image could be loaded
	// from a directory
	texture_manager &_texture_manager)
:
	texture_manager_(
		_texture_manager),
	ss_(
		_renderer),
	clock_(),
	frame_timer_(
		sgetris::diff_timer::parameters(
			fcppt::chrono::second(1)
		)
	),
	flakes_()
{
	texture_manager_.load(
		media_path()/FCPPT_TEXT("backgrounds")/FCPPT_TEXT("flakes")/FCPPT_TEXT("textures.ini"));

	// Position generators cover the whole screen: x in [0, width],
	// y in [0, height]. _renderer is a reference, so it is accessed with '.'
	// here exactly as everywhere else in this constructor.
	fcppt::random::uniform<sprite::scalar>
		xposition_rng(
			fcppt::random::make_inclusive_range(
				static_cast<sprite::scalar>(0),
				static_cast<sprite::scalar>(
					_renderer.screen_size().w()))),
		yposition_rng(
			fcppt::random::make_inclusive_range(
				static_cast<sprite::scalar>(0),
				static_cast<sprite::scalar>(
					_renderer.screen_size().h())));

	// Those pairs are real to avoid ugly casting below, they'll be cast
	// one time to sprite::scalar.
	// Both size bounds are the option value multiplied by the screen width.
	std::pair<real,real>
		size_range(
			static_cast<real>(
				_renderer.screen_size().w())*
			_program_options["flakes-size-min"].as<real>(),
			static_cast<real>(
				_renderer.screen_size().w())*
			_program_options["flakes-size-max"].as<real>());

	std::pair<real,real>
		speed_range(
			_program_options["flakes-speed-min"].as<real>(),
			_program_options["flakes-speed-max"].as<real>());

	// Generator for the per-flake interpolation factor v in [0,1].
	fcppt::random::uniform<real> rng(
		fcppt::random::make_inclusive_range(
			static_cast<real>(
				0),
			static_cast<real>(
				1)));

	for(
		flake_count i = 0,
		fc = _program_options["flakes-count"].as<flake_count>();
		i < fc;
		++i)
	{
		// Roll the dice
		real const v =
			rng();

		sprite::vector const position(
			xposition_rng(),
			yposition_rng());

		// Width and height use the same interpolated value, so flakes are
		// square.
		sprite::dim const size(
			fcppt::math::dim::structure_cast<sprite::dim>(
				fcppt::math::dim::make(
					size_range.first + v * (size_range.second - size_range.first),
					size_range.first + v * (size_range.second - size_range.first))));

		// The same v drives the speed, so size and speed are correlated.
		sprite::scalar const speed =
			static_cast<sprite::scalar>(
				speed_range.first + v * (speed_range.second - speed_range.first));

		flakes_.push_back(
			new flake(
				real(
					v),
				_renderer.screen_size(),
				sprite::parameters()
					.system(
						&ss_)
					.order(
						0u)
					.pos(
						position)
					.texture(
						texture_manager_.texture(
							FCPPT_TEXT("flake")))
					.size(
						size),
				speed));
	}

	// Clear the back buffer to black.
	_renderer.state(
		sge::renderer::state::list
			(sge::renderer::state::bool_::clear_backbuffer = true)
			(sge::renderer::state::color::clear_color
				= sge::image::colors::black()
			)
	);
}
Esempio n. 29
0
        // Fits a plane through a set of 3D points.
        //
        // The plane is written as ax + by + cz = d; a^2 + b^2 + c^2 = 1.
        // The normal (a, b, c) is taken from the eigenvector that belongs to
        // the smallest eigenvalue of the 3x3 scatter matrix of the centred
        // points (real parts only), and d is the normal dotted with the
        // centroid. All four parameters are negated when a < 0.
        //
        // Results stored in cgridmap:
        //   planePara[0..3]  a, b, c, d
        //   planeDegree      mean absolute point-to-plane distance
        //   normalVector     acos(b / |(a, b, c)|) converted to degrees
        //
        // An empty cPointSet leaves cgridmap untouched, because the averages
        // below would otherwise divide by zero and store NaN values.
        void CalPlane(vector<Point3D>& cPointSet, GridMap &cgridmap)
        {
            const int pointNum = static_cast<int>(cPointSet.size());
            if(pointNum == 0)
            {
                return;
            }

            // Copy the points into an N x 3 matrix (columns: X, Y, Z).
            MatrixXf pointSet(pointNum,3);
            for(int i = 0; i < pointNum; i++)
            {
                pointSet(i,0) = cPointSet[i].X;
                pointSet(i,1) = cPointSet[i].Y;
                pointSet(i,2) = cPointSet[i].Z;
            }

            // Centroid of the point set.
            const float xBar = pointSet.col(0).sum()/pointNum;
            const float yBar = pointSet.col(1).sum()/pointNum;
            const float zBar = pointSet.col(2).sum()/pointNum;

            // Scatter matrix: sum over all points of (p - centroid)(p - centroid)^T.
            Matrix3f A(3,3);
            A<<0, 0, 0, 0, 0, 0, 0, 0, 0;
            for(int i = 0; i < pointNum; i++)
            {
                const float dx = pointSet(i,0) - xBar;
                const float dy = pointSet(i,1) - yBar;
                const float dz = pointSet(i,2) - zBar;

                A(0, 0) += dx*dx;
                A(0, 1) += dx*dy;
                A(0, 2) += dx*dz;
                A(1, 0) += dy*dx;
                A(1, 1) += dy*dy;
                A(1, 2) += dy*dz;
                A(2, 0) += dz*dx;
                A(2, 1) += dz*dy;
                A(2, 2) += dz*dz;
            }

            EigenSolver<MatrixXf> es(A);

            // Only the real parts of the eigenvalues / eigenvectors are used.
            VectorXcf eigvals = es.eigenvalues();
            Vector3f eigvalues;
            eigvalues<<real(eigvals(0)), real(eigvals(1)), real(eigvals(2));

            MatrixXcf eigvect = es.eigenvectors();
            Matrix3f eigvectors;
            eigvectors <<real(eigvect(0,0)), real(eigvect(0,1)), real(eigvect(0,2)), real(eigvect(1,0)), real(eigvect(1,1)), real(eigvect(1,2)),
                       real(eigvect(2,0)), real(eigvect(2,1)), real(eigvect(2,2));

            // Column index of the smallest eigenvalue; its eigenvector is the normal.
            float minValue = eigvalues(0);
            int minNum = 0;

            for(int i = 1; i < 3; i++)
            {
                if(eigvalues(i) < minValue)
                {
                    minValue = eigvalues(i);
                    minNum = i;
                }
            }

            float planePara[4] = {0, 0, 0, 0};

            planePara[0] = eigvectors(0, minNum);
            planePara[1] = eigvectors(1, minNum);
            planePara[2] = eigvectors(2, minNum);

            // d follows from requiring the plane to pass through the centroid.
            planePara[3] = planePara[0]*xBar + planePara[1]*yBar + planePara[2]*zBar;

            // Store the parameters, negated as a whole when a is negative.
            const bool flipSign = planePara[0] < 0;
            for(int i = 0; i < 4; i++)
            {
                cgridmap.planePara[i] = flipSign ? -planePara[i] : planePara[i];
            }

            // distance2 is the length of the stored normal; distance1 sums the
            // unnormalised absolute residuals |ax + by + cz - d| over all points.
            float distance1 = 0;
            float distance2 = sqrt(cgridmap.planePara[0]*cgridmap.planePara[0] + cgridmap.planePara[1]*cgridmap.planePara[1] + cgridmap.planePara[2]*cgridmap.planePara[2]);

            for(int i = 0; i < pointNum; i++)
            {
                distance1 += fabs(cgridmap.planePara[0]*pointSet(i,0) + cgridmap.planePara[1]*pointSet(i,1) + cgridmap.planePara[2]*pointSet(i,2) - cgridmap.planePara[3]);
            }

            cgridmap.planeDegree = distance1/distance2/pointNum;
            // Radians to degrees; the constant is kept as in the original code.
            cgridmap.normalVector = acos(cgridmap.planePara[1]/distance2)/3.1415926*180;
        }
Esempio n. 30
0
/*
 * putline - write the troff output for one table line to tabout.
 *
 * parameters
 *
 * i:	line number for deciding format
 * nl:	line number for finding data   usually identical
 *
 * A line that has replacement text in instead[nl], or a full-width rule in
 * fullbot[nl], is written out directly and the function returns early.
 * Otherwise each column of the line is positioned and printed in turn.
 * All output goes to tabout.
 */
void
putline(int i, int nl)
{
/*
 * cmidx is read inside the column loop (the "if (cmidx)" tests) on paths
 * where its assignment further down has not necessarily run yet, so it must
 * start from a defined value.
 */
int c, lf, ct, form, lwid, vspf, ip = -1, cmidx = 0, exvspen, vforml;
int vct, chfont;
char *s, *size, *fn;
watchout=vspf=exvspen=0;
if (i==0) once=0;
/* top rule of a boxed / all-lines table */
if (i==0 && ( allflg || boxflg || dboxflg))
	fullwide(0,   dboxflg? '=' : '-');
/* emit .ne requests for the entries of an ordinary data line */
if (instead[nl]==0 && fullbot[nl] ==0)
for(c=0; c<ncol; c++)
	{
	s = table[nl][c].col;
	if (s==0) continue;
	if (vspen(s))
		{
		/* follow the column downwards to the first entry for which vspen() is false */
		for(ip=nl; ip<nlin; ip=next(ip))
			if (!vspen(s=table[ip][c].col)) break;
		/*
		 * NOTE(review): s is a char * but is printed with %c; the range
		 * test suggests it carries a small integer value here - confirm.
		 */
		if (s>(char *)0 && s<(char *)128)
		fprintf(tabout, ".ne \\n(%c|u+\\n(.Vu\n",s);
		continue;
		}
	if (point(s)) continue;
	fprintf(tabout, ".ne \\n(%c|u+\\n(.Vu\n",s);
	watchout=1;
	}
if (linestop[nl])
	fprintf(tabout, ".mk #%c\n", linestop[nl]+'a'-1);
lf = prev(nl);
/* replacement text: copy it through unchanged and stop */
if (instead[nl])
	{
	/* written to tabout like all other output of this function */
	fprintf(tabout, "%s\n", instead[nl]);
	return;
	}
/* full-width rule line */
if (fullbot[nl])
	{
	switch (ct=fullbot[nl])
		{
		case '=':
		case '-':
			fullwide(nl,ct);
		}
	return;
	}
/* vspf: vspen() holds for some column on this line or on the previous one */
for(c=0; c<ncol; c++)
	{
	if (instead[nl]==0 && fullbot[nl]==0)
	if (vspen(table[nl][c].col)) vspf=1;
	if (lf>=0)
		if (vspen(table[lf][c].col)) vspf=1;
	}
if (vspf)
	{
	fprintf(tabout, ".nr #^ \\n(\\*(#du\n");
	fprintf(tabout, ".nr #- \\n(#^\n"); /* current line position relative to bottom */
	}
vspf=0;
chfont=0;
/* grow register #- where needed for the entries for which point() is false */
for(c=0; c<ncol; c++)
	{
	s = table[nl][c].col;
	if (s==0) continue;
	chfont |= (int)(font[stynum[nl]][c]);
	if (point(s) ) continue;
	lf=prev(nl);
	if (lf>=0 && vspen(table[lf][c].col))
		fprintf(tabout, ".if (\\n(%c|+\\n(^%c-1v)>\\n(#- .nr #- +(\\n(%c|+\\n(^%c-\\n(#--1v)\n",s,'a'+c,s,'a'+c);
	else
		fprintf(tabout, ".if (\\n(%c|+\\n(#^-1v)>\\n(#- .nr #- +(\\n(%c|+\\n(#^-\\n(#--1v)\n",s,s);
	}
/* horizontal rule between lines when allflg is set (not before the first line) */
if (allflg && once>0 )
	fullwide(i,'-');
once=1;
runtabs(i, nl);
if (allh(nl) && !pr1403)
	{
	/* save the vertical spacing in register SVS; restored after the line is written */
	fprintf(tabout, ".nr %d \\n(.v\n", SVS);
	fprintf(tabout, ".vs \\n(.vu-\\n(.sp\n");
	}
if (chfont)
	fprintf(tabout, ".nr %2d \\n(.f\n", S1);
fprintf(tabout, ".nr 35 1m\n");
fprintf(tabout, "\\&");
/* vct counts vertical-line / vertical-motion items emitted on the current output line */
vct = 0;
for(c=0; c<ncol; c++)
	{
	if (watchout==0 && i+1<nlin && (lf=left(i,c, &lwid))>=0)
		{
		tohcol(c);
		drawvert(lf, i, c, lwid);
		vct += 2;
		}
	if (rightl && c+1==ncol) continue;
	/* vforml: walk upwards while the line above has vspen() set in this column */
	vforml=i;
	for(lf=prev(nl); lf>=0 && vspen(table[lf][c].col); lf=prev(lf))
		vforml= lf;
	form= ctype(vforml,c);
	if (form != 's')
		{
		/* move horizontally to the position held in the column register */
		ct = c+CLEFT;
		if (form=='a') ct = c+CMID;
		if (form=='n' && table[nl][c].rcol && lused[c]==0) ct= c+CMID;
		fprintf(tabout, "\\h'|\\n(%du'", ct);
		}
	s= table[nl][c].col;
	fn = font[stynum[vforml]][c];
	size = csize[stynum[vforml]][c];
	if (*size==0)size=0;
	/* from here on form means: 1 left, 2 right, 3 center adjust */
	switch(ct=ctype(vforml, c))
		{
		case 'n':
		case 'a':
			if (table[nl][c].rcol)
				{
				/* entry has a second part (rcol): print col first, then go on with rcol */
			   if (lused[c]) /*Zero field width*/
				{
				ip = prev(nl);
				if (ip>=0)
				if (vspen(table[ip][c].col))
					{
					if (exvspen==0)
						{
						fprintf(tabout, "\\v'-(\\n(\\*(#du-\\n(^%cu", c+'a');
						if (cmidx)
							fprintf(tabout, "-((\\n(#-u-\\n(^%cu)/2u)", c+'a');
						vct++;
						fprintf(tabout, "'");
						exvspen=1;
						}
					}
				fprintf(tabout, "%c%c",F1,F2);
				puttext(s,fn,size);
				fprintf(tabout, "%c",F1);
				}
				s= table[nl][c].rcol;
				form=1;
				break;
				}
			/* FALLTHROUGH */
		case 'c':
			form=3; break;
		case 'r':
			form=2; break;
		case 'l':
			form=1; break;
		case '-':
		case '=':
			/* rule column: warn when real() reports data in the entry, then draw the rule */
			if (real(table[nl][c].col))
				fprintf(stderr,gettext("%s: line %d: Data ignored on table line %d\n"), ifile, iline-1, i+1);
			makeline(i,c,ct);
			continue;
		default:
			continue;
		}
	if (realsplit ? rused[c]: used[c]) /*Zero field width*/
		{
		/* form: 1 left, 2 right, 3 center adjust */
		if (ifline(s))
			{
			makeline(i,c,ifline(s));
			continue;
			}
		if (filler(s))
			{
			/* written to tabout like all other output of this function */
			fprintf(tabout, "\\l'|\\n(%du\\&%s'", c+CRIGHT, s+2);
			continue;
			}
		ip = prev(nl);
		cmidx = ctop[stynum[nl]][c]==0;
		if (ip>=0)
		if (vspen(table[ip][c].col))
			{
			if (exvspen==0)
				{
				/* upward vertical motion before the text is written */
				fprintf(tabout, "\\v'-(\\n(\\*(#du-\\n(^%cu", c+'a');
				if (cmidx)
					fprintf(tabout, "-((\\n(#-u-\\n(^%cu)/2u)", c+'a');
				vct++;
				fprintf(tabout, "'");
				}
			}
		fprintf(tabout, "%c", F1);
		if (form!= 1)
			fprintf(tabout, "%c", F2);
		if (vspen(s))
			vspf=1;
		else
		puttext(s, fn, size);
		if (form !=2)
			fprintf(tabout, "%c", F2);
		fprintf(tabout, "%c", F1);
		}
	if (ip>=0)
	if (vspen(table[ip][c].col))
		{
		/*
		 * exvspen: the next column can share this vertical motion, so the
		 * matching downward motion is postponed.
		 */
		exvspen = (c+1 < ncol) && vspen(table[ip][c+1].col) &&
			(topat[c] == topat[c+1]) &&
			(cmidx == (ctop [stynum[nl]][c+1]==0)) && (left(i,c+1,&lwid)<0);
		if (exvspen==0)
			{
			/* downward motion undoing the upward one above */
			fprintf(tabout, "\\v'(\\n(\\*(#du-\\n(^%cu", c+'a');
			if (cmidx)
				fprintf(tabout, "-((\\n(#-u-\\n(^%cu)/2u)", c+'a');
			vct++;
			fprintf(tabout, "'");
			}
		}
	else
		exvspen=0;
	/* if lines need to be split for gcos here is the place for a backslash */
	if (vct > 7 && c < ncol)
		{
		fprintf(tabout, "\n.sp-1\n\\&");
		vct=0;
		}
	}
fprintf(tabout, "\n");
/* restore the vertical spacing saved in register SVS */
if (allh(nl) && !pr1403) fprintf(tabout, ".vs \\n(%du\n", SVS);
if (watchout)
	funnies(i,nl);
if (vspf)
	{
	/* record where a run of vspen() entries starts in each column (register ^<col>, topat[]) */
	for(c=0; c<ncol; c++)
		if (vspen(table[nl][c].col) && (nl==0 || (lf=prev(nl))<0 || !vspen(table[lf][c].col)))
			{
			fprintf(tabout, ".nr ^%c \\n(#^u\n", 'a'+c);
			topat[c]=nl;
			}
	}
}