ptri2d read_tri2d(const char *name) { FILE *in; ptri2d t2; real(*x)[2]; uint(*e)[2]; uint(*t)[3]; uint *xb; uint *eb; uint i; uint vertices, edges, triangles; uint items; char buf[80], *res; in = fopen(name, "r"); if (!in) { (void) fprintf(stderr, "Could not open file \"%s\" for reading\n", name); return 0; } res = fgets(buf, BUFSIZE, in); assert(res != NULL); while (!feof(in) && buf[0] == '#') { res = fgets(buf, 80, in); assert(res != NULL); } items = sscanf(buf, "%u %u %u", &vertices, &edges, &triangles); if (items != 3) { (void) fprintf(stderr, "Could not get sizes from file \"%s\"\n", name); (void) fclose(in); return 0; } t2 = new_tri2d(vertices, edges, triangles); x = t2->x; e = t2->e; t = t2->t; xb = t2->xb; eb = t2->eb; /*vertices */ for (i = 0; i < vertices; i++) { res = fgets(buf, 80, in); assert(res != NULL); while (!feof(in) && buf[0] == '#') { res = fgets(buf, 80, in); assert(res != NULL); } items = sscanf(buf, "%" SCANF_PREFIX "f %" SCANF_PREFIX "f %u", x[i], x[i] + 1, xb + i); if (items != 3) { (void) fprintf(stderr, "Could not read vertex %u from file \"%s\"\n", i, name); del_tri2d(t2); (void) fclose(in); return 0; } } /*edges */ for (i = 0; i < edges; i++) { res = fgets(buf, 80, in); assert(res != NULL); while (!feof(in) && buf[0] == '#') { res = fgets(buf, 80, in); assert(res != NULL); } items = sscanf(buf, "%u %u %u", e[i], e[i] + 1, eb + i); if (items != 3) { (void) fprintf(stderr, "Could not read edge %u from file \"%s\"\n", i, name); del_tri2d(t2); (void) fclose(in); return 0; } } /*triangles */ for (i = 0; i < triangles; i++) { res = fgets(buf, 80, in); assert(res != NULL); while (!feof(in) && buf[0] == '#') { res = fgets(buf, 80, in); assert(res != NULL); } items = sscanf(buf, "%u %u %u", t[i], t[i] + 1, t[i] + 2); if (items != 3) { (void) fprintf(stderr, "Could not read triangle %u from file \"%s\"\n", i, name); del_tri2d(t2); (void) fclose(in); return 0; } } (void) fclose(in); return t2; }
// calculate fft and then get the power arma::mat powerFFT(arma::mat mat, int nfft) { arma::cx_mat cmat = fft(mat, nfft); arma::mat result = square(real(cmat)) + square(imag(cmat)); return result; }
int main(int argc, char* argv[]) { // Choose a Butcher's table or define your own. ButcherTable bt(butcher_table_type); if (bt.is_explicit()) Hermes::Mixins::Loggable::Static::info("Using a %d-stage explicit R-K method.", bt.get_size()); if (bt.is_diagonally_implicit()) Hermes::Mixins::Loggable::Static::info("Using a %d-stage diagonally implicit R-K method.", bt.get_size()); if (bt.is_fully_implicit()) Hermes::Mixins::Loggable::Static::info("Using a %d-stage fully implicit R-K method.", bt.get_size()); // Load the mesh. Mesh mesh; MeshReaderH2D mloader; mloader.load("square.mesh", &mesh); // Initial mesh refinements. for(int i = 0; i < INIT_REF_NUM; i++) mesh.refine_all_elements(); // Convert initial condition into a Solution<std::complex<double> >. CustomInitialCondition psi_time_prev(&mesh); Solution<std::complex<double> > psi_time_new(&mesh); // Initialize the weak formulation. double current_time = 0; CustomWeakFormGPRK wf(h, m, g, omega); // Initialize boundary conditions. DefaultEssentialBCConst<std::complex<double> > bc_essential("Bdy", 0.0); EssentialBCs<std::complex<double> > bcs(&bc_essential); // Create an H1 space with default shapeset. H1Space<std::complex<double> > space(&mesh, &bcs, P_INIT); int ndof = space.get_num_dofs(); Hermes::Mixins::Loggable::Static::info("ndof = %d", ndof); // Initialize the FE problem. DiscreteProblem<std::complex<double> > dp(&wf, &space); // Initialize views. ScalarView sview_real("Solution - real part", new WinGeom(0, 0, 600, 500)); ScalarView sview_imag("Solution - imaginary part", new WinGeom(610, 0, 600, 500)); sview_real.fix_scale_width(80); sview_imag.fix_scale_width(80); // Initialize Runge-Kutta time stepping. RungeKutta<std::complex<double> > runge_kutta(&wf, &space, &bt); // Time stepping: int ts = 1; int nstep = (int)(T_FINAL/time_step + 0.5); for(int ts = 1; ts <= nstep; ts++) { // Perform one Runge-Kutta time step according to the selected Butcher's table. 
Hermes::Mixins::Loggable::Static::info("Runge-Kutta time step (t = %g s, time step = %g s, stages: %d).", current_time, time_step, bt.get_size()); try { runge_kutta.setTime(current_time); runge_kutta.setTimeStep(time_step); runge_kutta.rk_time_step_newton(&psi_time_prev, &psi_time_new); } catch(Exceptions::Exception& e) { e.printMsg(); throw Hermes::Exceptions::Exception("Runge-Kutta time step failed"); } // Show the new time level solution. char title[100]; sprintf(title, "Solution - real part, Time %3.2f s", current_time); sview_real.set_title(title); sprintf(title, "Solution - imaginary part, Time %3.2f s", current_time); sview_imag.set_title(title); RealFilter real(&psi_time_new); ImagFilter imag(&psi_time_new); sview_real.show(&real); sview_imag.show(&imag); // Copy solution for the new time step. psi_time_prev.copy(&psi_time_new); // Increase current time and time step counter. current_time += time_step; ts++; } // Wait for the view to be closed. View::wait(); return 0; }
//
//	Demodulate a single complex sample.
//	The sample is first scaled by its own magnitude (tiny inputs are
//	replaced by a small fixed value), then the decoder chosen through
//	selectedDecoder derives a raw value from the scaled sample and the
//	stored previous sample(s). fm_afc is a running average of that raw
//	value (weight DCAlpha for the newest one); it is subtracted and the
//	difference is scaled by fm_cvt and divided by K_FM.
//	The scaled sample is remembered in Imin1/Qmin1 for the next call.
DSPFLOAT	fm_Demodulator::demodulate (DSPCOMPLEX z) {
DSPFLOAT	demodulated;
DSPFLOAT	I, Q;
#define	DCAlpha	0.0001
//#define	DCAlpha	0.000001

	if (abs (z) <= 0.001)
	   I = Q = 0.001;	// do not make these 0 too often
	else {
	   I = real (z) / abs (z);
	   Q = imag (z) / abs (z);
	}

	z	= DSPCOMPLEX (I, Q);

	switch (selectedDecoder) {
	   case FM2DECODER:
//	argument of the current sample times the conjugate of the previous one
	      demodulated = arg (z * DSPCOMPLEX (Imin1, - Qmin1));
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated = (demodulated - fm_afc) * fm_cvt;
	      break;

	   case FM3DECODER:
	      demodulated = myAtan. atan2 (Q * Imin1 - I * Qmin1,
	                                   I * Imin1 + Q * Qmin1);
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated = (demodulated - fm_afc) * fm_cvt;
	      break;

	   case FM4DECODER:
	      myfm_pll	-> do_pll (z);
//	lowpass the NCO frequency term to get a DC offset
	      fm_afc	= (1 - DCAlpha) * fm_afc +
	                          DCAlpha * myfm_pll -> getPhaseIncr ();
	      demodulated = (myfm_pll -> getPhaseIncr () - fm_afc) * fm_cvt;
	      break;

	   case FM5DECODER:
//	table lookup; the index is the cross term mapped through (v + 1) / 2
	      demodulated = (Imin1 * Q - Qmin1 * I + 1.0) / 2.0;
	      demodulated = Arcsine [(int)(demodulated * ArcsineSize)];
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated = (demodulated - fm_afc) * fm_cvt;
	      break;

	   default:
	   case FM1DECODER:
//	this decoder also looks two samples back and maintains Imin2/Qmin2
	      demodulated = Imin1 * (Q - Qmin2) - Qmin1 * (I - Imin2);
	      demodulated /= Imin1 * Imin1 + Qmin1 * Qmin1;
	      Imin2	= Imin1;
	      Qmin2	= Qmin1;
	      fm_afc	= (1 - DCAlpha) * fm_afc + DCAlpha * demodulated;
	      demodulated = (demodulated - fm_afc) * fm_cvt;
	      break;
	}
//	every decoder finishes with the same division
	demodulated /= K_FM;
//
//	and shift ...
	Imin1	= I;
	Qmin1	= Q;
	return demodulated;
}
/* ////////////////////////////////////////////////////////////////////////////
   -- Timing comparison of several ways to compute num_vecs dot products of
      length n on the device (single precision): repeated magma_sdot,
      magma_sgemv, magmablas_sgemv, magma_smdotc, magma_sgemvmdot and
      magma_sgemvmdot_shfl. For every num_vecs in 1..32 one line with the
      average runtime per iteration and the derived GFLOPS figure of each
      variant is printed.

      Returns info: 0 on a normal run, -1 if the last magma_sdot result is NaN.
      NOTE(review): CHECK is defined outside this block; it presumably stores
      the error in info and jumps to the cleanup label -- confirm. The vectors
      a, b, x, y and skp are only freed at the end of the inner loop body, not
      after the cleanup label.
*/
int main(  int argc, char** argv )
{
    magma_int_t info = 0;
    // NOTE(review): the queue is created before TESTING_INIT() runs --
    // confirm that this order is intended.
    magma_queue_t queue=NULL;
    magma_queue_create( 0, &queue );

    const float one = MAGMA_S_MAKE(1.0, 0.0);
    const float zero = MAGMA_S_MAKE(0.0, 0.0);
    float alpha;

    TESTING_INIT();

    magma_s_matrix a={Magma_CSR}, b={Magma_CSR}, x={Magma_CSR}, y={Magma_CSR}, skp={Magma_CSR};

    // table header
    printf("%%=======================================================================================================================================================================\n");
    printf("\n");
    printf(" | runtime | GFLOPS\n");
    printf("%% n num_vecs | CUDOT CUGEMV MAGMAGEMV MDOT MDGM MDGM_SHFL | CUDOT CUGEMV MAGMAGEMV MDOT MDGM MDGM_SHFL\n");
    printf("%%------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
    printf("\n");

    for( magma_int_t num_vecs=1; num_vecs <= 32; num_vecs += 1 ) {
        // the bounds make this loop run exactly once, with n = 500000
        for( magma_int_t n=500000; n < 500001; n += 10000 ) {
            int iters = 10;
            // 2*n operations per dot product, num_vecs dot products, iters repetitions
            float computations = (2.* n * iters * num_vecs);

            #define ENABLE_TIMER
            #ifdef ENABLE_TIMER
            real_Double_t mdot1, mdot2, mdgm1, mdgm2, magmagemv1, magmagemv2, cugemv1, cugemv2, cudot1, cudot2;
            real_Double_t mdot_time, mdgm_time, mdgmshf_time, magmagemv_time, cugemv_time, cudot_time;
            #endif

            // a: n x num_vecs, b: n x 1, skp: 1 x num_vecs result vector.
            // x and y always get 8 columns, independent of num_vecs
            // (NOTE(review): confirm this is enough for the kernels below).
            CHECK( magma_svinit( &a, Magma_DEV, n, num_vecs, one, queue ));
            CHECK( magma_svinit( &b, Magma_DEV, n, 1, one, queue ));
            CHECK( magma_svinit( &x, Magma_DEV, n, 8, one, queue ));
            CHECK( magma_svinit( &y, Magma_DEV, n, 8, one, queue ));
            CHECK( magma_svinit( &skp, Magma_DEV, 1, num_vecs, zero, queue ));

            // warm up
            CHECK( magma_sgemvmdot( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));

            // CUDOT: one magma_sdot call per column of a
            #ifdef ENABLE_TIMER
            cudot1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                for( int l=0; l<num_vecs; l++){
                    alpha = magma_sdot( n, a.dval+l*a.num_rows, 1, b.dval, 1, queue );
                    //cudaDeviceSynchronize();
                }
                //cudaDeviceSynchronize();
            }
            #ifdef ENABLE_TIMER
            cudot2 = magma_sync_wtime( queue );
            cudot_time=cudot2-cudot1;
            #endif

            // CUGeMV: all dot products at once as a transposed matrix-vector product
            #ifdef ENABLE_TIMER
            cugemv1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                magma_sgemv( MagmaTrans, n, num_vecs, one, a.dval, n, b.dval, 1, zero, skp.dval, 1, queue );
            }
            #ifdef ENABLE_TIMER
            cugemv2 = magma_sync_wtime( queue );
            cugemv_time=cugemv2-cugemv1;
            #endif

            // MAGMAGeMV: same call shape through magmablas_sgemv
            #ifdef ENABLE_TIMER
            magmagemv1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                magmablas_sgemv( MagmaTrans, n, num_vecs, one, a.dval, n, b.dval, 1, zero, skp.dval, 1, queue );
            }
            #ifdef ENABLE_TIMER
            magmagemv2 = magma_sync_wtime( queue );
            magmagemv_time=magmagemv2-magmagemv1;
            #endif

            // MDOT: num_vecs/2 calls handling two vectors each, plus one
            // single-vector call when num_vecs is odd. Every call receives the
            // same pointers, so only the amount of work is representative.
            #ifdef ENABLE_TIMER
            mdot1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                for( int c = 0; c<num_vecs/2; c++ ){
                    CHECK( magma_smdotc( n, 2, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
                }
                for( int c = 0; c<num_vecs%2; c++ ){
                    CHECK( magma_smdotc( n, 1, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
                }
                //h++;
            }
            #ifdef ENABLE_TIMER
            mdot2 = magma_sync_wtime( queue );
            mdot_time=mdot2-mdot1;
            #endif

            // MDGM
            #ifdef ENABLE_TIMER
            mdgm1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                CHECK( magma_sgemvmdot( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
                //h++;
            }
            #ifdef ENABLE_TIMER
            mdgm2 = magma_sync_wtime( queue );
            mdgm_time=mdgm2-mdgm1;
            #endif

            // MDGM_shfl (reuses the mdgm1/mdgm2 timestamps of the previous section)
            #ifdef ENABLE_TIMER
            mdgm1 = magma_sync_wtime( queue );
            #endif
            for( int h=0; h < iters; h++) {
                CHECK( magma_sgemvmdot_shfl( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue ));
            }
            #ifdef ENABLE_TIMER
            mdgm2 = magma_sync_wtime( queue );
            mdgmshf_time=mdgm2-mdgm1;
            #endif

            //magma_sprint_gpu(num_vecs,1,skp.dval,num_vecs);

            //Chronometry: average time per iteration, then GFLOPS over all iterations
            #ifdef ENABLE_TIMER
            printf("%d %d %e %e %e %e %e %e || %e %e %e %e %e %e\n",
                    int(n), int(num_vecs),
                    cudot_time/iters,
                    (cugemv_time)/iters,
                    (magmagemv_time)/iters,
                    (mdot_time)/iters,
                    (mdgm_time)/iters,
                    (mdgmshf_time)/iters,
                    computations/(cudot_time*1e9),
                    computations/(cugemv_time*1e9),
                    computations/(magmagemv_time*1e9),
                    computations/(mdot_time*1e9),
                    computations/(mdgm_time*1e9),
                    computations/(mdgmshf_time*1e9) );
            #endif

            magma_smfree(&a, queue );
            magma_smfree(&b, queue );
            magma_smfree(&x, queue );
            magma_smfree(&y, queue );
            magma_smfree(&skp, queue );
        }

        //printf("%%================================================================================================================================================\n");
        //printf("\n");
        //printf("\n");
    }

    // use alpha to silence compiler warnings
    if ( isnan( real( alpha ))) {
        info = -1;
    }

cleanup:
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
/* Fills this sweep with `points` values taken from
   logspace (start, stop, points): the sweep is resized to `points`
   entries and entry i receives the real part of the i-th generated
   value. */
void logsweep::create (nr_double_t start, nr_double_t stop, int points) {
  vector samples = logspace (start, stop, points);
  setSize (points);

  int idx = 0;
  while (idx < points) {
    const nr_double_t value = real (samples.get (idx));
    set (idx, value);
    ++idx;
  }
}
/* Returns 1 if the real part of each of the rows * cols entries is finite
   and 0 as soon as one is not. Only the real part is examined. */
int tmatrix<nr_type_t>::isFinite (void) {
  const int count = rows * cols;
  int idx = 0;
  while (idx < count) {
    if (!std::isfinite (real (data[idx])))
      return 0;
    ++idx;
  }
  return 1;
}
int main() { #ifdef _OPENMP const u_char num_procs = omp_get_num_procs(); //!< number of available processors #else const u_char num_procs = 1; #endif std::cerr << "processors in use: " << short(num_procs) << std::endl; // generation of childrens, e.g.: only root = 0, grand-children = 2 // total number of nodes, including (childsbyDimension) boundary elements real simulationTime = g_span[dimX]/g_velocity*5; // 5 periods #ifdef REGULAR monores_grid_t grid(g_level); #else multires_grid_t grid(g_level); #endif auto start = std::chrono::steady_clock::now(); do { grid.timeStep(); } while(grid.getTime() < simulationTime); auto done = std::chrono::steady_clock::now(); std::cerr << "simulation time passed: " << grid.getTime() << std::endl; double elapsed_time = std::chrono::duration_cast<std::chrono::duration<double>>(done - start).count(); std::cerr << "calculation time: " << elapsed_time << std::endl; size_t size = grid.size(); size_t NN = pow(1 << g_level, g_dimension); std::cerr << "used nodes: " << size << "/" << NN << "=" << real(size)/NN << std::endl; // output file #ifndef REGULAR grid.unfold(g_level); #endif std::cerr << "after unfold: size = " << grid.size() << std::endl; std::ofstream file("/tmp/output.txt"); file << "# x y phi" << std::endl; for(const point_t point: grid) { // std::cerr << point.m_x[dimX] << " : " << point.m_phi << std::endl; /* */ file << boost::format("%e %e %e\n") % point.m_x[dimX] % point.m_x[dimY] // % point.m_index[dimX] // % point.m_index[dimY] % point.m_phi; /* file << boost::format("%e ") % point.m_phi; static size_t count = 0; const size_t N = (1 << g_level); if (++count % N == 0 ) file << std::endl; */ } file.close(); return 0; }
/*
 * Dispatches one CoreInputDevice call: 'method' selects the operation, 'ptr'
 * is cast to the matching argument structure and 'ret_ptr' to the matching
 * return structure. The operation's result code is stored in the return
 * structure, *ret_length is set to that structure's size and DFB_OK is
 * returned. Unknown method ids yield DFB_NOSUCHMETHOD.
 */
static DFBResult
__CoreInputDeviceDispatch__Dispatch( CoreInputDevice *obj,
                                     FusionID         caller,
                                     int              method,
                                     void            *ptr,
                                     unsigned int     length,
                                     void            *ret_ptr,
                                     unsigned int     ret_size,
                                     unsigned int    *ret_length )
{
    DirectFB::IInputDevice_Real impl( core_dfb, obj );

    if (method == CoreInputDevice_SetKeymapEntry) {
        CoreInputDeviceSetKeymapEntry       *in  = (CoreInputDeviceSetKeymapEntry *) ptr;
        CoreInputDeviceSetKeymapEntryReturn *out = (CoreInputDeviceSetKeymapEntryReturn *) ret_ptr;

        D_DEBUG_AT( DirectFB_CoreInputDevice, "=-> CoreInputDevice_SetKeymapEntry\n" );

        out->result = impl.SetKeymapEntry( in->key_code, &in->entry );

        *ret_length = sizeof(CoreInputDeviceSetKeymapEntryReturn);
        return DFB_OK;
    }

    if (method == CoreInputDevice_ReloadKeymap) {
        /* this call takes no arguments, so 'ptr' is not looked at */
        CoreInputDeviceReloadKeymapReturn *out = (CoreInputDeviceReloadKeymapReturn *) ret_ptr;

        D_DEBUG_AT( DirectFB_CoreInputDevice, "=-> CoreInputDevice_ReloadKeymap\n" );

        out->result = impl.ReloadKeymap( );

        *ret_length = sizeof(CoreInputDeviceReloadKeymapReturn);
        return DFB_OK;
    }

    if (method == CoreInputDevice_SetConfiguration) {
        CoreInputDeviceSetConfiguration       *in  = (CoreInputDeviceSetConfiguration *) ptr;
        CoreInputDeviceSetConfigurationReturn *out = (CoreInputDeviceSetConfigurationReturn *) ret_ptr;

        D_DEBUG_AT( DirectFB_CoreInputDevice, "=-> CoreInputDevice_SetConfiguration\n" );

        out->result = impl.SetConfiguration( &in->config );

        *ret_length = sizeof(CoreInputDeviceSetConfigurationReturn);
        return DFB_OK;
    }

    return DFB_NOSUCHMETHOD;
}
/** \brief ensure this remains a valid rotation */ void normalize() { (*this) /= std::sqrt( real()*real() + imag()*imag() ); }
/** \brief complex product of this object (left operand) and \p r
 *
 * Result: real = a.re*r.re - a.im*r.im, imag = a.im*r.re + a.re*r.im,
 * where a is this object.
 */
inline rot_complex operator*(const rot_complex<T2> &r) const
{
    const rot_complex &lhs = *this;
    return rot_complex(
        lhs.real() * r.real() - lhs.imag() * r.imag(),
        lhs.imag() * r.real() + lhs.real() * r.imag() );
}
/** \brief store the multiplicative inverse of this complex number in \p out
 *
 * For z = a + b*i the inverse is conj(z) / |z|^2 = (a - b*i) / (a*a + b*b),
 * so the imaginary part has to change sign. (The sign flip was missing,
 * which produced z / |z|^2 instead of the inverse.)
 *
 * The squared magnitude is evaluated before \p out is written. There is no
 * guard against a magnitude of zero.
 */
inline void invert(rot_complex<T2> &out) const
{
    const T denom = real()*real() + imag()*imag();
    out.real() =  real() / denom;
    out.imag() = -imag() / denom;
}
// Preconditioned conjugate gradient solve of mat * x = b with reliable
// updates: the iteration runs on "sloppy" precision copies (matSloppy), and
// whenever the sloppy residual has dropped far enough the residual is
// recomputed with mat and the partial solution is accumulated in y.
// If a preconditioner K is set it is applied to the residual in every
// iteration; otherwise plain CG is performed.
//
// x : initial guess on entry, solution on exit
// b : source field
//
// Statistics (true_res, secs, gflops, iter) are written to param.
//
// Changes with respect to the previous revision:
//  - a zero source now returns right after x has been set to b; the solver
//    used to fall through, stop the INIT profile a second time and divide
//    by b2 == 0 at the end,
//  - x_sloppy is only deleted when it was allocated here; it aliases the
//    caller's x when the precisions differ but
//    param.use_sloppy_partial_accumulator is off,
//  - the work-field pointers start out as null pointers so that no
//    indeterminate pointer value is copied when K is not set.
void PreconCG::operator()(cudaColorSpinorField &x, cudaColorSpinorField &b)
{
  profile.Start(QUDA_PROFILE_INIT);

  // Check to see that we're not trying to invert on a zero-field source
  const double b2 = norm2(b);
  if(b2 == 0){
    profile.Stop(QUDA_PROFILE_INIT);
    printfQuda("Warning: inverting on zero-field source\n");
    x=b;
    param.true_res = 0.0;
    param.true_res_hq = 0.0;
    return;
  }

  int k=0;
  int rUpdate=0;

  cudaColorSpinorField* minvrPre = 0;
  cudaColorSpinorField* rPre = 0;
  cudaColorSpinorField* minvr = 0;
  cudaColorSpinorField* minvrSloppy = 0;
  cudaColorSpinorField* p = 0;

  ColorSpinorParam csParam(b);
  cudaColorSpinorField r(b);
  if(K) minvr = new cudaColorSpinorField(b);
  csParam.create = QUDA_ZERO_FIELD_CREATE;
  cudaColorSpinorField y(b,csParam);

  mat(r, x, y); // => r = A*x;
  double r2 = xmyNormCuda(b,r);

  csParam.setPrecision(param.precision_sloppy);
  cudaColorSpinorField tmpSloppy(x,csParam);
  cudaColorSpinorField Ap(x,csParam);

  // Sloppy residual: alias of r when no precision change is needed.
  cudaColorSpinorField *r_sloppy;
  if(param.precision_sloppy == x.Precision())
  {
    r_sloppy = &r;
    minvrSloppy = minvr;
  }else{
    csParam.create = QUDA_COPY_FIELD_CREATE;
    r_sloppy = new cudaColorSpinorField(r,csParam);
    if(K) minvrSloppy = new cudaColorSpinorField(*minvr,csParam);
  }

  // Sloppy solution accumulator: alias of x unless a separate sloppy
  // accumulator is both needed and requested.
  cudaColorSpinorField *x_sloppy;
  if(param.precision_sloppy == x.Precision() ||
      !param.use_sloppy_partial_accumulator) {
    csParam.create = QUDA_REFERENCE_FIELD_CREATE;
    x_sloppy = &x;
  }else{
    csParam.create = QUDA_COPY_FIELD_CREATE;
    x_sloppy = new cudaColorSpinorField(x,csParam);
  }

  cudaColorSpinorField &xSloppy = *x_sloppy;
  cudaColorSpinorField &rSloppy = *r_sloppy;

  if(&x != &xSloppy){
    copyCuda(y, x); // copy x to y
    zeroCuda(xSloppy);
  }else{
    zeroCuda(y); // no reliable updates // NB: check this
  }

  const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;

  if(K){
    csParam.create = QUDA_COPY_FIELD_CREATE;
    csParam.setPrecision(param.precision_precondition);
    rPre = new cudaColorSpinorField(rSloppy,csParam);
    // Create minvrPre
    minvrPre = new cudaColorSpinorField(*rPre);
    globalReduce = false;
    (*K)(*minvrPre, *rPre);
    globalReduce = true;
    *minvrSloppy = *minvrPre;
    p = new cudaColorSpinorField(*minvrSloppy);
  }else{
    p = new cudaColorSpinorField(rSloppy);
  }

  profile.Stop(QUDA_PROFILE_INIT);

  profile.Start(QUDA_PROFILE_PREAMBLE);

  double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
  double heavy_quark_res = 0.0; // heavy quark residual
  if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z);
  int heavy_quark_check = 10; // how often to check the heavy quark residual

  double alpha = 0.0, beta=0.0;
  double pAp;
  double rMinvr = 0;
  double rMinvr_old = 0.0;
  double r_new_Minvr_old = 0.0;
  double r2_old = 0;
  r2 = norm2(r);

  double rNorm = sqrt(r2);
  double r0Norm = rNorm;
  double maxrx = rNorm;
  double maxrr = rNorm;
  double delta = param.delta;

  if(K) rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);

  profile.Stop(QUDA_PROFILE_PREAMBLE);
  profile.Start(QUDA_PROFILE_COMPUTE);

  quda::blas_flops = 0;

  int steps_since_reliable = 1;

  const int maxResIncrease = 0;

  while(!convergence(r2, heavy_quark_res, stop, param.tol_hq) && k < param.maxiter){

    matSloppy(Ap, *p, tmpSloppy);

    double sigma;
    bool breakdown = false;
    pAp = reDotProductCuda(*p,Ap);

    alpha = (K) ? rMinvr/pAp : r2/pAp;
    Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy);
    // r --> r - alpha*A*p
    r2_old = r2;
    r2 = real(cg_norm);

    sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2; // use r2 if (r_k+1, r_k-1 - r_k) breaks

    if(K) rMinvr_old = rMinvr;

    rNorm = sqrt(r2);
    if(rNorm > maxrx) maxrx = rNorm;
    if(rNorm > maxrr) maxrr = rNorm;

    // Reliable-update triggers: the residual dropped by the factor delta
    // relative to the norm recorded at the last reliable update / the maximum
    // seen since then.
    int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
    int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;

    // force a reliable update if we are within target tolerance (only if doing reliable updates)
    if( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1;

    if( !(updateR || updateX) ){

      if(K){
        r_new_Minvr_old = reDotProductCuda(rSloppy,*minvrSloppy);
        *rPre = rSloppy;
        globalReduce = false;
        (*K)(*minvrPre, *rPre);
        globalReduce = true;

        *minvrSloppy = *minvrPre;

        rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
        beta = (rMinvr - r_new_Minvr_old)/rMinvr_old;
        axpyZpbxCuda(alpha, *p, xSloppy, *minvrSloppy, beta);
      }else{
        beta = sigma/r2_old; // use the alternative beta computation
        axpyZpbxCuda(alpha, *p, xSloppy, rSloppy, beta);
      }
    } else { // reliable update

      axpyCuda(alpha, *p, xSloppy); // xSloppy += alpha*p
      copyCuda(x, xSloppy);
      xpyCuda(x, y); // y += x
      // Now compute r
      mat(r, y, x); // x is just a temporary here
      r2 = xmyNormCuda(b, r);
      copyCuda(rSloppy, r); // copy r to rSloppy
      zeroCuda(xSloppy);

      // break-out check if we have reached the limit of the precision
      // NOTE: the counter is static and therefore shared by all calls.
      static int resIncrease = 0;
      if(sqrt(r2) > r0Norm && updateX) { // reuse r0Norm for this
        warningQuda("PCG: new reliable residual norm %e is greater than previous reliable residual norm %e", sqrt(r2), r0Norm);
        k++;
        rUpdate++;
        if(++resIncrease > maxResIncrease) break;
      }else{
        resIncrease = 0;
      }

      rNorm = sqrt(r2);
      maxrr = rNorm;
      maxrx = rNorm;
      r0Norm = rNorm;
      ++rUpdate;

      if(K){
        *rPre = rSloppy;
        globalReduce = false;
        (*K)(*minvrPre, *rPre);
        globalReduce = true;

        *minvrSloppy = *minvrPre;

        rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
        beta = rMinvr/rMinvr_old;

        xpayCuda(*minvrSloppy, beta, *p); // p = minvrSloppy + beta*p
      }else{ // standard CG - no preconditioning

        // explicitly restore the orthogonality of the gradient vector
        double rp = reDotProductCuda(rSloppy, *p)/(r2);
        axpyCuda(-rp, rSloppy, *p);

        beta = r2/r2_old;
        xpayCuda(rSloppy, beta, *p);

        steps_since_reliable = 0;
      }
    }
    breakdown = false;
    ++k;
    PrintStats("PCG", k, r2, b2, heavy_quark_res);
  }

  profile.Stop(QUDA_PROFILE_COMPUTE);

  profile.Start(QUDA_PROFILE_EPILOGUE);

  if(x.Precision() != param.precision_sloppy) copyCuda(x, xSloppy);
  xpyCuda(y, x); // x += y

  param.secs = profile.Last(QUDA_PROFILE_COMPUTE);
  double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops() + matPrecon.flops())*1e-9;
  reduceDouble(gflops);
  param.gflops = gflops;
  param.iter += k;

  if (k==param.maxiter)
    warningQuda("Exceeded maximum iterations %d", param.maxiter);

  if (getVerbosity() >= QUDA_VERBOSE)
    printfQuda("CG: Reliable updates = %d\n", rUpdate);

  // compute the true residual
  mat(r, x, y);
  double true_res = xmyNormCuda(b, r);
  param.true_res = sqrt(true_res / b2);

  // reset the flops counters
  quda::blas_flops = 0;
  mat.flops();
  matSloppy.flops();
  matPrecon.flops();

  profile.Stop(QUDA_PROFILE_EPILOGUE);
  profile.Start(QUDA_PROFILE_FREE);

  if(K){ // These are only needed if preconditioning is used
    delete minvrPre;
    delete rPre;
    delete minvr;
    if(x.Precision() != param.precision_sloppy) delete minvrSloppy;
  }
  delete p;

  if(x.Precision() != param.precision_sloppy){
    // x_sloppy may still alias the caller's x (sloppy accumulator disabled);
    // only a field allocated above may be deleted.
    if(x_sloppy != &x) delete x_sloppy;
    delete r_sloppy;
  }

  profile.Stop(QUDA_PROFILE_FREE);
  return;
}
// Builds a types::Sparse from the group `dataset`, reading its "__nnz__",
// "__inner__", "__outer__" and "__data__" children. An empty matrix of the
// stored dimensions is returned when the non-zero count is 0, and nullptr
// when reading one of the arrays fails. The list is closed on every path.
static types::InternalType* import_sparse(int dataset)
{
    // dimensions of the matrix
    int complex = 0;
    std::vector<int> dims;
    getDimsNode(dataset, &complex, dims);

    // number of non zero entries
    int nonZeros = 0;
    const int nnzNode = getDataSetIdFromName(dataset, "__nnz__");
    readInteger32Matrix(nnzNode, &nonZeros);
    if (nonZeros == 0)
    {
        closeList6(dataset);
        return new types::Sparse(dims[0], dims[1]);
    }

    // Each array below is queried twice: first for its number of dimensions,
    // then again with storage for the dimensions to obtain its total size.

    // inner vector
    const int innerNode = getDataSetIdFromName(dataset, "__inner__");
    int innerRank = 0;
    int innerSize = getDatasetInfo(innerNode, &complex, &innerRank, NULL);
    std::vector<int> innerDims(innerRank);
    innerSize = getDatasetInfo(innerNode, &complex, &innerRank, innerDims.data());

    std::vector<int> inner(innerSize);
    int status = readInteger32Matrix(innerNode, inner.data());
    if (status < 0)
    {
        closeList6(dataset);
        return nullptr;
    }

    // outer vector
    const int outerNode = getDataSetIdFromName(dataset, "__outer__");
    int outerRank = 0;
    int outerSize = getDatasetInfo(outerNode, &complex, &outerRank, NULL);
    std::vector<int> outerDims(outerRank);
    outerSize = getDatasetInfo(outerNode, &complex, &outerRank, outerDims.data());

    std::vector<int> outer(outerSize);
    status = readInteger32Matrix(outerNode, outer.data());
    if (status < 0)
    {
        closeList6(dataset);
        return nullptr;
    }

    // values; `complex` now reflects what the data node reported
    const int valuesNode = getDataSetIdFromName(dataset, "__data__");
    int valuesRank = 0;
    int valuesSize = getDatasetInfo(valuesNode, &complex, &valuesRank, NULL);
    std::vector<int> valuesDims(valuesRank);
    valuesSize = getDatasetInfo(valuesNode, &complex, &valuesRank, valuesDims.data());

    std::vector<double> realPart(valuesSize);

    if (!complex)
    {
        status = readDoubleMatrix(valuesNode, realPart.data());
        if (status < 0)
        {
            closeList6(dataset);
            return nullptr;
        }

        types::Sparse* sparse = new types::Sparse(dims[0], dims[1], nonZeros, inner.data(), outer.data(), realPart.data(), nullptr);
        closeList6(dataset);
        return sparse;
    }

    std::vector<double> imagPart(valuesSize);
    status = readDoubleComplexMatrix(valuesNode, realPart.data(), imagPart.data());
    if (status < 0)
    {
        closeList6(dataset);
        return nullptr;
    }

    types::Sparse* sparse = new types::Sparse(dims[0], dims[1], nonZeros, inner.data(), outer.data(), realPart.data(), imagPart.data());
    closeList6(dataset);
    return sparse;
}
// Multipole-to-local translation for N separation vectors at once.
//
// CiL  : output; for every index pair (j,k), 0 <= k <= j < FMM_P, N values are
//        written at offset N * (j*j + j + k) and N values at offset
//        N * (j*j + j - k)
// CjM  : input coefficients, read at n*n + n +/- |m| (passed by value, i.e.
//        copied on every call)
// d    : the three components of the N separation vectors, d[c][i]
// N    : number of separation vectors
// L_r, L_i : scratch accumulators; entries 0..N-1 are overwritten
// Ynm  : scratch table of size >= FMM_P*FMM_P * Nynm, filled in the first
//        loop and read in the second; entry (index, i) lives at
//        index * Nynm + i
//
// prefactor, Cnm_r, Cnm_i, SGN, COMPLEX_MULT and COMPLEX_MULT_ADD are defined
// outside this function.
void exafmm_kernel::M2L(std::vector<real>& CiL, const std::vector<real> CjM,
        const std::array<std::vector<real>, NDIM>& d, integer N, std::vector<real>& L_r, std::vector<real>& L_i, std::vector<real>& Ynm) {
    // Row stride of Ynm: N rounded up to a multiple of 64 (for N >= 1).
    integer Nynm;
    Nynm = (((N - 1) / 64) + 1) * 64;
    // Part 1: fill the Ynm table for every separation vector i.
#pragma vector aligned
#pragma simd
    for (integer i = 0; i != N; ++i) {
        // spherical coordinates of d[.][i]; rho == 0 is not handled
        real rho = std::sqrt(d[0][i] * d[0][i] + d[1][i] * d[1][i] + d[2][i] * d[2][i]);
        real theta = std::acos(d[2][i] / rho);
        real phi = std::atan2(d[1][i], d[0][i]);
        real x = std::cos(theta);                                      // x = cos(theta)
        real y = std::sin(theta);                                      // y = sin(theta)
        real fact = 1;                                                 // Initialize 2 * m + 1
        real pn = 1;                                                   // Initialize Legendre polynomial Pn
        real rhom = real(1.0) / rho;                                   // Initialize rho^(-m-1)
#pragma novector
        for (int m = 0; m != FMM_P; ++m) {                             // Loop over m in Ynm
            // cos(m*phi) and sin(m*phi)
            real eim_r = std::cos(real(m) * phi);
            real eim_i = std::sin(real(m) * phi);
            real p = pn;                                               // Associated Legendre polynomial Pnm
            int npn = m * m + 2 * m;                                   // Index of Ynm for m > 0
            int nmn = m * m;                                           // Index of Ynm for m < 0
            Ynm[npn * Nynm + i] = rhom * p * prefactor[npn] * eim_r;   // rho^(-m-1) * Ynm: cosine part at the +m index
            if (npn != nmn) {
                // sine part goes to the -m index; skipped for m == 0, where both indices coincide
                Ynm[nmn * Nynm + i] = rhom * p * prefactor[npn] * eim_i;
            }
            real p1 = p;                                               // Pnm-1
            p = x * real(2 * m + 1) * p1;                              // Pnm using recurrence relation
            rhom /= rho;                                               // rho^(-m-1)
            real rhon = rhom;                                          // rho^(-n-1)
#pragma novector
            for (int n = m + 1; n != FMM_P; ++n) {                     // Loop over n in Ynm
                int npm = n * n + n + m;                               // Index of Ynm for m > 0
                int nmm = n * n + n - m;                               // Index of Ynm for m < 0
                Ynm[npm * Nynm + i] = rhon * p * prefactor[npm] * eim_r; // rho^(-n-1) * Ynm: cosine part at the +m index
                if (npm != nmm) {
                    // sine part goes to the -m index; skipped for m == 0
                    Ynm[nmm * Nynm + i] = rhon * p * prefactor[npm] * eim_i;
                }
                real p2 = p1;                                          // Pnm-2
                p1 = p;                                                // Pnm-1
                p = (x * real(2 * n + 1) * p1 - real(n + m) * p2) / real(n - m + 1); // Pnm using recurrence relation
                rhon /= rho;                                           // rho^(-n-1)
            }                                                          // End loop over n in Ynm
            pn = -pn * fact * y;                                       // Pn
            fact += real(2);                                           // 2 * m + 1
        }                                                              // End loop over m in Ynm
    }
    // Part 2: for every output pair (j,k) accumulate the contributions of all
    // input coefficients (n,m) with n + j < FMM_P.
    for (integer j = 0; j != FMM_P; ++j) {
        for (integer k = 0; k <= j; ++k) {
            const integer jkp = j * j + j + k;
            const integer jkm = j * j + j - k;
            // reset the accumulators
#pragma vector aligned
#pragma simd
            for (integer i = 0; i != N; ++i) {
                L_r[i] = L_i[i] = real(0.0);
            }
            for (integer n = 0; n != FMM_P - j; ++n) {
                for (integer m = -n; m <= +n; ++m) {
                    const integer nn = n * n + n;
                    const integer nj = (n + j) * ((n + j) + 1);
                    const integer jknm = jkp * FMM_P * FMM_P + n * n + n + m;
                    const integer nmp = nn + std::abs(m);
                    const integer nmm = nn - std::abs(m);
                    const integer jnkmp = nj + std::abs(m - k);
                    const integer jnkmm = nj - std::abs(m - k);
                    real tmp_r, tmp_i;
                    const real sgn = SGN(m-k);
                    // (tmp_r, tmp_i) is computed once per (n,m) from the input
                    // coefficient pair and Cnm, then reused for all i
                    COMPLEX_MULT(tmp_r, tmp_i, CjM[nmp], SGN(m) * CjM[nmm], Cnm_r[jknm], Cnm_i[jknm]);
                    // rows of the Ynm table for degree n + j, order +/-|m - k|
                    const auto Yp = Ynm.data() + Nynm * jnkmp;
                    const auto Ym = Ynm.data() + Nynm * jnkmm;
#pragma vector aligned
#pragma simd
                    for (integer i = 0; i != N; ++i) {
                        COMPLEX_MULT_ADD(L_r[i], L_i[i], tmp_r, tmp_i, Yp[i], sgn * Ym[i]);
                    }
                }
            }
            // Store the accumulators: L_r at the +k slot, L_i at the -k slot.
            // For k == 0 both slots are the same and L_r is stored.
            auto Cp = CiL.data() + N * jkp;
            auto Cm = CiL.data() + N * jkm;
//#pragma vector aligned
#pragma simd
            for (integer i = 0; i != N; ++i) {
                Cp[i] = L_r[i];
                Cm[i] = (k == 0) ? L_r[i] : L_i[i];
            }
        }
    }
}
// True if the real part is >= a's real part OR the imaginary part is >= a's
// imaginary part; false only when both parts are smaller.
// NOTE(review): with this definition x >= y and y >= x can both hold for
// different values -- confirm that callers expect this.
bool Complex::operator>=(const Complex & a) const
{
    return real() >= a.real() || imag() >= a.imag();
}
/* EAM July 2004 (revised to dynamic buffer July 2005)
 * There are probably an infinite number of things that can
 * go wrong if the user mis-matches arguments and format strings
 * in the call to sprintf, but I hope none will do worse than
 * result in a garbage output string.
 *
 * Stack interface: the parameter count is popped first, then the parameters
 * themselves. They come off the stack in reverse order, so the format string
 * ends up in args[nargs-1] and the value for the first conversion in
 * args[nargs-2]. The formatted string is pushed as the result.
 *
 * The format is processed one conversion at a time: the format string is
 * temporarily terminated after the current conversion (plus trailing
 * literal text), that piece is handed to snprintf()/sprintf() together with
 * one parameter, and the overwritten character is restored afterwards.
 *
 * The 'arg' parameter is not referenced in the body.
 *
 * NOTE(review): nargs is not checked for being at least 1 before
 * args[nargs-1] is accessed -- confirm the caller guarantees a format
 * argument.
 */
void
f_sprintf(union argument *arg)
{
    struct value a[10], *args;
    struct value num_params;
    struct value result;
    char *buffer;
    int bufsize;
    char *next_start, *outpos, tempchar;
    int next_length;
    char *prev_start;
    int prev_pos;
    int i, remaining;
    int nargs = 0;
    int save_errno;
    enum DATA_TYPES spec_type;

    /* Retrieve number of parameters from top of stack */
    pop(&num_params);
    nargs = num_params.v.int_val;
    if (nargs > 10) {	/* Fall back to slow but sure allocation */
        args = gp_alloc(sizeof(struct value)*nargs, "sprintf args");
    } else
        args = a;

    for (i=0; i<nargs; i++)
        pop(&args[i]);  /* pop next argument */

    /* Make sure we got a format string of some sort */
    if (args[nargs-1].type != STRING)
        int_error(NO_CARET,"First parameter to sprintf must be a format string");

    /* Allocate space for the output string. If this isn't */
    /* long enough we can reallocate a larger space later. */
    bufsize = 80 + strlen(args[nargs-1].v.string_val);
    buffer = gp_alloc(bufsize, "f_sprintf");

    /* Copy leading fragment of format into output buffer */
    /* (strncpy does not terminate it; the first snprintf/sprintf call or */
    /* the final "*outpos = '\0'" below does) */
    outpos = buffer;
    next_start = args[nargs-1].v.string_val;
    next_length = strcspn(next_start,"%");
    strncpy(outpos, next_start, next_length);

    next_start += next_length;
    outpos += next_length;

    /* Format the remaining sprintf() parameters one by one */
    /* prev_start/prev_pos remember where the current conversion began in */
    /* the format and in the output, so that it can be redone after the */
    /* buffer has been enlarged */
    prev_start = next_start;
    prev_pos = next_length;
    remaining = nargs - 1;

    /* If the user has set an explicit LC_NUMERIC locale, apply it */
    /* to sprintf calls during expression evaluation.              */
    set_numeric_locale();

    /* Each time we start this loop we are pointing to a % character */
    while (remaining-->0 && next_start[0] && next_start[1]) {
        /* 'remaining' has already been decremented here */
        struct value *next_param = &args[remaining];

        /* Check for %%; print as literal and don't consume a parameter */
        if (!strncmp(next_start,"%%",2)) {
            next_start++;
            do {
                *outpos++ = *next_start++;
            } while(*next_start && *next_start != '%');
            remaining++;	/* undo the decrement: no parameter was used */
            continue;
        }

        /* Isolate the current conversion (up to, not including, the next */
        /* '%') by terminating the format string there for the moment */
        next_length = strcspn(next_start+1,"%") + 1;
        tempchar = next_start[next_length];
        next_start[next_length] = '\0';

        spec_type = sprintf_specifier(next_start);

        /* string value <-> numerical value check */
        if ( spec_type == STRING && next_param->type != STRING )
            int_error(NO_CARET,"f_sprintf: attempt to print numeric value with string format");
        if ( spec_type != STRING && next_param->type == STRING )
            int_error(NO_CARET,"f_sprintf: attempt to print string value with numeric format");

#ifdef HAVE_SNPRINTF
        /* Use the format to print next arg */
        save_errno = errno;
        switch(spec_type) {
        case INTGR:
            snprintf(outpos,bufsize-(outpos-buffer),
                     next_start, (int)real(next_param));
            break;
        case CMPLX:
            snprintf(outpos,bufsize-(outpos-buffer),
                     next_start, real(next_param));
            break;
        case STRING:
            snprintf(outpos,bufsize-(outpos-buffer),
                     next_start, next_param->v.string_val);
            break;
        default:
            int_error(NO_CARET,"internal error: invalid spec_type");
        }
#if _MSC_VER
        buffer[bufsize-1] = '\0';	/* VC++ is not ANSI-compliant */
        if (errno == ERANGE) errno = save_errno;
#endif
#else
        /* FIXME - this is bad; we should dummy up an snprintf equivalent */
        /* (sprintf writes without any bound on the remaining buffer space) */
        switch(spec_type) {
        case INTGR:
            sprintf(outpos, next_start, (int)real(next_param));
            break;
        case CMPLX:
            sprintf(outpos, next_start, real(next_param));
            break;
        case STRING:
            sprintf(outpos, next_start, next_param->v.string_val);
            break;
        default:
            int_error(NO_CARET,"internal error: invalid spec_type");
        }
#endif

        /* Restore the character that was overwritten by the terminator */
        next_start[next_length] = tempchar;
        next_start += next_length;
        outpos = &buffer[strlen(buffer)];

        /* Check whether previous parameter output hit the end of the buffer */
        /* If so, reallocate a larger buffer, go back and try it again.      */
        if (strlen(buffer) >= bufsize-2) {
            bufsize *= 2;
            buffer = gp_realloc(buffer, bufsize, "f_sprintf");
            next_start = prev_start;
            outpos = buffer + prev_pos;
            remaining++;	/* the same parameter is formatted again */
            continue;
        } else {
            prev_start = next_start;
            prev_pos = outpos - buffer;
        }

    }

    /* Copy the trailing portion of the format, if any */
    /* We could just call snprintf(), but it doesn't check for */
    /* whether there really are more variables to handle.      */
    /* A "%%" pair is reduced to a single '%' while copying.    */
    i = bufsize - (outpos-buffer);
    while (*next_start && --i > 0) {
        if (*next_start == '%' && *(next_start+1) == '%')
            next_start++;
        *outpos++ = *next_start++;
    }
    *outpos = '\0';

    FPRINTF((stderr," snprintf result = \"%s\"\n",buffer));
    push(Gstring(&result, buffer));
    free(buffer);

    /* Free any strings from parameters we have now used */
    for (i=0; i<nargs; i++)
        gpfree_string(&args[i]);

    if (args != a)
        free(args);

    /* Return to C locale for internal use */
    reset_numeric_locale();
}
/// Modulus of this complex number: the square root of the sum of sqr() applied
/// to the real and to the imaginary part.
double Complex::mod() const
{
    return sqrt(sqr(imag()) + sqr(real()));
}
/* //////////////////////////////////////////////////////////////////////////// -- Testing zunmbr */ int main( int argc, char** argv ) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; double Cnorm, error, dwork[1]; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t ione = 1; magma_int_t m, n, k, mi, ni, mm, nn, nq, size, info; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t nb, ldc, lda, lwork, lwork_max; magmaDoubleComplex *C, *R, *A, *work, *tau, *tauq, *taup; double *d, *e; magma_int_t status = 0; magma_opts opts; opts.parse_opts( argc, argv ); // need slightly looser bound (60*eps instead of 30*eps) for some tests opts.tolerance = max( 60., opts.tolerance ); double tol = opts.tolerance * lapackf77_dlamch("E"); // test all combinations of input parameters magma_vect_t vect [] = { MagmaQ, MagmaP }; magma_side_t side [] = { MagmaLeft, MagmaRight }; magma_trans_t trans[] = { Magma_ConjTrans, MagmaNoTrans }; printf("%% M N K vect side trans CPU Gflop/s (sec) GPU Gflop/s (sec) ||R||_F / ||QC||_F\n"); printf("%%==============================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int ivect = 0; ivect < 2; ++ivect ) { for( int iside = 0; iside < 2; ++iside ) { for( int itran = 0; itran < 2; ++itran ) { for( int iter = 0; iter < opts.niter; ++iter ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; nb = magma_get_zgebrd_nb( m, n ); ldc = m; // A is nq x k (vect=Q) or k x nq (vect=P) // where nq=m (left) or nq=n (right) nq = (side[iside] == MagmaLeft ? m : n ); mm = (vect[ivect] == MagmaQ ? nq : k ); nn = (vect[ivect] == MagmaQ ? 
k : nq); lda = mm; // MBR calls either MQR or MLQ in various ways if ( vect[ivect] == MagmaQ ) { if ( nq >= k ) { gflops = FLOPS_ZUNMQR( m, n, k, side[iside] ) / 1e9; } else { if ( side[iside] == MagmaLeft ) { mi = m - 1; ni = n; } else { mi = m; ni = n - 1; } gflops = FLOPS_ZUNMQR( mi, ni, nq-1, side[iside] ) / 1e9; } } else { if ( nq > k ) { gflops = FLOPS_ZUNMLQ( m, n, k, side[iside] ) / 1e9; } else { if ( side[iside] == MagmaLeft ) { mi = m - 1; ni = n; } else { mi = m; ni = n - 1; } gflops = FLOPS_ZUNMLQ( mi, ni, nq-1, side[iside] ) / 1e9; } } // workspace for gebrd is (mm + nn)*nb // workspace for unmbr is m*nb or n*nb, depending on side lwork_max = max( (mm + nn)*nb, max( m*nb, n*nb )); // this rounds it up slightly if needed to agree with lwork query below lwork_max = int( real( magma_zmake_lwork( lwork_max ))); TESTING_MALLOC_CPU( C, magmaDoubleComplex, ldc*n ); TESTING_MALLOC_CPU( R, magmaDoubleComplex, ldc*n ); TESTING_MALLOC_CPU( A, magmaDoubleComplex, lda*nn ); TESTING_MALLOC_CPU( work, magmaDoubleComplex, lwork_max ); TESTING_MALLOC_CPU( d, double, min(mm,nn) ); TESTING_MALLOC_CPU( e, double, min(mm,nn) ); TESTING_MALLOC_CPU( tauq, magmaDoubleComplex, min(mm,nn) ); TESTING_MALLOC_CPU( taup, magmaDoubleComplex, min(mm,nn) ); // C is full, m x n size = ldc*n; lapackf77_zlarnv( &ione, ISEED, &size, C ); lapackf77_zlacpy( "Full", &m, &n, C, &ldc, R, &ldc ); size = lda*nn; lapackf77_zlarnv( &ione, ISEED, &size, A ); // compute BRD factorization to get Householder vectors in A, tauq, taup //lapackf77_zgebrd( &mm, &nn, A, &lda, d, e, tauq, taup, work, &lwork_max, &info ); magma_zgebrd( mm, nn, A, lda, d, e, tauq, taup, work, lwork_max, &info ); if (info != 0) { printf("magma_zgebrd returned error %d: %s.\n", (int) info, magma_strerror( info )); } if ( vect[ivect] == MagmaQ ) { tau = tauq; } else { tau = taup; } /* ===================================================================== Performs operation using LAPACK 
=================================================================== */ cpu_time = magma_wtime(); lapackf77_zunmbr( lapack_vect_const( vect[ivect] ), lapack_side_const( side[iside] ), lapack_trans_const( trans[itran] ), &m, &n, &k, A, &lda, tau, C, &ldc, work, &lwork_max, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapackf77_zunmbr returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ==================================================================== Performs operation using MAGMA =================================================================== */ // query for workspace size lwork = -1; magma_zunmbr( vect[ivect], side[iside], trans[itran], m, n, k, A, lda, tau, R, ldc, work, lwork, &info ); if (info != 0) { printf("magma_zunmbr (lwork query) returned error %d: %s.\n", (int) info, magma_strerror( info )); } lwork = (magma_int_t) MAGMA_Z_REAL( work[0] ); if ( lwork < 0 || lwork > lwork_max ) { printf("Warning: optimal lwork %d > allocated lwork_max %d\n", (int) lwork, (int) lwork_max ); lwork = lwork_max; } gpu_time = magma_wtime(); magma_zunmbr( vect[ivect], side[iside], trans[itran], m, n, k, A, lda, tau, R, ldc, work, lwork, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_zunmbr returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== compute relative error |QC_magma - QC_lapack| / |QC_lapack| =================================================================== */ size = ldc*n; blasf77_zaxpy( &size, &c_neg_one, C, &ione, R, &ione ); Cnorm = lapackf77_zlange( "Fro", &m, &n, C, &ldc, dwork ); error = lapackf77_zlange( "Fro", &m, &n, R, &ldc, dwork ) / (magma_dsqrt(m*n) * Cnorm); printf( "%5d %5d %5d %c %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) m, (int) n, (int) k, lapacke_vect_const( vect[ivect] ), lapacke_side_const( side[iside] ), 
lapacke_trans_const( trans[itran] ), cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed") ); status += ! (error < tol); TESTING_FREE_CPU( C ); TESTING_FREE_CPU( R ); TESTING_FREE_CPU( A ); TESTING_FREE_CPU( work ); TESTING_FREE_CPU( d ); TESTING_FREE_CPU( e ); TESTING_FREE_CPU( taup ); TESTING_FREE_CPU( tauq ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } }}} // end ivect, iside, itran printf( "\n" ); } opts.cleanup(); TESTING_FINALIZE(); return status; }
/// Product of this complex number with the real scalar b: both the real and
/// the imaginary part are multiplied by b.
Complex Complex::operator*(const double & b) const
{
    Complex scaled(b * real(), b * imag());
    return scaled;
}
/*! \brief Poles, zeros and elliptic cells coefficients computation. * * Inputs are: * \arg eps : Oscillations in working bandwidth * \arg A : Weakening of attenuated band * \arg f : Low frequency transition edge [Hz] * \arg fb : High frequency transition edge [Hz] * \arg fe : Sampling frequency [Hz] * \arg NCellMax : Maximum number of cells * * Outputs are : * \arg NCells : number of cells, must be positive and lower or equal to NCellMax * \arg poles : poles of the cells (imaginary part positive or null) * \arg zero : zeros of the cells (imaginary part positive or null) * \arg CoefA : coefficient A of the cells * \arg CoefB : coefficient B of the cells * \arg CoefC : coefficient C of the cells * \arg CoefD : coefficient D of the cells * * Computations : \n * \f$ \omega_c = fb \cdot 2 \cdot \pi \f$ \n * \f$ \omega_r = fa \cdot 2 \cdot \pi \f$ \n * \f$ T = 1/fe \f$ \n * \f$ dk1 = \frac{eps}{\sqrt{A^2-1}} \f$ \n * \f$ dk = \frac{tan(\omega_c \cdot \frac{T}{2})}{tan(\omega_r \cdot \frac{T}{2})} \f$ \n * \f$ dkp = \sqrt{1-dk^2} \f$ \n * \f$ ak1 = ak(dk) \textrm{ using ak function} \f$ \n * \f$ ak2 = ak(dk1) \textrm{ using ak function} \f$ \n * \f$ ak3 = ak(dkp) \textrm{ using ak function} \f$ \n * \f$ ak4 = cak(dk1^2) \textrm{ using cak function} \f$ \n * * \f$ N = \frac{1}{2} \cdot ceil \big( ceil(\frac{ak4 \cdot ak1}{ak2 \cdot ak3}+1) \big) \f$ \n * N is checked : \f$ 0 \le N \le NCellMax \f$ \n * * \f$ U_0 = -\frac{ak3}{ak4} \cdot \frac{alog(1+\sqrt{(1+eps^2)})}{eps} \f$ \n * \arg for \f$ i=0,\dots,N-1 \f$ \n * \f$ xmag = 2 \cdot i \cdot \frac{ak1}{2 \cdot N} \f$ \n * \f$ zeros[i] = -ak3 + I \cdot xmag \f$ \n * \f$ poles[i] = U_0 + I \cdot xmag \f$ \n * \arg for \f$ i=0,\dots,2 \cdot N-1 \f$ \n * \f$ Q = real(zeros[mod(i,N)]) \f$ \n * \f$ R = imag(zeros[mod(i,N)]) \f$ \n * \f$ a1 = sn(Q, dkp, ak3, ak1) \textrm{ using sn function} \f$ \n * \f$ b1 = sn(R, dk, ak1, ak3) \textrm{ using sn function} \f$ \n * \f$ \sigma= \left\{ \begin{array}{ll} 0 & \textrm{if } i \le N 
\\ a1 \cdot \sqrt{(1-a1^2)*(1-b1^2)} \cdot \frac{dn}{de} & else \end{array} \right. \f$ \n * \f$ dn = \sqrt{1-{(dk \cdot b1)}^2} \f$ \n * \f$ de = 1-{(a1 \cdot dn)}^2 \f$ \n * \f$ \omega = b1 \cdot \frac{\sqrt{(1-(dkp \cdot a1)^2)}}{de} \f$ \n * \f$ C[i] = -2 \cdot \sigma \cdot \omega_c \f$ \n * \f$ D[i] = (\sigma^2 + \omega^2) \cdot {\omega_c}^2 \f$ \n * \f$ \sigma = \sigma \cdot tan(\omega_c \cdot \frac{T}{2}) \f$ \n * \f$ \omega = \omega \cdot tan(\omega_c \cdot \frac{T}{2}) \f$ \n * \f$ \left\{ \begin{array}{ll} \textrm{if } i \le N & zeros[i] = \sigma + I \cdot \omega \\ \textrm{else} & poles[i] = \sigma + I \cdot \omega \end{array} \right. \f$ \n * \arg for \f$ i=2 \cdot N-1,\dots,0 \f$ \n * \f$ \left\{ \begin{array}{ll} \textrm{if } i \le N -1 & (X,Y)=(real(zeros[i]),imag(zeros[i])) \\ else & (X,Y)=(real(poles[i]),imag(poles[i])) \end{array} \right. \f$ \n * \f$ Re = \frac{1-X^2-Y^2}{(1-X)^2+Y^2} \f$ \n * \f$ V = \frac{2 \cdot Y}{(1-X)^2+Y^2} \f$ \n * \f$ c1 = -2 \cdot Re \f$ \n * \f$ d1 = Re^2 + V^2 \f$ \n * \f$ \left\{ \begin{array}{ll} \textrm{if } i \le N -1 & \left\{ \begin{array}{l} zeros[i]=Re+I \cdot V \\ CoefB[i]=c1 \\ CoefA[i]=d1 \end{array} \right. \\ else & \left\{ \begin{array}{l} poles[i-N]=Re+I \cdot V \\ CoefD[i-N]=c1 \\ CoefC[i-N]=d1 \end{array} \right. \end{array} \right. 
\f$ \n */ void elli(double eps, // Oscillations in working bandwidth double A, // Weakening of attenuated band double fa, // Low frequency transition edge [Hz] double fb, // High frequency transition edge [Hz] double fe, // Sampling frequency [Hz] int NCellMax, // Maximum number of cells int *NCells, // Output number of cells std::complex<double> poles[], // Poles of the cells (imaginary part >= 0) std::complex<double> zeros[], // Zeros of the cells (imaginary part >= 0) double CoefA[], // A coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+BZ-2) double CoefB[], // B coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+BZ-2) double CoefC[], // C coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+BZ-2) double CoefD[] // D coef from H(Z) = (1+BZ-1+AZ-2) / (1+DZ-1+BZ-2) ) { double *C, *D; double T, dk1, dk, dkp, ak1, ak2, ak3, ak4; double U0, xmag, Q, R, sigma; double a1, b1, dn, de, omega; double X, Y, Re, V, c1, d1; double wr, wc; int i, j, N, NDeu; std::complex<double> p; wc = fb*2*M_PI; wr = fa*2*M_PI; T = 1/fe; dk1 = eps/sqrt(A*A-1); dk = tan(wc*T/2.) / tan(wr*T/2.); dkp = sqrt(1-dk*dk); ak1 = ak(dk); ak2 = ak(dk1); ak3 = ak(dkp); //ak4 = ak(sqrt(1-dk1*dk1)); ak4 = cak(dk1*dk1); N = (int)(ak4*ak1/(ak2*ak3)); N = (N/2) + 1; NDeu = 2*N; *NCells = N; if ( N<0 || N>NCellMax) { fprintf(stderr,"\n\n *** Maximum number of cells (%d) reached. %d cells needed. Exiting ... 
***\n\n",NCellMax,N); exit(1); return; } C = (double*)malloc(NDeu*sizeof(double)); D = (double*)malloc(NDeu*sizeof(double)); U0 = (-ak3/ak4)*alog((1+sqrtl(1+eps*eps))/eps); for (i=1;i<=N;i++) { xmag = (2*i-1)*ak1/NDeu; zeros[i-1] = -ak3 + Im*xmag; poles[i-1] = U0 + Im*xmag; } for (i=1; i<=NDeu; i++) { if (i<=N) { Q = real(zeros[i-1]); R = imag(zeros[i-1]); } else { Q = real(poles[i-N-1]); R = imag(poles[i-N-1]); } sigma = 0.; a1 = sn(Q, dkp, ak3, ak1); b1 = sn(R, dk, ak1, ak3); dn = sqrt(1.-(dk*b1)*(dk*b1)); de = 1-(a1*dn)*(a1*dn); if ( i > N ) { sigma = a1*sqrt((1-a1*a1)*(1-b1*b1))*dn/de; } omega = b1*sqrt(1-(dkp*a1)*(dkp*a1))/de; C[i-1] = -2*sigma*wc; D[i-1] = (sigma*sigma + omega*omega)*wc*wc; sigma = sigma*tan(wc*T/2.); omega = omega*tan(wc*T/2.); if ( i <= N ) { zeros[i-1] = sigma + Im*omega; } else { poles[i-N-1] = sigma + Im*omega; } } for (i=NDeu; i>=1; i--) { j = i; if ( j > NDeu/2 ) j = j - NDeu/2; if ( i<=N ) { X = real(zeros[i-1]); X = 0; Y = imag(zeros[i-1]); } else { X = real(poles[i-N-1]); Y = imag(poles[i-N-1]); } Re = (1-X*X-Y*Y)/((1-X)*(1-X)+Y*Y); V = 2*Y/((1-X)*(1-X)+Y*Y); c1 = -2*Re; d1 = Re*Re + V*V; if ( i <= N ) { zeros[i-1] = Re + Im*V; CoefB[i-1] = c1; CoefA[i-1] = d1; } else { poles[i-N-1] = Re + Im*V; CoefD[i-N-1] = c1; CoefC[i-N-1] = d1; } } free(C); free(D); return; }
/// Quotient of this complex number by the real scalar b: both the real and
/// the imaginary part are divided by b (no check for b == 0 is made).
Complex Complex::operator/(double b) const
{
    Complex quotient(real() / b, imag() / b);
    return quotient;
}
/* Compute analytic dynamics */ void computeAnalyticOutputs(std::map<const std::string, bool> &outs, struct PARAMETERS * p) { // energy spacing in bulk std::complex <double> dE ((p->kBandTop-p->kBandEdge)/(p->Nk-1), 0); // bulk-QD coupling std::complex <double> Vee (p->Vnobridge[0], 0); // rate constant (can be defined also as K/2) std::complex <double> K = std::complex <double> (3.1415926535,0)*pow(Vee,2)/dE; // time std::complex <double> t (0, 0); // energy differences std::complex <double> wnm (0, 0); std::complex <double> wnnp (0, 0); std::complex <double> wnpm (0, 0); // coefficients std::complex <double> cm (0, 0); std::complex <double> cn (0, 0); std::complex <double> cn_term1 (0, 0); std::complex <double> cn_term2 (0, 0); std::complex <double> cn_diag (0, 0); std::complex <double> cn_offdiag (0, 0); double cn_tot; // complex numbers are dumb std::complex <double> C0 (0.0, 0.0); std::complex <double> C1 (1.0, 0.0); std::complex <double> NEGC1 (-1.0, 0.0); std::complex <double> CI (0.0, 1.0); std::complex <double> NEGCI (0.0, -1.0); // unpack params a bit int Nk = p->Nk; int Nc = p->Nc; int Ik = p->Ik; int Ic = p->Ic; int N = p->NEQ; double * energies = &(p->energies[0]); double * startWfn = &(p->startWfn[0]); // Create matrix of energy differences std::vector<std::complex <double>> Elr (Nk*Nc, std::complex <double> (0.0, 0.0)); for (int ii = 0; ii < Nk; ii++) { for (int jj = 0; jj < Nc; jj++) { // array follows convention that first index is for QC state // e.g. 
Elr[i*Nc + j] = E_{ij} Elr[ii*Nc + jj] = std::complex <double> (energies[Ik + ii] - energies[Ic + jj], 0); } } #ifdef DEBUG_ANALYTIC std::cout << std::endl; std::cout << "Energy gaps:" << std::endl; for (int ii = 0; ii < Nc*Nk; ii++) { std::cout << Elr[ii] << " "; } std::cout << std::endl; std::cout << std::endl; #endif // Create matrix of prefactors for each QC (n) state std::complex <double> pref; std::vector<std::complex <double>> prefQC (Nk*Nc, std::complex <double> (0.0, 0.0)); for (int ii = 0; ii < Nk; ii++) { // V*c_l/(E_{lr} + i\kappa) pref = Vee*(std::complex <double> (startWfn[Ik + ii], startWfn[Ik + N + ii])); std::cout << startWfn[Ik + ii] << "," << pref << " "; for (int jj = 0; jj < Nc; jj++) { prefQC[ii*Nc + jj] = pref/(Elr[ii*Nc + jj] + CI*K); } } #ifdef DEBUG_ANALYTIC std::cout << std::endl; for (int ii = 0; ii < Nc*Nk; ii++) { std::cout << prefQC[ii] << " "; } std::cout << std::endl; std::cout << std::endl; #endif // calculate wavefunction coefficients on electron-accepting side over time std::vector<std::complex <double>> crt (Nc*p->numOutputSteps, std::complex <double> (0.0, 0.0)); int timeIndex = 0; for (std::complex <double> t = C0; std::real(t) <= p->tout; t += std::complex <double> (p->tout/p->numOutputSteps, 0.0), timeIndex++) { for (int ii = 0; ii < Nc; ii++) { // TODO add bit for multiple state terms for (int jj = 0; jj < Nk; jj++) { crt[timeIndex*Nc + ii] += prefQC[jj]*(exp(NEGCI*Elr[jj*Nc + ii]*t) - exp(NEGC1*K*t)); } } } // calculate populations on electron-accepting side over time std::vector<double> Prt (Nc*p->numOutputSteps, 0.0); for (int ii = 0; ii <= p->numOutputSteps; ii++) { for (int jj = 0; jj < Nc; jj++) { Prt[ii*Nc + jj] = pow(real(crt[ii*Nc + jj]), 2) + pow(imag(crt[ii*Nc + jj]), 2); } } if (isOutput(outs, "analytic_tcprob.out")) { std::ofstream output("analytic_tcprob.out"); for (int ii = 0; ii <= p->numOutputSteps; ii++) { output << p->times[ii]; for (int jj = 0; jj < Nc; jj++) { output << " " << Prt[ii*Nc + jj]; output << 
" " << real(crt[ii*Nc + jj]) << " " << imag(crt[ii*Nc + jj]); } output << std::endl; } output.close(); } return; }
/// Complex conjugate: same real part, negated imaginary part.
Complex Complex::cc() const
{
    Complex conjugate(real(), -imag());
    return conjugate;
}
inline void Density::add_k_point_contribution_rg(K_point* kp__) { PROFILE("sirius::Density::add_k_point_contribution_rg"); int nfv = ctx_.num_fv_states(); double omega = unit_cell_.omega(); auto& fft = ctx_.fft_coarse(); /* get preallocated memory */ double* ptr = static_cast<double*>(ctx_.memory_buffer(fft.local_size() * (ctx_.num_mag_dims() + 1) * sizeof(double))); mdarray<double, 2> density_rg(ptr, fft.local_size(), ctx_.num_mag_dims() + 1, "density_rg"); density_rg.zero(); if (fft.pu() == GPU) { density_rg.allocate(memory_t::device); density_rg.zero<memory_t::device>(); } fft.prepare(kp__->gkvec().partition()); /* non-magnetic or collinear case */ if (ctx_.num_mag_dims() != 3) { /* loop over pure spinor components */ for (int ispn = 0; ispn < ctx_.num_spins(); ispn++) { /* trivial case */ if (!kp__->spinor_wave_functions(ispn).pw_coeffs().spl_num_col().global_index_size()) { continue; } for (int i = 0; i < kp__->spinor_wave_functions(ispn).pw_coeffs().spl_num_col().local_size(); i++) { int j = kp__->spinor_wave_functions(ispn).pw_coeffs().spl_num_col()[i]; double w = kp__->band_occupancy(j + ispn * nfv) * kp__->weight() / omega; ///* transform to real space; in case of GPU wave-function stays in GPU memory */ fft.transform<1>(kp__->gkvec().partition(), kp__->spinor_wave_functions(ispn).pw_coeffs().extra().template at<CPU>(0, i)); //switch (fft.pu()) { // case CPU: { // fft.transform<1>(kp__->gkvec().partition(), // kp__->spinor_wave_functions(ispn).pw_coeffs().extra().template at<CPU>(0, i)); // break; // } // case GPU: { // fft.transform<1, GPU>(kp__->gkvec().partition(), // kp__->spinor_wave_functions(ispn).pw_coeffs().extra().template at<GPU>(0, i)); // break; // } //} /* add to density */ switch (fft.pu()) { case CPU: { #pragma omp parallel for schedule(static) for (int ir = 0; ir < fft.local_size(); ir++) { auto z = fft.buffer(ir); density_rg(ir, ispn) += w * (std::pow(z.real(), 2) + std::pow(z.imag(), 2)); } break; } case GPU: { #ifdef __GPU 
update_density_rg_1_gpu(fft.local_size(), fft.buffer().at<GPU>(), w, density_rg.at<GPU>(0, ispn)); #else TERMINATE_NO_GPU #endif break; } } } } } else { /* non-collinear case */
/// Component-wise difference of this complex number and a.
Complex Complex::operator-(const Complex & a) const
{
    Complex difference(real() - a.real(), imag() - a.imag());
    return difference;
}
// Transient initialisation: T becomes the real part of the sum over the
// "times" property vector, after which the DC initialisation is run.
void digisource::initTR (void)
{
  T = real (sum (*getPropertyVector ("times")));
  initDC ();
}
// Builds the flakes background: loads the flake textures, creates
// "flakes-count" flakes with random position, size and speed, and sets the
// renderer to clear the back buffer to black.
//
// The options read from _program_options are "flakes-size-min",
// "flakes-size-max", "flakes-speed-min", "flakes-speed-max" and
// "flakes-count".
sgetris::backgrounds::flakes::object::object(
	boost::program_options::variables_map &_program_options,
	sge::renderer::device &_renderer,
	// The flakes get the loader because in a later version more than one
	// flake image could be loaded from a directory
	texture_manager &_texture_manager)
:
	texture_manager_(
		_texture_manager),
	ss_(
		_renderer),
	clock_(),
	frame_timer_(
		sgetris::diff_timer::parameters(
			fcppt::chrono::second(1)
		)
	),
	flakes_()
{
	texture_manager_.load(
		media_path()/FCPPT_TEXT("backgrounds")/FCPPT_TEXT("flakes")/FCPPT_TEXT("textures.ini"));

	// Positions are drawn uniformly over the whole screen. _renderer is a
	// reference, so it is accessed with '.' throughout.
	fcppt::random::uniform<sprite::scalar>
		xposition_rng(
			fcppt::random::make_inclusive_range(
				static_cast<sprite::scalar>(0),
				static_cast<sprite::scalar>(
					_renderer.screen_size().w()))),
		yposition_rng(
			fcppt::random::make_inclusive_range(
				static_cast<sprite::scalar>(0),
				static_cast<sprite::scalar>(
					_renderer.screen_size().h())));

	// Those pairs are real to avoid ugly casting below, they'll be cast
	// once to sprite::scalar. The size options are fractions of the screen
	// width.
	std::pair<real,real>
		size_range(
			static_cast<real>(
				_renderer.screen_size().w())*
			_program_options["flakes-size-min"].as<real>(),
			static_cast<real>(
				_renderer.screen_size().w())*
			_program_options["flakes-size-max"].as<real>());

	std::pair<real,real>
		speed_range(
			_program_options["flakes-speed-min"].as<real>(),
			_program_options["flakes-speed-max"].as<real>());

	fcppt::random::uniform<real> rng(
		fcppt::random::make_inclusive_range(
			static_cast<real>(
				0),
			static_cast<real>(
				1)));

	for(
		flake_count i = 0,
		fc = _program_options["flakes-count"].as<flake_count>();
		i < fc;
		++i)
	{
		// Roll the dice: the same v in [0,1] picks both size and speed, so
		// larger flakes are also the faster ones.
		real const v =
			rng();

		sprite::vector const position(
			xposition_rng(),
			yposition_rng());

		// Flakes are square: width and height share one edge length.
		real const edge =
			size_range.first + v * (size_range.second - size_range.first);

		sprite::dim const size(
			fcppt::math::dim::structure_cast<sprite::dim>(
				fcppt::math::dim::make(
					edge,
					edge)));

		sprite::scalar const speed =
			static_cast<sprite::scalar>(
				speed_range.first + v * (speed_range.second - speed_range.first));

		flakes_.push_back(
			new flake(
				real(
					v),
				_renderer.screen_size(),
				sprite::parameters()
					.system(
						&ss_)
					.order(
						0u)
					.pos(
						position)
					.texture(
						texture_manager_.texture(
							FCPPT_TEXT("flake")))
					.size(
						size),
				speed));
	}

	_renderer.state(
		sge::renderer::state::list
			(sge::renderer::state::bool_::clear_backbuffer = true)
			(sge::renderer::state::color::clear_color = sge::image::colors::black()
		)
	);
}
// ax + by + cz = d; a^2 + b^2 + c^2 = 1; void CalPlane(vector<Point3D>& cPointSet, GridMap &cgridmap) { int pointNum = cPointSet.size(); MatrixXf pointSet(pointNum,3); Matrix3f A(3,3); A<<0, 0, 0, 0, 0, 0, 0, 0, 0; for(int i = 0; i < pointNum; i++) { pointSet(i,0) = cPointSet[i].X; pointSet(i,1) = cPointSet[i].Y; pointSet(i,2) = cPointSet[i].Z; } float xBar = pointSet.col(0).sum()/pointNum; float yBar = pointSet.col(1).sum()/pointNum; float zBar = pointSet.col(2).sum()/pointNum; for(int i = 0; i < pointNum; i++) { A(0, 0) += (pointSet(i,0) - xBar)*(pointSet(i,0) - xBar); A(0, 1) += (pointSet(i,0) - xBar)*(pointSet(i,1) - yBar); A(0, 2) += (pointSet(i,0) - xBar)*(pointSet(i,2) - zBar); A(1, 0) += (pointSet(i,1) - yBar)*(pointSet(i,0) - xBar); A(1, 1) += (pointSet(i,1) - yBar)*(pointSet(i,1) - yBar); A(1, 2) += (pointSet(i,1) - yBar)*(pointSet(i,2) - zBar); A(2, 0) += (pointSet(i,2) - zBar)*(pointSet(i,0) - xBar); A(2, 1) += (pointSet(i,2) - zBar)*(pointSet(i,1) - yBar); A(2, 2) += (pointSet(i,2) - zBar)*(pointSet(i,2) - zBar); } EigenSolver<MatrixXf> es(A); VectorXcf eigvals = es.eigenvalues(); Vector3f eigvalues; eigvalues<<real(eigvals(0)), real(eigvals(1)), real(eigvals(2)); MatrixXcf eigvect = es.eigenvectors(); Matrix3f eigvectors; eigvectors <<real(eigvect(0,0)), real(eigvect(0,1)), real(eigvect(0,2)), real(eigvect(1,0)), real(eigvect(1,1)), real(eigvect(1,2)), real(eigvect(2,0)), real(eigvect(2,1)), real(eigvect(2,2)); float minValue = eigvalues(0); int minNum = 0; for(int i = 1; i < 3; i++) { if(eigvalues(i) < minValue) { minValue = eigvalues(i); minNum = i; } } float planePara[4] = {0, 0, 0, 0}; planePara[0] = eigvectors(0, minNum); planePara[1] = eigvectors(1, minNum); planePara[2] = eigvectors(2, minNum); planePara[3] = planePara[0]*xBar + planePara[1]*yBar + planePara[2]*zBar; if(planePara[0] < 0) { for(int i = 0; i < 4; i++) { cgridmap.planePara[i] = -planePara[i]; } } else { for(int i = 0; i < 4; i++) { cgridmap.planePara[i] = planePara[i]; } } float 
distance1 = 0; float distance2 = sqrt(cgridmap.planePara[0]*cgridmap.planePara[0] + cgridmap.planePara[1]*cgridmap.planePara[1] + cgridmap.planePara[2]*cgridmap.planePara[2]); for(int i = 0; i < pointNum; i++) { distance1 += fabs(cgridmap.planePara[0]*pointSet(i,0) + cgridmap.planePara[1]*pointSet(i,1) + cgridmap.planePara[2]*pointSet(i,2) - cgridmap.planePara[3]); } cgridmap.planeDegree = distance1/distance2/pointNum; cgridmap.normalVector = acos(cgridmap.planePara[1]/distance2)/3.1415926*180; }
/*
 * Write the troff output for one table line to tabout.
 *
 * parameters
 *
 * i: line number for deciding format
 * nl: line number for finding data usually identical
 *
 * Besides its locals the function reads and updates file-level state
 * (watchout, once, topat[]) and consults the table description arrays
 * (table, instead, fullbot, linestop, font, csize, stynum, ctop, ...).
 */
void putline(int i, int nl)
{
	/* ip starts at -1 so the "previous line" tests below are false until it is set */
	int c, lf, ct, form, lwid, vspf, ip = -1, cmidx, exvspen, vforml;
	int vct, chfont;
	char *s, *size, *fn;
	watchout=vspf=exvspen=0;
	if (i==0) once=0;
	/* first line of a table drawn with all/box/doublebox: top rule first */
	if (i==0 && ( allflg || boxflg || dboxflg))
		fullwide(0, dboxflg? '=' : '-');
	/* ordinary data line: emit a .ne request for every entry for which point() is false */
	if (instead[nl]==0 && fullbot[nl] ==0)
	for(c=0; c<ncol; c++)
	{
		s = table[nl][c].col;
		if (s==0) continue;
		if (vspen(s))
		{
			/* follow the column downwards to the first line whose entry is not vspen() */
			for(ip=nl; ip<nlin; ip=next(ip))
				if (!vspen(s=table[ip][c].col)) break;
			/* NOTE(review): s is compared against small integers and printed with %c,
			   so the pointer apparently carries a character code here -- confirm */
			if (s>(char *)0 && s<(char *)128)
				fprintf(tabout, ".ne \\n(%c|u+\\n(.Vu\n",s);
			continue;
		}
		if (point(s)) continue;
		fprintf(tabout, ".ne \\n(%c|u+\\n(.Vu\n",s);
		watchout=1;
	}
	if (linestop[nl])
		fprintf(tabout, ".mk #%c\n", linestop[nl]+'a'-1);
	lf = prev(nl);
	/* a line with replacement text is copied out verbatim (via puts, i.e. to stdout) */
	if (instead[nl])
	{
		puts(instead[nl]);
		return;
	}
	/* a line consisting only of a full-width rule */
	if (fullbot[nl])
	{
		switch (ct=fullbot[nl])
		{
		case '=':
		case '-':
			fullwide(nl,ct);
		}
		return;
	}
	/* does this line or the previous one contain a vspen() entry? */
	for(c=0; c<ncol; c++)
	{
		if (instead[nl]==0 && fullbot[nl]==0)
		if (vspen(table[nl][c].col)) vspf=1;
		if (lf>=0)
		if (vspen(table[lf][c].col)) vspf=1;
	}
	if (vspf)
	{
		fprintf(tabout, ".nr #^ \\n(\\*(#du\n");
		fprintf(tabout, ".nr #- \\n(#^\n"); /* current line position relative to bottom */
	}
	vspf=0;
	chfont=0;
	/* accumulate font information and, per entry for which point() is false,
	   emit an .if request that may enlarge number register #- */
	for(c=0; c<ncol; c++)
	{
		s = table[nl][c].col;
		if (s==0) continue;
		chfont |= (int)(font[stynum[nl]][c]);
		if (point(s) ) continue;
		lf=prev(nl);
		if (lf>=0 && vspen(table[lf][c].col))
			fprintf(tabout, ".if (\\n(%c|+\\n(^%c-1v)>\\n(#- .nr #- +(\\n(%c|+\\n(^%c-\\n(#--1v)\n",s,'a'+c,s,'a'+c);
		else
			fprintf(tabout, ".if (\\n(%c|+\\n(#^-1v)>\\n(#- .nr #- +(\\n(%c|+\\n(#^-\\n(#--1v)\n",s,s);
	}
	if (allflg && once>0 )
		fullwide(i,'-');
	once=1;
	runtabs(i, nl);
	if (allh(nl) && !pr1403)
	{
		/* save the vertical spacing in register SVS; it is restored after the line */
		fprintf(tabout, ".nr %d \\n(.v\n", SVS);
		fprintf(tabout, ".vs \\n(.vu-\\n(.sp\n");
	}
	if (chfont)
		fprintf(tabout, ".nr %2d \\n(.f\n", S1);
	fprintf(tabout, ".nr 35 1m\n");
	fprintf(tabout, "\\&");
	/* vct counts emitted motions/verticals; the output line is broken once it exceeds 7 */
	vct = 0;
	for(c=0; c<ncol; c++)
	{
		/* vertical line to the left of this column, if any */
		if (watchout==0 && i+1<nlin && (lf=left(i,c, &lwid))>=0)
		{
			tohcol(c);
			drawvert(lf, i, c, lwid);
			vct += 2;
		}
		if (rightl && c+1==ncol) continue;
		/* vforml: topmost line of the run of vspen() entries above nl in this
		   column; its format, font and size are the ones applied below */
		vforml=i;
		for(lf=prev(nl); lf>=0 && vspen(table[lf][c].col); lf=prev(lf))
			vforml= lf;
		form= ctype(vforml,c);
		if (form != 's')
		{
			/* horizontal motion to the column's start (or middle) position */
			ct = c+CLEFT;
			if (form=='a') ct = c+CMID;
			if (form=='n' && table[nl][c].rcol && lused[c]==0) ct= c+CMID;
			fprintf(tabout, "\\h'|\\n(%du'", ct);
		}
		s= table[nl][c].col;
		fn = font[stynum[vforml]][c];
		size = csize[stynum[vforml]][c];
		if (*size==0)size=0;
		switch(ct=ctype(vforml, c))
		{
		case 'n':
		case 'a':
			if (table[nl][c].rcol)
			{
				if (lused[c]) /*Zero field width*/
				{
					ip = prev(nl);
					if (ip>=0)
					if (vspen(table[ip][c].col))
					{
						if (exvspen==0)
						{
							fprintf(tabout, "\\v'-(\\n(\\*(#du-\\n(^%cu", c+'a');
							/* NOTE(review): cmidx is read here before it is
							   assigned for this column (it is set further
							   down) -- on the first such column it may be
							   uninitialised; confirm */
							if (cmidx)
								fprintf(tabout, "-((\\n(#-u-\\n(^%cu)/2u)", c+'a');
							vct++;
							fprintf(tabout, "'");
							exvspen=1;
						}
					}
					fprintf(tabout, "%c%c",F1,F2);
					puttext(s,fn,size);
					fprintf(tabout, "%c",F1);
				}
				/* continue with the right-hand part of the split entry */
				s= table[nl][c].rcol;
				form=1;
				break;
			}
			/* FALLTHROUGH */
		case 'c':
			form=3; break;
		case 'r':
			form=2; break;
		case 'l':
			form=1; break;
		case '-':
		case '=':
			/* rule column: any data in it is reported and ignored */
			if (real(table[nl][c].col))
				fprintf(stderr,gettext("%s: line %d: Data ignored on table line %d\n"), ifile, iline-1, i+1);
			makeline(i,c,ct);
			continue;
		default:
			continue;
		}
		if (realsplit ? rused[c]: used[c]) /*Zero field width*/
		{
			/* form: 1 left, 2 right, 3 center adjust */
			if (ifline(s))
			{
				makeline(i,c,ifline(s));
				continue;
			}
			if (filler(s))
			{
				/* note: written with printf (stdout), not to tabout */
				printf("\\l'|\\n(%du\\&%s'", c+CRIGHT, s+2);
				continue;
			}
			ip = prev(nl);
			cmidx = ctop[stynum[nl]][c]==0;
			if (ip>=0)
			if (vspen(table[ip][c].col))
			{
				if (exvspen==0)
				{
					/* upward motion, undone by the downward \v emitted below */
					fprintf(tabout, "\\v'-(\\n(\\*(#du-\\n(^%cu", c+'a');
					if (cmidx)
						fprintf(tabout, "-((\\n(#-u-\\n(^%cu)/2u)", c+'a');
					vct++;
					fprintf(tabout, "'");
				}
			}
			fprintf(tabout, "%c", F1);
			if (form!= 1)
				fprintf(tabout, "%c", F2);
			if (vspen(s))
				vspf=1;
			else
			puttext(s, fn, size);
			if (form !=2)
				fprintf(tabout, "%c", F2);
			fprintf(tabout, "%c", F1);
		}
		if (ip>=0)
		if (vspen(table[ip][c].col))
		{
			/* exvspen stays set when the next column takes the same vertical
			   motion, so the motion is not undone and redone between them */
			exvspen = (c+1 < ncol) && vspen(table[ip][c+1].col) &&
				(topat[c] == topat[c+1]) &&
				(cmidx == (ctop [stynum[nl]][c+1]==0)) && (left(i,c+1,&lwid)<0);
			if (exvspen==0)
			{
				fprintf(tabout, "\\v'(\\n(\\*(#du-\\n(^%cu", c+'a');
				if (cmidx)
					fprintf(tabout, "-((\\n(#-u-\\n(^%cu)/2u)", c+'a');
				vct++;
				fprintf(tabout, "'");
			}
		}
		/* this else pairs with the inner if (vspen(...)), not with if (ip>=0) */
		else
			exvspen=0;
		/* if lines need to be split for gcos here is the place for a backslash */
		if (vct > 7 && c < ncol)
		{
			fprintf(tabout, "\n.sp-1\n\\&");
			vct=0;
		}
	}
	fprintf(tabout, "\n");
	if (allh(nl) && !pr1403) fprintf(tabout, ".vs \\n(%du\n", SVS);
	if (watchout)
		funnies(i,nl);
	if (vspf)
	{
		/* for each column where a run of vspen() entries starts on this line,
		   record the line's top position in register ^<col> and in topat[] */
		for(c=0; c<ncol; c++)
			if (vspen(table[nl][c].col) && (nl==0 || (lf=prev(nl))<0 || !vspen(table[lf][c].col)))
			{
				fprintf(tabout, ".nr ^%c \\n(#^u\n", 'a'+c);
				topat[c]=nl;
			}
	}
}