// v should be preallocated with right size int readFileToVector(vecd & v, const char * fname){ ifstream is(fname, ios::binary); if (is.fail()) { printf("Can't open %s.\n", fname); return -1; } is.read(reinterpret_cast<char*>(v.data()), v.size() * sizeof(double)); if (is.fail()) { printf("Can't read %s.\n", fname); return -2; } return 0; }
// Append one pixel per (x, y, z) triple to the widget's pixel list,
// pairing each pixel with the rendering options o.
// Throws std::logic_error if the three coordinate vectors differ in length.
void widget3d::plot(const vecd& x, const vecd& y, const vecd& z, const options& o) {
    // BUG FIX: the original guard used &&, so x.size()==y.size() with a
    // shorter z slipped past the check and z.at(i) later threw
    // std::out_of_range (or a longer z was silently truncated). All three
    // sizes must agree, so reject any single mismatch.
    if (x.size() != y.size() || y.size() != z.size()) {
        throw std::logic_error("sizes missmatched");
    }
    // Sizes are now known equal; iterate over the common length.
    for (size_t i = 0; i < x.size(); ++i) {
        pixel current(x.at(i), y.at(i), z.at(i));
        m_pixels.push_back(std::make_pair(current, o));
    }
}
// Benchmark driver: grids the visibilities `vis` against the convolution
// function `gcf` into the complex grid `uvg` four times in a row.
// NOTE(review): relies on file/global state not visible in this chunk:
// fullSize, chunked, gcf_size, over, gridSize, t2, dlayout<> and the kernel
// entry point `kern` — confirm their definitions before touching this.
void bench(
    const vecd & vis
  , const vecd & gcf
  , complexd * uvg
  ){
  // Zero the output grid: fullSize complex cells = fullSize * 2 doubles.
  memset(uvg, 0, fullSize * 2 * sizeof(double));
  printf("%s, gcf size %d:\n", chunked ? "Chunked" : "Linear", gcf_size);
  // View the flat GCF buffer as a 4-D oversampled kernel table
  // [over][over][gcf_size][gcf_size]; over and gcf_size must be
  // compile-time constants for this typedef to be legal.
  typedef const complexd (*gcf_t)[over][over][gcf_size][gcf_size];
  // Four repetitions for timing; the grid is zeroed only once above, so
  // successive repetitions accumulate into uvg.
  for(int rep = 0; rep < 4; rep++) {
    kern( t2
        , uvg
        , *reinterpret_cast<gcf_t>(gcf.data())
        // Reinterpret the raw visibility doubles into whichever layout the
        // selected (chunked vs linear) kernel variant expects.
        , *reinterpret_cast<typename dlayout<chunked>::type*>(vis.data())
        , gridSize
        );
  }
}
// JIT-compile the Halide gridding kernel and run it over the full visibility
// set in four timed passes, printing tab-separated timings to stdout
// (first the compile time, then one wall-clock figure per pass).
// Parameters:
//   nthreads - OpenMP thread count; each thread grids into its own private
//              grid of fullSize complex cells inside `uvg`.
//   cfg      - gridder configuration (steps/chunks, gcfSize, gridSize).
//   vis      - visibilities, laid out as 5 doubles per visibility.
//   gcf      - grid convolution function data.
//   uvg      - output storage: nthreads consecutive grids of fullSize*2 doubles.
// NOTE(review): depends on globals/helpers defined elsewhere in this file:
// numOfVis, fullSize, over2, t2, tohost(), clock_diff(), mkHalideBuf<>(),
// kernfun_t — verify against the rest of the file before modifying.
void runGridder(int nthreads, SGridderConfig cfg, const vecd &vis, const vecd &gcf, const vecd &uvg) {
  SGridder gridder(cfg);
  // Use this to figure out the argument order for the kernel. The
  // general scheme is:
  // 1. All input buffers in alphabetical order
  // 2. All scalar parameters in alphabetical order
  // 3. The JIT user context
  // 4. The output buffer(s) in alphabetical order
  // The call below will give 1+2, but not 3+4!
  //for( Argument a : gridder.uvg.infer_arguments() ) { printf("%s ", a.name.data()); } //puts("");
  // Compile. We measure the time, for sanity-checks
  Target target_plain(get_target_from_environment().os, Target::X86, 64, { Target::SSE41, Target::AVX});
  target_plain.set_feature(Target::NoAsserts, true);
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  kernfun_t kernfun = reinterpret_cast<kernfun_t>(gridder.uvg.compile_jit(target_plain));
  printf("\t%ld\t -", clock_diff(&ts)); fflush(stdout);
  // Calculate number of chunks
  int nchunks = numOfVis / cfg.steps / cfg.chunks;
  // Four timed passes over the whole data set.
  for (int i = 0; i < 4; i++) {
    clock_gettime(CLOCK_REALTIME, &ts);
    // Zero every thread's private output grid before the pass.
#pragma omp parallel for num_threads(nthreads)
    for (int j = 0; j < nthreads; j++) {
      memset(tohost(uvg.data() + j * fullSize * 2), 0, fullSize * 2 * sizeof(double));
    }
    // Grid the chunks in parallel; dynamic scheduling balances uneven
    // per-chunk cost across the threads.
#pragma omp parallel for schedule(dynamic) num_threads(nthreads)
    for (int chunk = nchunks-1; chunk >= 0; chunk--){
      // Set input and output buffers for Halide. It should only use
      // an appropriate chunk of visibilities and output to its own grid.
      int vis0 = cfg.steps * cfg.chunks * chunk;
      int vis1 = cfg.steps * cfg.chunks * (chunk + 1);
      // The last chunk absorbs the remainder of the integer division above.
      if (chunk == nchunks - 1) vis1 = numOfVis;
      buffer_t vis_buffer = mkHalideBuf<double>(vis1 - vis0,5)
             , gcf_buffer = mkHalideBuf<double>(over2, cfg.gcfSize, cfg.gcfSize, 2)
             , uvg_buffer = mkHalideBuf<double>(cfg.gridSize, cfg.gridSize, 2)
             ;
      // 5 doubles per visibility; min[1] tells Halide this chunk's offset
      // within the full visibility range.
      vis_buffer.host = tohost(vis.data() + 5 * vis0);
      vis_buffer.min[1] = vis0;
      gcf_buffer.host = tohost(gcf.data());
      // Each OpenMP thread writes into its own private grid slice of uvg.
      uvg_buffer.host = tohost(uvg.data() + omp_get_thread_num() * fullSize * 2);
      JITUserContext jit_context;
      timespec ts2;
      clock_gettime(CLOCK_REALTIME, &ts2);
      kernfun(&gcf_buffer, &vis_buffer, cfg.gridSize, t2, &jit_context, &uvg_buffer);
    }
    printf("\t%ld", clock_diff(&ts)); fflush(stdout);
  }
}