void Ctmp_ra_file::decode(void* d) const { int cx = get_cblocks_x(); int cy = get_cblocks_y(); int c_tiles = get_c_tiles(); if (cx == 1 && cy == 1) cy = c_tiles; memset(d, 0, cb_image()); int i = 0; for (int ty = 0; ty < cy; ty++) { for (int tx = 0; tx < cx; tx++) { if (get_index1()[i] != 0xff) { const byte* r = get_image(i); byte* w = reinterpret_cast<byte*>(d) + 24 * (tx + 24 * cx * ty); for (int y = 0; y < 24; y++) { memcpy(w, r, 24); r += 24; w += 24 * cx; } } i++; } } }
int main(int argc, char** argv) { if (3 == argc) { L = std::atoi(argv[1]); T = std::atoi(argv[2]); } std::cout << "L = " << L << "\t" << "T = " << T << "\t" << "threads = " << omp_get_max_threads() << "\t" << std::endl; lx.resize(2); lx[0] = L; lx[1] = L; vol.resize(2); vol[0] = 1; vol[1] = lx[0]; N = lx[0] * lx[1]; wsfpf.resize(N); a.resize(N); #pragma omp parallel for for (int i = 0; i < N; i++) { for (int k = 0; k < 2; k++) { int x = (i / vol[k] ) % lx[k]; if (x == lx[k] - 1) wsfpf[i].set(k, true); else wsfpf[i].set(k, false); } } double start, end; start = omp_get_wtime(); for (int t = 0; t < T; t++) { #pragma omp parallel for for (int y = 0; y < lx[1]; y++) { for (int x = 0; x < lx[0]; x++) { int j; int i = x + lx[0] * y; get_index1(j, x, y, (i * i) & 1); a[i] += (j & 1) + (a[i] & 2); } } } end = omp_get_wtime(); std::cout << "xy based: " << end - start << std::endl; start = omp_get_wtime(); for (int t = 0; t < T; t++) { #pragma omp parallel for for (int i = 0; i < N; i++) { int j; get_index2(j, i, (i * i) & 1); a[i] += (j & 1) + (a[i] & 2); } } end = omp_get_wtime(); std::cout << "i based: " << end - start << std::endl; int sum = 0; #pragma omp parallel for reduction(+:sum) for (int i = 0; i < N; i++) { sum += a[i] & 1; } std::cout << sum << std::endl; return 0; }