// Device-side check built around all_equal_kernel.
//
// Allocates two N-element device vectors: a result buffer initialised to 0
// and a reference buffer initialised to 1. all_equal_kernel<T> is launched
// with one block of 16 threads on (a_d, result buffer, N, k); the function
// then returns whether the result buffer equals the all-ones reference
// element for element.
//
// NOTE(review): presumably the kernel writes 1 into slot i when a_d[i] == k,
// which would make this "all N elements of a_d equal k" - confirm against
// the kernel definition. a_d is presumably a device pointer - confirm with
// callers.
bool all_equal_device(T* a_d, int N, T k) {
    thrust::device_vector<T> flags(N, 0);
    thrust::device_vector<T> expected(N, 1);

    // The kernel takes a raw pointer, not a thrust iterator.
    T* const flags_raw = thrust::raw_pointer_cast(&flags[0]);
    all_equal_kernel<T><<<1, 16>>>(a_d, flags_raw, N, k);

    const bool every_flag_set =
        thrust::equal(flags.begin(), flags.end(), expected.begin());
    return every_flag_set;
}
void obliqueangle_engine::render(level_ptr level, boost::shared_ptr<image_operations> oper) { pos_t iw, ih; part_c.get_obliqueangle_limits(iw, ih); BlockRotation b_r(s, level->get_blocks()); BlockRotation b_d(s, level->get_data()); BlockRotation bl_r(s, level->get_blocklight()); BlockRotation sl_r(s, level->get_skylight()); BlockRotation hm_r(s, level->get_heightmap()); pos_t bmt = iw * ih; boost::scoped_array<bool> blocked(new bool[bmt]); memset(blocked.get(), 0x0, sizeof(bool) * bmt); oper->set_limits(iw + 1, ih); for (int z = mc::MapZ - 1; z >= 0; z--) { for (int x = mc::MapX - 1; x >= 0; x--) { bool cave_initial = true; bool hell_initial = true; bool hell_solid = true; hm_r.set_xz(x, z); b_r.set_xz(x, z); b_d.set_xz(x, z); bl_r.set_xz(x, z); sl_r.set_xz(x, z); int hmval = hm_r.get8(); if (s.hellmode) { for (int y = s.top; y >= s.bottom && hell_solid; y--) { hell_solid = !is_open(b_r.get8(y)); } } for (int y = s.top; y >= s.bottom; y--) { int bt = b_r.get8(y); if (s.cavemode && cave_ignore_block(s, y, bt, b_r, cave_initial)) { continue; } if (s.hellmode && !hell_solid && hell_ignore_block(s, y, bt, b_r, hell_initial)) { continue; } if (s.excludes[bt]) { continue; } point p(x, y, z); pos_t px, py; part_c.project_obliqueangle(p, px, py); color top, side; if (bt == mc::Wool) { int md = b_d.get4(y); top = mc::WoolColor[md]; side = mc::WoolColor[md]; } else if ((bt == mc::Step) || (bt == mc::DoubleStep)) { int md = b_d.get4(y); top = mc::StepColor[md]; side = mc::StepColor[md]; } else { top = mc::MaterialColor[bt]; side = mc::MaterialSideColor[bt]; } if (mc::MaterialModes[bt] == mc::Block) { int bp = px + iw * py; if (blocked[bp]) { continue; } blocked[bp] = top.is_opaque(); } int bl = bl_r.get4(y + 1); apply_shading(s, bl, sl_r.get4(y + 1), hmval, y, top); apply_shading(s, bl, -1, hmval, y, side); switch(mc::MaterialModes[bt]) { case mc::Block: oper->add_pixel(px, py, top); oper->add_pixel(px + 1, py, top); oper->add_pixel(px, py + 1, side); side.lighten(0x20); 
oper->add_pixel(px + 1, py + 1, side); break; case mc::HalfBlock: oper->add_pixel(px, py + 1, top); oper->add_pixel(px + 1, py + 1, top); break; case mc::TorchBlock: oper->add_pixel(px, py, top); top.lighten(0x20); top.a -= 0xb0; oper->add_pixel(px - 1, py, top); oper->add_pixel(px + 2, py, top); oper->add_pixel(px, py - 1, top); oper->add_pixel(px, py + 1, top); oper->add_pixel(px, py + 1, side); break; } } } } }
/*
 * Default bind handler: forwards both arguments unchanged to b_d() and
 * returns its result.
 */
int bind_default(node_t *root, int stackOffset)
{
    const int result = b_d(root, stackOffset);
    return result;
}
bool GPUMatrixTest::performTest() { cout << "Creating 2x2 matrix" << endl; Matrix a(2, 2, generators::zero); a(0,1) = 2; a(1,0) = 4; cout << a; cout << "Creating 2x3 matrix" << endl; Matrix b(2, 3, generators::zero); b(0,0) = 3; b(1,0) = 5; b(0,1) = 7; b(1,2) = 6; cout << b; // Matrix * Matrix on CPU //const double correctResultMult[] = {10, 0, 12, 12, 28, 0}; Matrix c(2, 3); cout << "---" << endl << "Product on CPU:" << endl; a.multWithMatrix(b, &c); cout << c; // Matrix * Matrix on GPU GPUMatrix a_d(a); Matrix agpu(2,2, generators::zero); a_d.getMatrix(&agpu); cout << agpu << endl; //return true; GPUMatrix b_d(b); GPUMatrix c_d(c); a_d.multWithMatrix(b_d, &c_d); Matrix cgpu(2, 3, generators::zero); c_d.getMatrix(&cgpu); cout << "Product on GPU:" << endl << cgpu; srand(1); Matrix d(9, 10, generators::random); cout << "D = " << endl << d; GPUMatrix d_d(d); // Row sum GPUMatrix rsums_d(d.rows(), 1); // "col vector" d_d.rowSums(&rsums_d); Matrix rsums(d.rows(), 1); rsums_d.getMatrix(&rsums); cout << "Row sums of D: " << endl; cout << rsums << endl; for (unsigned int i = 0; i < d.rows(); ++i) { //cout << sums(i, 0) << " "; if (!epsilonCheck(rsums(i, 0), d.rowSum(i), 1e-6)) return false; } // Col sum GPUMatrix csums_d(1, d.cols()); // "row vector" d_d.colSums(&csums_d); Matrix csums(1, d.cols()); csums_d.getMatrix(&csums); cout << "Column sums of D: " << endl; cout << csums << endl; for (unsigned int i = 0; i < d.cols(); ++i) { //cout << sums(i, 0) << " "; if (!epsilonCheck(csums(0, i), d.colSum(i), 1e-6)) return false; } // Matrix + Matrix on GPU Matrix e(9, 10, generators::random); cout << "E = " << endl << e; GPUMatrix e_d(e); GPUMatrix f_d(9, 10); Matrix fgpu(9, 10, generators::zero); d_d.add(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D+E: " << endl << fgpu << endl; d_d.sub(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D-E: " << endl << fgpu << endl; d_d.elementWiseMult(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D.*E: " << endl << fgpu << endl; 
d_d.elementWiseDiv(e_d, &f_d); f_d.getMatrix(&fgpu); cout << "Result D./E: " << endl << fgpu << endl; d_d.elementWisePow(2.5, &f_d); f_d.getMatrix(&fgpu); cout << "Result D.^2.5: " << endl << fgpu << endl; // Scaling const double alpha = .5f; f_d.scale(alpha, 2, 4); f_d.getMatrix(&fgpu); cout << "Scale columns 2 to 4 by " << alpha << ":" << endl << fgpu << endl; // Zero of submatrix f_d.zero(2, 3, 7, 8); f_d.getMatrix(&fgpu); cout << "Set [2,3]->[7,8] to zero:" << endl << fgpu << endl; // Zero whole matrix f_d.zero(); f_d.getMatrix(&fgpu); cout << "Zero matrix:" << endl << fgpu << endl; // // Large matrix multiplication and verification against CPU gold standard // cout << "Matrix multiplication on CPU ... " << endl; int m = 999; int k = 199; int n = 1; Matrix left(m, k, generators::random); Matrix right(k, n, generators::unity); Matrix resultCPU(m, n); left.multWithMatrix(right, &resultCPU); cout << "Matrix multiplication on GPU ... " << endl; GPUMatrix leftGPU(left); GPUMatrix rightGPU(right); GPUMatrix resultGPU(resultCPU.rows(), resultCPU.cols()); Matrix resultGPUtransfer(resultCPU.rows(), resultCPU.cols()); leftGPU.multWithMatrix(rightGPU, &resultGPU); resultGPU.getMatrix(&resultGPUtransfer); //cout << resultGPUtransfer << endl; int nwarn = 0; for (unsigned int i = 0; i < resultCPU.rows(); ++i) { for (unsigned int j = 0; j < resultCPU.cols(); ++j) { if (abs(resultCPU(i, j) - resultGPUtransfer(i, j)) > 1e-3) { cout << "WARN " << i << " " << j << ": CPU = " << resultCPU(i, j) << "; GPU = " << resultGPUtransfer(i, j) << endl; nwarn++; if (nwarn > 50) return false; } } } return true; }
void fatiso_engine::render(level_ptr level, boost::shared_ptr<image_operations> oper) { BlockRotation b_r(s, level->get_blocks()); BlockRotation b_d(s, level->get_data()); BlockRotation bl_r(s, level->get_blocklight()); BlockRotation sl_r(s, level->get_skylight()); BlockRotation hm_r(s, level->get_heightmap()); pos_t iw, ih; part_c.get_fatiso_limits(iw, ih); pos_t bmt = iw * ih; boost::scoped_array<bool> blocked(new bool[bmt]); memset(blocked.get(), 0x0, sizeof(bool) * bmt); oper->set_limits(iw + 1, ih); for (int z = mc::MapZ - 1; z >= 0; z--) { for (int x = mc::MapX - 1; x >= 0; x--) { bool cave_initial = true; bool hell_initial = true; bool hell_solid = true; hm_r.set_xz(x, z); b_r.set_xz(x, z); b_d.set_xz(x, z); bl_r.set_xz(x, z); sl_r.set_xz(x, z); int hmval = hm_r.get8(); if (s.hellmode) { for (int y = s.top; y >= s.bottom && hell_solid; y--) { hell_solid = !is_open(b_r.get8(y)); } } for (int y = s.top; y >= s.bottom; y--) { int bt = b_r.get8(y); if (s.cavemode && cave_ignore_block(s, y, bt, b_r, cave_initial)) { continue; } if (s.hellmode && !hell_solid && hell_ignore_block(s, y, bt, b_r, hell_initial)) { continue; } if (s.excludes[bt]) { continue; } point p(x, y, z); pos_t px, py; part_c.project_fatiso(p, px, py); color top = blockColor_top(bt, y, b_d), side = blockColor_side(bt, y, b_d); if (mc::MaterialModes[bt] == mc::Block) { int bp = px + iw * py; if (blocked[bp]) { continue; } blocked[bp] = top.is_opaque() && bt != mc::Fence; } int bl = bl_r.get4(y + 1); apply_shading(s, bl, sl_r.get4(y + 1), hmval, y, top); apply_shading(s, bl, -1, hmval, y, side); color topdark(top); color toplight(top); color sidelight(side); if (bt == mc::Grass) { topdark.darken(0x20); toplight.darken(0x10); sidelight.lighten(0x20); } else { toplight = color(side); topdark = color(side); } oper->add_pixel(px + 0, py + 0, side); oper->add_pixel(px + 0, py + 1, side); oper->add_pixel(px + 0, py + 2, side); oper->add_pixel(px + 1, py - 1, side); oper->add_pixel(px + 1, py + 0, side); 
oper->add_pixel(px + 1, py + 1, side); oper->add_pixel(px + 2, py - 1, side); oper->add_pixel(px + 2, py + 0, side); oper->add_pixel(px + 2, py + 1, side); oper->add_pixel(px + 3, py - 2, side); oper->add_pixel(px + 3, py - 1, side); oper->add_pixel(px + 3, py + 0, side); oper->add_pixel(px - 1, py + 0, sidelight); oper->add_pixel(px - 1, py + 1, sidelight); oper->add_pixel(px - 1, py + 2, sidelight); oper->add_pixel(px - 2, py - 1, sidelight); oper->add_pixel(px - 2, py + 0, sidelight); oper->add_pixel(px - 2, py + 1, sidelight); oper->add_pixel(px - 3, py - 1, sidelight); oper->add_pixel(px - 3, py + 0, sidelight); oper->add_pixel(px - 3, py + 1, sidelight); oper->add_pixel(px - 4, py - 2, sidelight); oper->add_pixel(px - 4, py - 1, sidelight); oper->add_pixel(px - 4, py + 0, sidelight); oper->add_pixel(px + 0, py - 2, topdark); oper->add_pixel(px + 0, py - 1, topdark); oper->add_pixel(px + 1, py - 3, topdark); oper->add_pixel(px + 1, py - 2, topdark); oper->add_pixel(px + 2, py - 3, topdark); oper->add_pixel(px + 2, py - 2, topdark); oper->add_pixel(px + 3, py - 4, topdark); oper->add_pixel(px + 3, py - 3, topdark); oper->add_pixel(px - 1, py - 2, toplight); oper->add_pixel(px - 1, py - 1, toplight); oper->add_pixel(px - 2, py - 3, toplight); oper->add_pixel(px - 2, py - 2, toplight); oper->add_pixel(px - 3, py - 3, toplight); oper->add_pixel(px - 3, py - 2, toplight); oper->add_pixel(px - 4, py - 4, toplight); oper->add_pixel(px - 4, py - 3, toplight); oper->add_pixel(px - 3, py - 5, top); oper->add_pixel(px - 3, py - 4, top); oper->add_pixel(px - 2, py - 5, top); oper->add_pixel(px - 2, py - 4, top); oper->add_pixel(px - 1, py - 6, top); oper->add_pixel(px - 1, py - 5, top); oper->add_pixel(px - 1, py - 4, top); oper->add_pixel(px - 1, py - 3, top); oper->add_pixel(px + 0, py - 6, top); oper->add_pixel(px + 0, py - 5, top); oper->add_pixel(px + 0, py - 4, top); oper->add_pixel(px + 0, py - 3, top); oper->add_pixel(px + 1, py - 5, top); oper->add_pixel(px + 1, 
py - 4, top); oper->add_pixel(px + 2, py - 5, top); oper->add_pixel(px + 2, py - 4, top); } } } return; }