void generate_d( const std::string& src_filename, std::ostream& os, const GenerateOptions& options, const std::map<std::string, Type>&, const std::map<std::string, Type>& nonterminal_types, const std::vector<std::string>& tokens, const action_map_type& actions, const tgt::parsing_table& table) { std::string module_name = boost::filesystem::path(src_filename).stem().string(); // notice / URL / module / imports stencil( os, R"( // This file was automatically generated by Caper. // (http://jonigata.github.io/caper/caper.html) module ${module_name}; import std.array; import std.stdio; )", {"module_name", module_name} ); if (!options.external_token) { // token enumeration stencil( os, R"( enum Token { $${tokens} } string tokenLabel(Token t) { static string[] labels = [ $${labels} ]; return labels[t]; } )", {"tokens", [&](std::ostream& os){ for(const auto& token: tokens) { stencil( os, R"( ${prefix}${token}, )", {"prefix", options.token_prefix}, {"token", token} ); } }},
/**
 * Configures the stencil state used for overdraw debugging.
 *
 * Has no effect unless Properties::debugOverdraw is set and mFramebuffer is 0.
 *
 * @param enable when true the stencil debug-write mode is enabled, otherwise
 *               the stencil is disabled
 * @param clear  when true the scissor is switched off and the stencil is
 *               cleared before the mode is applied
 */
void RenderState::debugOverdraw(bool enable, bool clear) {
    // Guard: only act when the debug property is on and mFramebuffer is 0.
    if (!Properties::debugOverdraw || mFramebuffer != 0) {
        return;
    }

    if (clear) {
        scissor().setEnabled(false);
        stencil().clear();
    }

    if (!enable) {
        stencil().disable();
        return;
    }
    stencil().enableDebugWrite();
}
// Assembles a 4x4 identity element matrix into an ISTL system matrix for every
// element, applies the resulting parallel operator to a vector of ones and
// compares the product against the reference values read from file.
TEST(TestISTLMatrix, AssembleMPI)
{
  InspectMatrixSIM sim(1);
  sim.read("src/LinAlg/Test/refdata/petsc_test.xinp");
  sim.opt.solver = SystemMatrix::ISTL;
  sim.preprocess();
  sim.initSystem(SystemMatrix::ISTL);

  // Element matrix with ones on the diagonal entries (1,1)..(4,4).
  Matrix stencil(4,4);
  for (size_t d = 1; d <= 4; ++d)
    stencil(d,d) = 1.0;

  for (int iel = 1; iel <= sim.getSAM()->getNoElms(); ++iel)
    sim.getMatrix()->assemble(stencil, *sim.getSAM(), iel);

  sim.getMatrix()->beginAssembly();
  sim.getMatrix()->endAssembly();

  // now inspect the matrix
  const ProcessAdm& adm = sim.getProcessAdm();
  ISTL::Mat& mat = static_cast<ISTLMatrix*>(sim.getMatrix())->getMatrix();
  ISTL::Vec b(mat.N()), b2(mat.N());

  try {
    typedef Dune::OwnerOverlapCopyAttributeSet Attr;
    typedef Dune::ParallelLocalIndex<Attr::AttributeSet> LocalIndex;

    Dune::OwnerOverlapCopyCommunication<int,int> comm(*adm.getCommunicator());

    // Register every local equation with its global id; equations whose
    // global id is at least getMinEq() are flagged as owner, the rest as overlap.
    comm.indexSet().beginResize();
    for (size_t i = 0; i < adm.dd.getMLGEQ().size(); ++i) {
      int gid = adm.dd.getGlobalEq(i+1);
      comm.indexSet().add(gid-1,
                          LocalIndex(i, gid >= adm.dd.getMinEq() ? Attr::owner
                                                                 : Attr::overlap));
    }
    comm.indexSet().endResize();

    comm.remoteIndices().setIncludeSelf(true);
    comm.remoteIndices().template rebuild<false>();

    // b2 = A * b with b == 1
    ISTL::ParMatrixAdapter op(mat, comm);
    b = 1.0;
    op.apply(b, b2);
  } catch (Dune::ISTLError& e) {
    std::cerr << e << std::endl;
    ASSERT_TRUE(false);
  }

  IntVec v = readIntVector("src/LinAlg/Test/refdata/petsc_matrix_diagonal.ref");
  for (size_t i = 0; i < adm.dd.getMLGEQ().size(); ++i)
    ASSERT_FLOAT_EQ(v[adm.dd.getGlobalEq(i+1)-1], b2[i]);
}
/* Builds a Gregory patch for the given face.
 *
 * The corner, edge and interior stencils are computed on the face's ring and
 * evaluated into 20 control positions, which are copied into the hull of a
 * newly allocated triangle patch (3 edges) or quad patch (4 edges).
 * The caller receives the heap-allocated patch. */
Patch *SubdAccBuilder::run(SubdFace *face)
{
	SubdFaceRing ring(face, face->edge);
	GregoryAccStencil stencil(&ring);
	float3 position[20];

	computeCornerStencil(&ring, &stencil);
	computeEdgeStencil(&ring, &stencil);
	computeInteriorStencil(&ring, &stencil);

	ring.evaluate_stencils(position, stencil.stencil, 20);

	switch(face->num_edges()) {
		case 3: {
			GregoryTrianglePatch *tri = new GregoryTrianglePatch();
			memcpy(tri->hull, position, sizeof(position));
			return tri;
		}
		case 4: {
			GregoryQuadPatch *quad = new GregoryQuadPatch();
			memcpy(quad->hull, position, sizeof(position));
			return quad;
		}
		default:
			break;
	}

	assert(0); /* n-gons should have been split already */
	return NULL;
}
void peano::applications::poisson::multigrid::mappings::SpacetreeGrid2SetupExperiment::createBoundaryVertex( peano::applications::poisson::multigrid::SpacetreeGridVertex& fineGridVertex, const tarch::la::Vector<DIMENSIONS,double>& fineGridX, const tarch::la::Vector<DIMENSIONS,double>& fineGridH, peano::applications::poisson::multigrid::SpacetreeGridVertex const * const coarseGridVertices, const peano::kernel::gridinterface::VertexEnumerator& coarseGridVerticesEnumerator, const peano::applications::poisson::multigrid::SpacetreeGridCell& coarseGridCell, const tarch::la::Vector<DIMENSIONS,int>& fineGridPositionOfVertex ) { logTraceInWith6Arguments( "createBoundaryVertex(...)", fineGridVertex, fineGridX, fineGridH, coarseGridVerticesEnumerator.toString(), coarseGridCell, fineGridPositionOfVertex ); // if (tarch::la::volume(fineGridH) > _refinementThreshold) { // fineGridVertex.refine(); // } if (coarseGridVerticesEnumerator.getLevel() < 3) { fineGridVertex.refine(); } peano::toolbox::stencil::Stencil stencil(0.0); fineGridVertex.setStencil(stencil); peano::toolbox::stencil::ProlongationMatrix prolongation (0.0); fineGridVertex.setP(prolongation); peano::toolbox::stencil::RestrictionMatrix restriction(0.0); fineGridVertex.setR(restriction); fineGridVertex.clearTempAP(); fineGridVertex.clearTempP(); logTraceOutWith1Argument( "createBoundaryVertex(...)", fineGridVertex ); }
/*
 * Test driver: fills A and W with a deterministic pattern, runs stencil()
 * twice, then compares h_R element by element against RESULT_STENCIL and
 * reports the number of mismatches through print_summary().
 */
int main()
{
  int row, col, pass;
  int mismatches = 0;

  printf("Start stencil\n");

  /* Input pattern: A[row][col] = row+col+1, W[row] = row+2. */
  for (row = 0; row < N; row++) {
    for (col = 0; col < M; col++)
      A[row*M+col] = row+col+1;
    W[row] = row+2;
  }

  for (pass = 0; pass < 2; pass++) {
    stencil(A, h_R, W);
  }

  /* Compare the computed result with the reference data. */
  for (row = 0; row < N; row++) {
    for (col = 0; col < M; col++) {
      if (RESULT_STENCIL[row*M+col] == h_R[row*M+col])
        continue;
      mismatches = mismatches + 1;
      printf("Error occurred at i=%d k=%d; Computed result R=%d does not match expected Result=%d\n",row,col,h_R[row*M+col],RESULT_STENCIL[row*M+col]);
    }
  }

  print_summary(mismatches);
  return 0;
}
void Visit(const AirspaceCircle& airspace) { RasterPoint screen_center = projection.GeoToScreen(airspace.GetCenter()); unsigned screen_radius = projection.GeoToScreenDistance(airspace.GetRadius()); GLEnable stencil(GL_STENCIL_TEST); { GLEnable blend(GL_BLEND); setup_interior(airspace); if (m_warnings.is_warning(airspace) || m_warnings.is_inside(airspace) || airspace_look.thick_pen.GetWidth() >= 2 * screen_radius) { // fill whole circle canvas.circle(screen_center.x, screen_center.y, screen_radius); } else { // draw a ring inside the circle Color color = airspace_look.colors[settings.colours[airspace.GetType()]]; Pen pen_donut(airspace_look.thick_pen.GetWidth() / 2, color.WithAlpha(90)); canvas.SelectHollowBrush(); canvas.Select(pen_donut); canvas.circle(screen_center.x, screen_center.y, screen_radius - airspace_look.thick_pen.GetWidth() / 4); } } // draw outline setup_outline(airspace); canvas.circle(screen_center.x, screen_center.y, screen_radius); }
// Runs stencil() on the given tile only when `flag` is non-zero and both
// `row` and `col` are greater than 2; otherwise does nothing.
void compute(int flag, TYPE orig[][UNROLL_C][(tile_size+2+UNROLL_R-1)/UNROLL_R][(tile_size+2+UNROLL_C-1)/UNROLL_C], TYPE sol[][tile_size+2], TYPE filter[f_size], size_t row, size_t col) {
#pragma HLS inline off
  // Guard: skip when disabled or when either index is 2 or less.
  if (!flag || row <= 2 || col <= 2) {
    return;
  }
  stencil(orig, sol, filter, row, col);
}
void VisitCircle(const AirspaceCircle &airspace) { RasterPoint screen_center = projection.GeoToScreen(airspace.GetCenter()); unsigned screen_radius = projection.GeoToScreenDistance(airspace.GetRadius()); GLEnable stencil(GL_STENCIL_TEST); if (!warning_manager.IsAcked(airspace) && settings.classes[airspace.GetType()].fill_mode != AirspaceClassRendererSettings::FillMode::NONE) { GLEnable blend(GL_BLEND); SetupInterior(airspace); if (warning_manager.HasWarning(airspace) || warning_manager.IsInside(airspace) || look.thick_pen.GetWidth() >= 2 * screen_radius || settings.classes[airspace.GetType()].fill_mode == AirspaceClassRendererSettings::FillMode::ALL) { // fill whole circle canvas.DrawCircle(screen_center.x, screen_center.y, screen_radius); } else { // draw a ring inside the circle Color color = settings.classes[airspace.GetType()].fill_color; Pen pen_donut(look.thick_pen.GetWidth() / 2, color.WithAlpha(90)); canvas.SelectHollowBrush(); canvas.Select(pen_donut); canvas.DrawCircle(screen_center.x, screen_center.y, screen_radius - look.thick_pen.GetWidth() / 4); } } // draw outline if (SetupOutline(airspace)) canvas.DrawCircle(screen_center.x, screen_center.y, screen_radius); }
void Visit(const AirspacePolygon& airspace) { if (!prepare_polygon(airspace.GetPoints())) return; bool fill_airspace = m_warnings.is_warning(airspace) || m_warnings.is_inside(airspace); GLEnable stencil(GL_STENCIL_TEST); if (!m_warnings.is_acked(airspace)) { if (!fill_airspace) { // set stencil for filling (bit 0) set_fillstencil(); draw_prepared(); } // fill interior without overpainting any previous outlines { setup_interior(airspace, !fill_airspace); GLEnable blend(GL_BLEND); draw_prepared(); } if (!fill_airspace) { // clear fill stencil (bit 0) clear_fillstencil(); draw_prepared(); } } // draw outline setup_outline(airspace); draw_prepared(); }
// Verifies the result currently stored in space[T & 1].
//
// The current result is saved, the space is re-initialised with initSpace(),
// the stencil is re-applied serially for T time steps over
// [lowerBound, upperBound], and the two results are compared element by
// element with exact equality.
//
// verbose: when true, prints "SUCCESS" or "FAILED" to stdout.
// Returns true when every compared element matches; returns false on the
// first mismatch or when the scratch buffer cannot be allocated.
// Side effect: the contents of `space` are overwritten by the serial re-run.
bool verifyResult( bool verbose ){
  assert( space[0] != NULL && space[1] != NULL );

  double* endSpace = (double*) malloc( (problemSize + 2) * sizeof(double) );
  if( endSpace == NULL ){
    // Without the scratch copy nothing can be compared; report it instead of
    // dereferencing a null pointer below.
    fprintf( stderr, "Could not allocate verification array\n" );
    return false;
  }

  // Save the result under test before it gets overwritten.
  for( int x = 0; x < problemSize + 2; ++x ){
    endSpace[x] = space[T & 1][x];
  }

  // Recompute the reference result serially.
  initSpace();
  int read = 0, write = 1;
  for( int t = 1; t <= T; ++t ){
    for( int x = lowerBound; x <= upperBound; ++x ){
      stencil(read, write, x);
    }
    read = write;
    write = 1 - write;
  }

  bool failed = false;
  for( int x = lowerBound; x <= upperBound; ++x ){
    if( endSpace[x] != space[T & 1][x] ){
      failed = true;
      if( verbose ) printf( "FAILED\n" );
      break;
    }
  }

  if( verbose && !failed ) printf( "SUCCESS\n" );

  free( endSpace );
  return !failed;
}
//-------------------------------------------------------------- void testApp::draw(){ stringstream ss; ss << "FPS: " << ofGetFrameRate(); ofDrawBitmapString(ss.str(), ofPoint(50,50)); ofRectangle stencil( 0, 0, 500,500 ); ofPushMatrix(); { ofTranslate(100, 100); squareMesh.draw(); squareWorld.draw(stencil); ofPushMatrix(); { ofTranslate(500, 0); quadWorld.getWorldQuad().draw(); quadWorld.draw(stencil); } ofPopMatrix(); } ofPopMatrix(); }
// Assembles the same all-ones 13x13 element matrix with the PETSc backend
// (sim[0]) and the ISTL backend (sim[1]), then checks that the PETSc Schur
// complement agrees entry by entry with block 1 of the ISTL block
// preconditioner.
TEST(TestISTLPETScMatrix, SchurComplement)
{
  ASMmxBase::Type = ASMmxBase::FULL_CONT_RAISE_BASIS1;
  ASMmxBase::geoBasis = 2;

  // Element matrix with every entry set to one.
  Matrix stencil(13,13);
  for (size_t row = 1; row <= 13; ++row)
    for (size_t col = 1; col <= 13; ++col)
      stencil(row,col) = 1.0;

  std::array<InspectMatrixSIM,2> sim;
  for (size_t i = 0; i < 2; ++i) {
    InspectMatrixSIM& model = sim[i];
    const auto backend = i == 0 ? SystemMatrix::PETSC : SystemMatrix::ISTL;

    model.read("src/LinAlg/Test/refdata/petsc_test_blocks_basis.xinp");
    model.opt.solver = backend;
    model.preprocess();
    model.initSystem(backend);

    for (int iel = 1; iel <= model.getSAM()->getNoElms(); ++iel)
      model.getMatrix()->assemble(stencil, *model.getSAM(), iel);

    model.getMatrix()->beginAssembly();
    model.getMatrix()->endAssembly();
  }

  // ISTL side: block 1 of the "upper" block preconditioner.
  const ProcessAdm& adm = sim[1].getProcessAdm();
  ISTL::Mat& A = static_cast<ISTLMatrix*>(sim[1].getMatrix())->getMatrix();
  ISTL::BlockPreconditioner block(A, adm.dd, "upper");
  ISTL::Mat& S = block.getBlock(1);

  // PETSc side: Schur complement built from the block matrices.
  PETScSolParams params(LinSolParams(), adm);
  params.setupSchurComplement(static_cast<PETScMatrix*>(sim[0].getMatrix())->getBlockMatrices());

  // check that matrices are the same
  for (size_t r = 0; r < S.N(); ++r) {
    const PetscInt* cols;
    PetscInt ncols;
    const PetscScalar* vals;

    MatGetRow(params.getSchurComplement(), r, &ncols, &cols, &vals);
    for (PetscInt k = 0; k < ncols; ++k)
      ASSERT_FLOAT_EQ(vals[k], S[r][cols[k]]);
    MatRestoreRow(params.getSchurComplement(), r, &ncols, &cols, &vals);
  }
}
// Stencils fPath into the current render target: builds the stencil settings
// for fFillType and hands them, together with the view matrix and scissor, to
// the GPU's path-rendering backend.
void GrStencilPathOp::onExecute(GrOpFlushState* state) {
    GrRenderTarget* renderTarget = state->drawOpArgs().renderTarget();
    SkASSERT(renderTarget);

    const int stencilBitCount = renderTarget->renderTargetPriv().numStencilBits();

    GrStencilSettings stencilSettings(GrPathRendering::GetStencilPassSettings(fFillType),
                                      fHasStencilClip,
                                      stencilBitCount);

    GrPathRendering::StencilPathArgs pathArgs(fUseHWAA,
                                              state->drawOpArgs().fProxy,
                                              &fViewMatrix,
                                              &fScissor,
                                              &stencilSettings);

    state->gpu()->pathRendering()->stencilPath(pathArgs, fPath.get());
}
int main () { std::cout << "Starting VTK test" << std::endl; FlowField flowField ( 10, 10, 10 ); clock_t start = clock(); FLOAT velocity [3] = {1,1,1}; for (int k = 0; k < flowField.getNz() + 3; k++ ){ for (int j = 0; j < flowField.getNy() + 3; j++ ){ for (int i = 0; i < flowField.getNx() + 3; i++ ){ flowField.getPressure().getScalar(i,j,k) = (double) k; flowField.getVelocity().setVector(velocity, i,j,k); } } } std::cout << "Initialization time: " << (double) (clock() - start) / CLOCKS_PER_SEC << std::endl; start = clock(); Parameters parameters; parameters.dx = 1; parameters.dy = 1; parameters.dz = 1; VTKStencil stencil( "/tmp/some_file", parameters ); std::cout << "Stencil creation time: " << (double) (clock() - start) / CLOCKS_PER_SEC << std::endl; start = clock(); stencil.openFile ( flowField, 5.0/3 ); std::cout << "File-openning and grid data writing time: " << (double) (clock() - start) / CLOCKS_PER_SEC << std::endl; start = clock(); FieldIterator iterator( flowField, stencil ); iterator.iterateInnerCells(); std::cout << "Iteration time: " << (double) (clock() - start) / CLOCKS_PER_SEC << std::endl; start = clock(); stencil.write( flowField ); std::cout << "Writing time: " << (double) (clock() - start) / CLOCKS_PER_SEC << std::endl; stencil.closeFile(); }
// naive parallel iteration test suite double test_1(){ int t, x, read = 0, write = 1; double start_time = omp_get_wtime(); for( t = 1; t <= T; ++t ){ #pragma omp parallel for private( x ) //schedule(dynamic) for( x = lowerBound; x <= upperBound; ++x ){ stencil( read, write, x ); } read = write; write = 1 - write; } double end_time = omp_get_wtime(); return (end_time - start_time); }
JSValue JSHTMLCanvasElement::getContext(ExecState* exec) { HTMLCanvasElement* canvas = static_cast<HTMLCanvasElement*>(impl()); const UString& contextId = exec->argument(0).toString(exec)->value(exec); RefPtr<CanvasContextAttributes> attrs; #if ENABLE(WEBGL) if (contextId == "experimental-webgl" || contextId == "webkit-3d") { attrs = WebGLContextAttributes::create(); WebGLContextAttributes* webGLAttrs = static_cast<WebGLContextAttributes*>(attrs.get()); if (exec->argumentCount() > 1 && exec->argument(1).isObject()) { JSObject* jsAttrs = exec->argument(1).getObject(); Identifier alpha(exec, "alpha"); if (jsAttrs->hasProperty(exec, alpha)) webGLAttrs->setAlpha(jsAttrs->get(exec, alpha).toBoolean(exec)); Identifier depth(exec, "depth"); if (jsAttrs->hasProperty(exec, depth)) webGLAttrs->setDepth(jsAttrs->get(exec, depth).toBoolean(exec)); Identifier stencil(exec, "stencil"); if (jsAttrs->hasProperty(exec, stencil)) webGLAttrs->setStencil(jsAttrs->get(exec, stencil).toBoolean(exec)); Identifier antialias(exec, "antialias"); if (jsAttrs->hasProperty(exec, antialias)) webGLAttrs->setAntialias(jsAttrs->get(exec, antialias).toBoolean(exec)); Identifier premultipliedAlpha(exec, "premultipliedAlpha"); if (jsAttrs->hasProperty(exec, premultipliedAlpha)) webGLAttrs->setPremultipliedAlpha(jsAttrs->get(exec, premultipliedAlpha).toBoolean(exec)); Identifier preserveDrawingBuffer(exec, "preserveDrawingBuffer"); if (jsAttrs->hasProperty(exec, preserveDrawingBuffer)) webGLAttrs->setPreserveDrawingBuffer(jsAttrs->get(exec, preserveDrawingBuffer).toBoolean(exec)); } } #endif CanvasRenderingContext* context = canvas->getContext(ustringToString(contextId), attrs.get()); if (!context) return jsNull(); JSValue jsValue = toJS(exec, globalObject(), WTF::getPtr(context)); #if ENABLE(WEBGL) if (context->is3d() && InspectorInstrumentation::hasFrontends()) { ScriptObject glContext(exec, jsValue.getObject()); ScriptObject wrapped = 
InspectorInstrumentation::wrapWebGLRenderingContextForInstrumentation(canvas->document(), glContext); if (!wrapped.hasNoValue()) return wrapped.jsValue(); } #endif return jsValue; }
void VisitPolygon(const AirspacePolygon &airspace) { if (!PreparePolygon(airspace.GetPoints())) return; const AirspaceClassRendererSettings &class_settings = settings.classes[airspace.GetType()]; bool fill_airspace = warning_manager.HasWarning(airspace) || warning_manager.IsInside(airspace) || class_settings.fill_mode == AirspaceClassRendererSettings::FillMode::ALL; if (!warning_manager.IsAcked(airspace) && class_settings.fill_mode != AirspaceClassRendererSettings::FillMode::NONE) { GLEnable stencil(GL_STENCIL_TEST); if (!fill_airspace) { // set stencil for filling (bit 0) SetFillStencil(); DrawPrepared(); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } // fill interior without overpainting any previous outlines { SetupInterior(airspace, !fill_airspace); GLEnable blend(GL_BLEND); DrawPrepared(); } if (!fill_airspace) { // clear fill stencil (bit 0) ClearFillStencil(); DrawPrepared(); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); } } // draw outline if (SetupOutline(airspace)) DrawPrepared(); }
// Computes the residual of the discrete equation p_xx + p_yy = rhs:
//   res = sqrt( Sum_i Sum_j (p_xx + p_yy - rhs)^2 / (I*J) )
// where the sums run over the indices 1..dim-2 in each direction and
// I = dim[0]-2, J = dim[1]-2.
//
// sourcegridfunction: grid function holding p
// rhs:                right-hand side values
// h:                  passed on to the Stencil constructor
RealType Solver::computeResidual(GridFunction& sourcegridfunction,
                                 GridFunctionType& rhs,
                                 const PointType& h){
  // Running sum of the scaled squared residuals; its square root is returned.
  RealType sumOfSquares = 0.0;

  // The second derivatives p_xx and p_yy are obtained by applying the stencil.
  MultiIndexType dim = sourcegridfunction.GetGridDimension();
  MultiIndexType bread  (0,0);
  MultiIndexType eread  (dim[0]-1,dim[1]-1);
  MultiIndexType bwrite (1,1);
  MultiIndexType ewrite (dim[0]-2,dim[1]-2);

  // Compute the needed derivatives for the whole (inner?) area.
  // Open question from the original author: can this simply be passed on as
  // const, and how?
  Stencil stencil(3,h);

  // Second derivative in x-direction:
  GridFunction Fxx(dim);
  stencil.ApplyFxxStencilOperator(bread, eread, bwrite, ewrite,
                                  sourcegridfunction.GetGridFunction(), Fxx);

  // Second derivative in y-direction:
  GridFunction Fyy(dim);
  stencil.ApplyFyyStencilOperator(bread, eread, bwrite, ewrite,
                                  sourcegridfunction.GetGridFunction(), Fyy);

  // Accumulate the squared point-wise residuals, each scaled by 1/(I*J).
  for (IndexType i = 1; i <= dim[0]-2; i++) {
    for (IndexType j = 1; j <= dim[1]-2; j++) {
      const RealType derivator = Fxx.GetGridFunction()[i][j]+ Fyy.GetGridFunction()[i][j] - rhs[i][j];
      sumOfSquares += derivator*derivator / (dim[0]-2) / (dim[1]-2);
    }
  }

  return sqrt(sumOfSquares);
}
// Creates and initialises a PETSc sparse matrix mapping Xh (trial) to Yh
// (test). The MPI matrix type is used when Yh's world communicator has more
// than one process, the sequential type otherwise. The sparsity graph comes
// from the stencil built on the two spaces.
static sparse_matrix_ptrtype
newMatrix( DomainSpace const& Xh,
           DualImageSpace const& Yh,
           size_type matrix_properties = NON_HERMITIAN )
{
    auto sparsity = stencil( _test=Yh,_trial=Xh );

    const bool isParallel = Yh->worldComm().globalSize()>1;

    sparse_matrix_ptrtype matrix;
    if ( !isParallel )
    {
        // seq
        matrix = sparse_matrix_ptrtype( new petsc_sparse_matrix_type( Yh->dof(),Xh->dof() ) );
    }
    else
    {
        matrix = sparse_matrix_ptrtype( new petscMPI_sparse_matrix_type( Yh->dof(),Xh->dof() ) );
    }

    matrix->setMatrixProperties( matrix_properties );
    matrix->init( Yh->nDof(), Xh->nDof(),
                  Yh->nLocalDofWithoutGhost(), Xh->nLocalDofWithoutGhost(),
                  sparsity->graph() );
    // (previously: Yh->nLocalDof(), Xh->nLocalDof() for the local sizes)

#if 0
    auto nSpace = DomainSpace::nSpaces;

    std::vector < std::vector<int> > is( nSpace );
    uint cptSpaces=0;

    //boost::tuple< typename DomainSpace::functionspace_vector_type, uint, std::vector < std::vector<int> > > hola;
    // auto result = boost::make_tuple(Xh->functionSpaces(),cptSpaces,is);
    auto result = boost::make_tuple( cptSpaces,is );
    boost::fusion::fold( Xh->functionSpaces(), result, computeNDofForEachSpace() );

    for ( uint i = 0; i<nSpace; i++ )
    {
        //is[i].resize()
    }
#endif

    return matrix;
}
/* create an interpolation matrix inputs: alpha : k*dx M : highest order bessel function to use upsample: ratio to upsample by output: interpolation */ gsl_matrix *create_interp_matrix(double alpha, int M, int upsample) { point *points_in; point *points_out; int npoints_out; double x,y; int i; double step; double r_typical; gsl_matrix *interp; points_in = stencil(); step = 1.0/upsample; r_typical = 3*sqrt(0.5); // this works well for 4x4+2 stencil npoints_out = (upsample+1)*(upsample+1); points_out = (point *)malloc(npoints_out * sizeof(point)); if (points_out == NULL) { // ERROR } i = 0; for (x = -0.5 ; x < 0.5+step/2 ; x += step) { for (y = -0.5 ; y < 0.5+step/2 ; y += step) { points_out[i].x = x; points_out[i++].y = y; } } interp = interp_matrix(alpha, points_in, NUM_STENCIL_POINTS, points_out, npoints_out, M, r_typical); free(points_in); free(points_out); return interp; }
/*
 * Computes the update for grid point i from its neighbours along the three
 * axes, using the file-level arrays s, n, ttime, rdx, in and vv.
 *
 * Returns the value produced by the first successful updaten2() call, or 0.
 * when none succeeds or no direction has a positive value.
 * Side effect: sets the file-level v1 to vv[i].
 */
static float qsolve2(int i)
/* find new traveltime at gridpoint i */
{
    int j, k, ix;
    float a, b, t, res;
    struct Upd *v[3], x[3], *xj;

    /* Per axis: a/b are the ttime values of the previous/next neighbour
       (0. when the neighbour would lie outside the grid). */
    for (j=0; j<3; j++) {
	/* ix: index of point i along axis j */
	ix = (i/s[j])%n[j];

	if (ix > 0) {
	    k = i-s[j];
	    a = ttime[k];
	} else {
	    a = 0.;
	}

	if (ix < n[j]-1) {
	    k = i+s[j];
	    b = ttime[k];
	} else {
	    b = 0.;
	}

	xj = x+j;
	xj->delta = rdx[j];

	/* keep the larger of the two neighbour values */
	if (a > b) {
	    xj->stencil = xj->value = a;
	} else {
	    xj->stencil = xj->value = b;
	}

	/* For order > 1, also look two cells away on the side of the larger
	   neighbour; stencil() is applied when that cell is not SF_OUT and
	   its ttime is at least the neighbour's value. */
	if (order > 1) {
	    if (a > b && ix-2 >= 0) {
		k = i-2*s[j];
		if (in[k] != SF_OUT && a <= (t=ttime[k]))
		    stencil(t,xj);
	    }
	    if (a < b && ix+2 <= n[j]-1) {
		k = i+2*s[j];
		if (in[k] != SF_OUT && b <= (t=ttime[k]))
		    stencil(t,xj);
	    }
	}
    }

    /* Order the three directions so that
       v[0]->value >= v[1]->value >= v[2]->value. */
    if (x[0].value >= x[1].value) {
	if (x[1].value >= x[2].value) {
	    v[0] = x; v[1] = x+1; v[2] = x+2;
	} else if (x[2].value >= x[0].value) {
	    v[0] = x+2; v[1] = x; v[2] = x+1;
	} else {
	    v[0] = x; v[1] = x+2; v[2] = x+1;
	}
    } else {
	if (x[0].value >= x[2].value) {
	    v[0] = x+1; v[1] = x; v[2] = x+2;
	} else if (x[2].value >= x[1].value) {
	    v[0] = x+2; v[1] = x+1; v[2] = x;
	} else {
	    v[0] = x+1; v[1] = x+2; v[2] = x;
	}
    }

    v1=vv[i];

    /* Try the update with as many directions as have a positive value,
       falling back to fewer directions when updaten2() reports failure. */
    if(v[2]->value > 0) { /* ALL THREE DIRECTIONS CONTRIBUTE */
	if (updaten2(3, &res, v) ||
	    updaten2(2, &res, v) ||
	    updaten2(1, &res, v)) return res;
    } else if(v[1]->value > 0) { /* TWO DIRECTIONS CONTRIBUTE */
	if (updaten2(2, &res, v) ||
	    updaten2(1, &res, v)) return res;
    } else if(v[0]->value > 0) { /* ONE DIRECTION CONTRIBUTES */
	if (updaten2(1, &res, v)) return res;
    }

    return 0.;
}
double test_1(){ double start_time = omp_get_wtime(); int read=0, write = 1; // s is the number of non-pointy bit 2D slices of diamond tiling // that is available for the current tile size. int s = (tau/3) - 2; // subset_s is an input parameter indicating how many of those // slices we want to use in the repeated tiling pattern. // subset_s should be less than s and greater than or equal to 2. if (subset_s > s || subset_s<2) { fprintf(stderr, "Error: need 2<=subset_s<=s\n"); exit(-1); } // Set lower and upper bounds for spatial dimensions. // When did code gen have a non-inclusive upper bound. // Ian's upper bound is inclusive. int Li=1, Lj=1, Ui=upperBound+1, Uj=upperBound+1; // Loop over the tiling pattern. for (int toffset=0; toffset<T; toffset+=subset_s){ // Loop over phases of tiles within repeated tile pattern. // This is like iterating over the A and B trapezoid tile types. for (int c0 = -2; c0 <= 0; c0 += 1){ // Two loops over tiles within one phase. // All of the tiles within one phase can be done in parallel. // updates by Dave W, to the c1 and c2 loops, for OpenMP (from here to the end of the #if BOUNDING_BOX_FOR_PARALLEL_LOOPS // hoist out min_c1 and max_c1, then use that to hoist a bounding box for c2 // initial version is just aiming for correct and parallel, without worrying about a loose boundingbox int c1_lb = max( max( floord(Lj + (tau/3) * c0 + (tau/3), tau), c0 + floord(-2 * T + Lj - 1, tau) + 1), floord(Lj + 1, tau) ); // end init block c1 int c1_ub = min( min( floord(Uj + (tau/3) * c0 - ((tau/3)+2), tau) + 1, floord(T + Uj - 1, tau)), c0 + floord(Uj - 5, tau) + 2 ); // end cond block c1 // The two expressions below are the same as in the previous version, except that // in the c2_lb_min_expr, I have replaced c1 with: // c1_min_value where it appears with a positive coefficient, and // c1_max_value where it appears with a negative coefficient. 
// and in the c2_ub_max_expr, the opposite (i.e., c1 becomes c1_max_value where positive) // I will be embarrassed if I have done this wrong. /// Note that I assume tau > 0 #define c2_lb_min_expr(c1_min_value, c1_max_value) \ max( \ max( \ max( \ max( \ max( \ max( \ c0 - 2 * c1_max_value + floord(-Ui + Lj + 1,tau), \ -c1_max_value + floord(-2 * Ui - Uj + tau * c0 + tau * c1_min_value - tau-3, tau*2)+1), \ c1_min_value + floord(-Ui - 2 * Uj + 3, tau)), \ floord(-Ui - Uj + 3, tau)), \ c0 - c1_max_value + floord(-Ui - (tau/3) * c0 + ((tau/3)+1), tau)), \ c0 - c1_max_value + floord(-T - Ui, tau) + 1), \ -c1_max_value + floord(-Ui + 4, tau) - 1 \ ) /* end init block c2 */ #define c2_ub_max_expr(c1_min_value, c1_max_value) \ min( \ min( \ min( \ min( \ min( \ min( \ c0 - 2 * c1_min_value + floord(-Li + Uj - 2, tau) + 1, \ c0 - c1_min_value + floord(-Li - 2, tau) + 1), \ c0 - c1_min_value + floord(-Li - (tau/3) * c0 - ((tau/3)+1), tau) + 1), \ floord(T - Li - Lj, tau)), \ -c1_min_value + floord(2 * T - Li, tau)), \ c1_max_value + floord(-Li - 2 * Lj - 1, tau) + 1), \ -c1_min_value + floord(-2 * Li - Lj + tau * c0 + tau * c1_max_value + (tau-1), tau*2) \ ) /* end cond block c2 */ #define c2_lb_expr(c1_value) c2_lb_min_expr(c1_value, c1_value) #define c2_ub_expr(c1_value) c2_ub_max_expr(c1_value, c1_value) #if BOUNDING_BOX_FOR_PARALLEL_LOOPS int c2_box_lb = c2_lb_min_expr(c1_lb, c1_ub); int c2_box_ub = c2_ub_max_expr(c1_lb, c1_ub); #if PARALLEL // don't need to mention c1...c5 below, since they're scoped inside the for loops #pragma omp parallel for shared(start_time, s, Li, Lj, Ui, Uj, toffset, c0, c1_lb, c1_ub, c2_box_lb, c2_box_ub, ) private(read, write) collapse(2) #endif for (int c1 = c1_lb; c1 <= c1_ub; c1 += 1) { for (int c2 = c2_box_lb; c2 <= c2_box_ub; c2 += 1) if (c2 >= c2_lb_expr(c1) && c2 <= c2_ub_expr(c1)) { #else for (int c1 = c1_lb; c1 <= c1_ub; c1 += 1) { for (int c2 = c2_lb_expr(c1); c2 <= c2_ub_expr(c1); c2 += 1) { #endif //fprintf(stdout, "(%d,%d,%d)\n", 
c0,c1,c2); // Loop over subset_s time steps within tiling pattern // and within tile c0,c1,c2. // Every time the pattern is repeated, toffset will be // subset_s bigger. // The real t value is c3+toffset. We are just using the // tiling pattern from t=1 to t<=subset_s. for (int c3 = 1; c3 <= min(T-toffset,subset_s); c3 += 1){ int t = c3+toffset; // if t % 2 is 1, then read=0 and write=1 write = t & 1; read = 1-write; // x spatial dimension. for (int c4 = max( max( max( -tau * c1 - tau * c2 + 2 * c3 - (2*tau-2), -Uj - tau * c2 + c3 - (tau-2)), tau * c0 - tau * c1 - tau * c2 - c3), Li ); // end init block c4 c4 <= min( min( min( tau * c0 - tau * c1 - tau * c2 - c3 + (tau-1), -tau * c1 - tau * c2 + 2 * c3), -Lj - tau * c2 + c3), Ui - 1 ); // end cond block c4 c4 += 1){ // y spatial dimension. for (int c5 = max( max( tau * c1 - c3, Lj), -tau * c2 + c3 - c4 - (tau-1) ); // end init block c5 c5 <= min( min( Uj - 1, -tau * c2 + c3 - c4), tau * c1 - c3 + (tau-1) ); // end cond block c5 c5 += 1){ //fprintf(stdout, "(%d,%d,%d,%d,%d,%d)\n", c0,c1,c2,c3,c4,c5); stencil( read, write, c4, c5); } // for c5 } // for c4 } // for c3 } // for c2 } // for c1 } // for c0 } // for toffset double end_time = omp_get_wtime(); return (end_time - start_time); } int main( int argc, char* argv[] ){ setbuf(stdout, NULL); // set buffer to null, so prints ALWAYS print (for debug purposes mainly) bool verify = false; bool printtime = true; // Command line parsing char c; while ((c = getopt (argc, argv, "nc:s:p:T:t:hv")) != -1){ switch( c ) { case 'n': printtime=false; break; case 'c': // problem size cores = parseInt( optarg ); if( cores <= 0 ){ fprintf(stderr, "cores must be greater than 0: %d\n", cores); exit(BAD_RUN_TIME_PARAMETERS); } break; case 's': // subset //globalSeed = parseInt( optarg ); subset_s = parseInt( optarg ); break; case 'p': // problem size problemSize = parseInt( optarg ); if( problemSize <= 0 ){ fprintf(stderr, "problemSize must be greater than 0: %d\n", problemSize); 
exit(BAD_RUN_TIME_PARAMETERS); } break; case 'T': // T (time steps) T = parseInt( optarg ); if( T <= 0 ){ fprintf(stderr, "T must be greater than 0: %d\n", T); exit(BAD_RUN_TIME_PARAMETERS); } break; case 't': // tau #if defined tau fprintf(stderr, "don't use -t to set tau when you compiled with -Dtau=%d.\n", tau); if (parseInt(optarg) != tau) exit(BAD_COMPILE_TIME_PARAMETERS); #else tau = parseInt( optarg ); #endif break; case 'h': // help printf("usage: %s\n-n \t dont print time \n-p <problem size> \t problem size in elements \n-T <time steps>\t number of time steps\n-c <cores>\tnumber of threads\n-s <subset_s>\t tile parameter\n-t <tau>\t tile parameter\n-h\tthis dialogue\n-v\tverify output\n", argv[0]); exit(0); case 'v': // verify; verify = true; break; case '?': if (optopt == 'p') fprintf (stderr, "Option -%c requires positive int argument: problem size.\n", optopt); else if (optopt == 'T') fprintf (stderr, "Option -%c requires positive int argument: T.\n", optopt); else if (optopt == 's') fprintf (stderr, "Option -%c requires int argument: subset_s.\n", optopt); else if (optopt == 'c') fprintf (stderr, "Option -%c requires int argument: number of cores.\n", optopt); else if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); exit(0); default: exit(0); } } if( !( tau % 3 == 0 && tau >= 15 ) ){ #if defined tau fprintf(stderr, "tau must be a multiple of 3, and >= 15, but the program was compiled with -Dtau=%d, and thus can't run :-(\n", tau); exit(BAD_COMPILE_TIME_PARAMETERS); #else fprintf(stderr, "tau must be a multiple of 3, and >= 15, but it's %d; re-run with a different -t value\n", tau); exit(BAD_RUN_TIME_PARAMETERS); #endif } init(); initSpace(); double time = test_1(); if( printtime ) { printf( "Time: %f\n", time ); } if( verify ){ verifyResult( true ); } }
// returns true if valid result bool verifyResult( bool verbose ){ assert( space[0] != NULL && space[1] != NULL ); double** endSpace; endSpace = (double**) malloc( (problemSize + 2) * sizeof(double*)); if( endSpace == NULL ){ printf( "Could not allocate x axis of verification array\n" ); exit(0); } // allocate y axis for( int x = 0; x < problemSize + 2; ++x ){ endSpace[x] = (double*) malloc( (problemSize + 2) * sizeof(double)); if( endSpace[x] == NULL ){ printf( "Could not allocate y axis of verification array\n" ); exit(0); } } for( int x = 0; x < problemSize + 2; ++x ){ for( int y = lowerBound; y <= upperBound; ++y ){ endSpace[x][y] = space[ T & 1 ][x][y]; } } initSpace(); int t, x, y, read = 0, write = 1; for( t = 1; t <= T; ++t ){ for( x = lowerBound; x <= upperBound; ++x ){ for( y = lowerBound; y <= upperBound; ++y ){ stencil( read, write, x, y); } } read = write; write = 1 - write; } bool failed = false; for( x = lowerBound; x <= upperBound; ++x ){ for( y = lowerBound; y <= upperBound; ++y ){ if( endSpace[x][y] != space[ T & 1 ][x][y] ){ failed = true; if( verbose ) printf( "FAILED! %f != %f at %d, %d\n", endSpace[x][y],space[ T & 1 ][x][y], x, y); break; } } if( failed ) break; } if( verbose && !failed ) printf( "SUCCESS\n" ); for( int x = 0; x < problemSize + 2; ++x ){ free( endSpace[x] ); } free( endSpace ); return !failed; }
// Dumps the render state by calling dump() on the blend, mesh-state, scissor
// and stencil sub-states, in that order.
void RenderState::dump() {
    blend().dump();
    meshState().dump();
    scissor().dump();
    stencil().dump();
}
// The MAIN function, from here we start our application and run our Game loop int main() { // Init GLFW glfwInit(); glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3); glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); glfwWindowHint(GLFW_RESIZABLE, GL_FALSE); GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "Some OpenGL Testing", nullptr, nullptr); // Windowed glfwMakeContextCurrent(window); // Set the required callback functions glfwSetKeyCallback(window, key_callback); glfwSetCursorPosCallback(window, mouse_callback); glfwSetScrollCallback(window, scroll_callback); glfwSetMouseButtonCallback(window, mouse_button_click_callback); // Options glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_NORMAL); // Initialize GLEW to setup the OpenGL Function pointers glewExperimental = GL_TRUE; glewInit(); // Define the viewport dimensions glViewport(0, 0, screenWidth, screenHeight); // Setup some OpenGL options glEnable(GL_DEPTH_TEST); glEnable(GL_STENCIL_TEST); glDepthFunc(GL_LESS); // Set to always pass the depth test (same effect as glDisable(GL_DEPTH_TEST)) glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); // Setup and compile our shaders Shader shader("shaders/advanced.vs", "shaders/advanced.frag"); Shader stencil("shaders/stencil.vs", "shaders/stencil.frag"); #pragma region "object_initialization" // Set the object data (buffers, vertex attributes) GLfloat cubeVertices[] = { // Positions // Texture Coords -0.5f, -0.5f, -0.5f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, 1.0f, 0.0f, 0.5f, 0.5f, -0.5f, 1.0f, 1.0f, 0.5f, 0.5f, -0.5f, 1.0f, 1.0f, -0.5f, 0.5f, -0.5f, 0.0f, 1.0f, -0.5f, -0.5f, -0.5f, 0.0f, 0.0f, -0.5f, -0.5f, 0.5f, 0.0f, 0.0f, 0.5f, -0.5f, 0.5f, 1.0f, 0.0f, 0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 1.0f, 1.0f, -0.5f, 0.5f, 0.5f, 0.0f, 1.0f, -0.5f, -0.5f, 0.5f, 0.0f, 0.0f, -0.5f, 0.5f, 0.5f, 1.0f, 0.0f, -0.5f, 0.5f, -0.5f, 1.0f, 1.0f, -0.5f, -0.5f, -0.5f, 0.0f, 1.0f, -0.5f, -0.5f, -0.5f, 0.0f, 1.0f, 
-0.5f, -0.5f, 0.5f, 0.0f, 0.0f, -0.5f, 0.5f, 0.5f, 1.0f, 0.0f, 0.5f, 0.5f, 0.5f, 1.0f, 0.0f, 0.5f, 0.5f, -0.5f, 1.0f, 1.0f, 0.5f, -0.5f, -0.5f, 0.0f, 1.0f, 0.5f, -0.5f, -0.5f, 0.0f, 1.0f, 0.5f, -0.5f, 0.5f, 0.0f, 0.0f, 0.5f, 0.5f, 0.5f, 1.0f, 0.0f, -0.5f, -0.5f, -0.5f, 0.0f, 1.0f, 0.5f, -0.5f, -0.5f, 1.0f, 1.0f, 0.5f, -0.5f, 0.5f, 1.0f, 0.0f, 0.5f, -0.5f, 0.5f, 1.0f, 0.0f, -0.5f, -0.5f, 0.5f, 0.0f, 0.0f, -0.5f, -0.5f, -0.5f, 0.0f, 1.0f, -0.5f, 0.5f, -0.5f, 0.0f, 1.0f, 0.5f, 0.5f, -0.5f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 1.0f, 0.0f, 0.5f, 0.5f, 0.5f, 1.0f, 0.0f, -0.5f, 0.5f, 0.5f, 0.0f, 0.0f, -0.5f, 0.5f, -0.5f, 0.0f, 1.0f }; GLfloat planeVertices[] = { // Positions 5.0f, -0.5f, 5.0f, 2.0f, 0.0f, -5.0f, -0.5f, 5.0f, 0.0f, 0.0f, -5.0f, -0.5f, -5.0f, 0.0f, 2.0f, 5.0f, -0.5f, 5.0f, 2.0f, 0.0f, -5.0f, -0.5f, -5.0f, 0.0f, 2.0f, 5.0f, -0.5f, -5.0f, 2.0f, 2.0f }; // Setup cube VAO GLuint cubeVAO, cubeVBO; glGenVertexArrays(1, &cubeVAO); glGenBuffers(1, &cubeVBO); glBindVertexArray(cubeVAO); glBindBuffer(GL_ARRAY_BUFFER, cubeVBO); glBufferData(GL_ARRAY_BUFFER, sizeof(cubeVertices), &cubeVertices, GL_STATIC_DRAW); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)0); glEnableVertexAttribArray(1); glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat))); glBindVertexArray(0); // Setup plane VAO GLuint planeVAO, planeVBO; glGenVertexArrays(1, &planeVAO); glGenBuffers(1, &planeVBO); glBindVertexArray(planeVAO); glBindBuffer(GL_ARRAY_BUFFER, planeVBO); glBufferData(GL_ARRAY_BUFFER, sizeof(planeVertices), &planeVertices, GL_STATIC_DRAW); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)0); glEnableVertexAttribArray(1); glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat))); glBindVertexArray(0); // Load textures GLuint cubeTexture = loadTexture("media/container.jpg"); GLuint 
floorTexture = loadTexture("media/awesomeface.png"); #pragma endregion // Game loop while (!glfwWindowShouldClose(window)) { GLfloat currentFrame = glfwGetTime(); deltaTime = currentFrame - lastFrame; lastFrame = currentFrame; // Check and call events glfwPollEvents(); handle_input(window); // Clear the colorbuffer glClearColor(0.1f, 0.1f, 0.1f, 1.0f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); // Set uniforms stencil.use(); glm::mat4 model; glm::mat4 view = camera.get_view_matrix(); glm::mat4 projection = glm::perspective(camera.m_zoom, (float)screenWidth / (float)screenHeight, 0.1f, 100.0f); glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "view"), 1, GL_FALSE, glm::value_ptr(view)); glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "projection"), 1, GL_FALSE, glm::value_ptr(projection)); shader.use(); glUniformMatrix4fv(glGetUniformLocation(shader.Program, "view"), 1, GL_FALSE, glm::value_ptr(view)); glUniformMatrix4fv(glGetUniformLocation(shader.Program, "projection"), 1, GL_FALSE, glm::value_ptr(projection)); // Draw floor as normal, we only care about the containers. The floor should NOT fill the stencil buffer so we set its mask to 0x00 glStencilMask(0x00); // Floor glBindVertexArray(planeVAO); glBindTexture(GL_TEXTURE_2D, floorTexture); model = glm::mat4(); glUniformMatrix4fv(glGetUniformLocation(shader.Program, "model"), 1, GL_FALSE, glm::value_ptr(model)); glDrawArrays(GL_TRIANGLES, 0, 6); glBindVertexArray(0); // == ============= // 1st. 
Render pass, draw objects as normal, filling the stencil buffer glStencilFunc(GL_ALWAYS, 1, 0xFF); glStencilMask(0xFF); // Cubes glBindVertexArray(cubeVAO); glBindTexture(GL_TEXTURE_2D, cubeTexture); model = glm::translate(model, glm::vec3(-1.0f, 0.0f, -1.0f)); glUniformMatrix4fv(glGetUniformLocation(shader.Program, "model"), 1, GL_FALSE, glm::value_ptr(model)); glDrawArrays(GL_TRIANGLES, 0, 36); model = glm::mat4(); model = glm::translate(model, glm::vec3(2.0f, 0.0f, 0.0f)); glUniformMatrix4fv(glGetUniformLocation(shader.Program, "model"), 1, GL_FALSE, glm::value_ptr(model)); glDrawArrays(GL_TRIANGLES, 0, 36); glBindVertexArray(0); // == ============= // 2nd. Render pass, now draw slightly scaled versions of the objects, this time disabling stencil writing. // Because stencil buffer is now filled with several 1s. The parts of the buffer that are 1 are now not drawn, thus only drawing // the objects' size differences, making it look like borders. glStencilFunc(GL_NOTEQUAL, 1, 0xFF); glStencilMask(0x00); glDisable(GL_DEPTH_TEST); stencil.use(); GLfloat scale = 1.1; // Cubes glBindVertexArray(cubeVAO); glBindTexture(GL_TEXTURE_2D, cubeTexture); model = glm::mat4(); model = glm::translate(model, glm::vec3(-1.0f, 0.0f, -1.0f)); model = glm::scale(model, glm::vec3(scale, scale, scale)); glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "model"), 1, GL_FALSE, glm::value_ptr(model)); glDrawArrays(GL_TRIANGLES, 0, 36); model = glm::mat4(); model = glm::translate(model, glm::vec3(2.0f, 0.0f, 0.0f)); model = glm::scale(model, glm::vec3(scale, scale, scale)); glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "model"), 1, GL_FALSE, glm::value_ptr(model)); glDrawArrays(GL_TRIANGLES, 0, 36); glBindVertexArray(0); glStencilMask(0xFF); glEnable(GL_DEPTH_TEST); // Swap the buffers glfwSwapBuffers(window); } glfwTerminate(); return 0; }
/**
 * Initialises a newly created inner vertex of the fine grid.
 *
 * What the body does, in order:
 *  - refines the vertex while the coarse grid enumerator reports a level
 *    below 3;
 *  - (Dim2 builds only) builds the vertex stencil as the sum of two
 *    StencilFactory::stencilProduct() terms, Laplace(h0) x Mass(h1) and
 *    Mass(h0) x Laplace(h1), asserts the nine expected entries and stores
 *    the stencil on the vertex;
 *  - (Dim2 builds only) stores one 25-entry weight list as the prolongation
 *    and the same list as the restriction, then sets the right-hand side
 *    via setRhs(1.0, fineGridH);
 *  - in any non-Dim2 build, fails with the assertion "not implemented yet".
 *
 * @param fineGridVertex                Vertex being initialised (modified).
 * @param fineGridX                     Only passed to the trace macro here.
 * @param fineGridH                     Mesh widths; components 0 and 1 feed the
 *                                      1D stencils, the whole vector feeds setRhs().
 * @param coarseGridVertices            Not used in this function.
 * @param coarseGridVerticesEnumerator  Supplies the level for the refinement check.
 * @param coarseGridCell                Only passed to the trace macro here.
 * @param fineGridPositionOfVertex      Only passed to the trace macro here.
 */
void peano::applications::poisson::multigrid::mappings::SpacetreeGrid2SetupExperiment::createInnerVertex(
  peano::applications::poisson::multigrid::SpacetreeGridVertex& fineGridVertex,
  const tarch::la::Vector<DIMENSIONS,double>& fineGridX,
  const tarch::la::Vector<DIMENSIONS,double>& fineGridH,
  peano::applications::poisson::multigrid::SpacetreeGridVertex const * const coarseGridVertices,
  const peano::kernel::gridinterface::VertexEnumerator& coarseGridVerticesEnumerator,
  const peano::applications::poisson::multigrid::SpacetreeGridCell& coarseGridCell,
  const tarch::la::Vector<DIMENSIONS,int>& fineGridPositionOfVertex
) {
  logTraceInWith6Arguments( "createInnerVertex(...)", fineGridVertex, fineGridX, fineGridH, coarseGridVerticesEnumerator.toString(), coarseGridCell, fineGridPositionOfVertex );

  // Disabled alternative: refine by cell volume against _refinementThreshold.
//  if (tarch::la::volume(fineGridH) > _refinementThreshold) {
//    fineGridVertex.refine();
//  }

  // Active criterion: refine purely by the level reported for the coarse grid.
  if (coarseGridVerticesEnumerator.getLevel() < 3) {
    fineGridVertex.refine();
  }

  peano::toolbox::stencil::Stencil stencil;

  #ifdef Dim2
  //if(fineGridVertex.getLevel() == 4){
  // Both terms currently carry an implicit coefficient of 1; the kappa
  // comments mark where per-direction coefficients would multiply in.
  stencil =
  //  kappa_x *
    peano::toolbox::stencil::StencilFactory::stencilProduct(
      peano::toolbox::stencil::StencilFactory::get1DLaplaceStencil(fineGridH(0)),
      peano::toolbox::stencil::StencilFactory::get1DMassStencil(fineGridH(1))
    ) +
  //  kappa-y *
    peano::toolbox::stencil::StencilFactory::stencilProduct(
      peano::toolbox::stencil::StencilFactory::get1DMassStencil(fineGridH(0)),
      peano::toolbox::stencil::StencilFactory::get1DLaplaceStencil(fineGridH(1))
    );

  // Sanity check of the nine entries: 8/3 at index 4, -1/3 at every other index.
  // NOTE(review): the expected values are fixed constants, so this presumably
  // relies on the product being independent of fineGridH -- confirm.
  assertionNumericalEquals(stencil(0), -1.0/3.0);
  assertionNumericalEquals(stencil(1), -1.0/3.0);
  assertionNumericalEquals(stencil(2), -1.0/3.0);
  assertionNumericalEquals(stencil(3), -1.0/3.0);
  assertionNumericalEquals(stencil(4),  8.0/3.0);
  assertionNumericalEquals(stencil(5), -1.0/3.0);
  assertionNumericalEquals(stencil(6), -1.0/3.0);
  assertionNumericalEquals(stencil(7), -1.0/3.0);
  assertionNumericalEquals(stencil(8), -1.0/3.0);

  #if defined(Asserts)
  // Only compiled when Asserts is defined: derive the element-wise assembly
  // matrix from the stencil and check all 16 entries (4x4).
  peano::toolbox::stencil::ElementMatrix elementMatrix;
  peano::toolbox::stencil::ElementWiseAssemblyMatrix testMatrix = elementMatrix.getElementWiseAssemblyMatrix( stencil );

  assertionNumericalEquals(testMatrix(0,0),  2.0/3.0);
  assertionNumericalEquals(testMatrix(0,1), -0.5/3.0);
  assertionNumericalEquals(testMatrix(0,2), -0.5/3.0);
  assertionNumericalEquals(testMatrix(0,3), -1.0/3.0);

  assertionNumericalEquals(testMatrix(1,0), -0.5/3.0);
  assertionNumericalEquals(testMatrix(1,1),  2.0/3.0);
  assertionNumericalEquals(testMatrix(1,2), -1.0/3.0);
  assertionNumericalEquals(testMatrix(1,3), -0.5/3.0);

  assertionNumericalEquals(testMatrix(2,0), -0.5/3.0);
  assertionNumericalEquals(testMatrix(2,1), -1.0/3.0);
  assertionNumericalEquals(testMatrix(2,2),  2.0/3.0);
  assertionNumericalEquals(testMatrix(2,3), -0.5/3.0);

  assertionNumericalEquals(testMatrix(3,0), -1.0/3.0);
  assertionNumericalEquals(testMatrix(3,1), -0.5/3.0);
  assertionNumericalEquals(testMatrix(3,2), -0.5/3.0);
  assertionNumericalEquals(testMatrix(3,3),  2.0/3.0);

  //logDebug( "createInnerVertex(...)", testMatrix );
  #endif

  // Disabled alternative: assign the stencil entries directly (or all zeros
  // on levels other than 4) instead of computing them with StencilFactory.
//  tarch::la::assignList(stencil) = -1.0/3.0, -1.0/3.0, -1.0/3.0, -1.0/3.0, 8.0/3.0, -1.0/3.0, -1.0/3.0, -1.0/3.0, -1.0/3.0;
  //}
  //else{
  //  tarch::la::assignList(stencil) = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0;
  //}

  fineGridVertex.setStencil(stencil);

  // Disabled experiment: scale the stencil by 4.2 near the point (0.5, ..., 0.5).
//  double squaredDistanceFromCenter = 0.0;
//  for (int d=0; d<DIMENSIONS; d++) {
//    squaredDistanceFromCenter += (0.5 - fineGridX(d)) * (0.5 - fineGridX(d));
//  }
//  if (squaredDistanceFromCenter<0.24*0.24) {
//    stencil *= 4.2;
//  }

  // 25 prolongation weights, written as five rows of five; each entry is the
  // product of two values from (1,2,3,2,1)/3.
  peano::toolbox::stencil::ProlongationMatrix prolongation;
  tarch::la::assignList(prolongation) = 1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0,
                                        2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
                                        3.0/9.0, 6.0/9.0, 9.0/9.0, 6.0/9.0, 3.0/9.0,
                                        2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
                                        1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0;
  fineGridVertex.setP(prolongation);

  // The restriction uses exactly the same 25 weights as the prolongation.
  peano::toolbox::stencil::RestrictionMatrix restriction;
  tarch::la::assignList(restriction) = 1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0,
                                       2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
                                       3.0/9.0, 6.0/9.0, 9.0/9.0, 6.0/9.0, 3.0/9.0,
                                       2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
                                       1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0;
  fineGridVertex.setR(restriction);

  fineGridVertex.setRhs(1.0, fineGridH);
  #else
  // Only the Dim2 case is implemented.
  assertionMsg( false, "not implemented yet" );
  #endif

  logTraceOutWith1Argument( "createInnerVertex(...)", fineGridVertex );
}
int main(int argc, char** argv) { int numIterations = NB_ITER; if(argc > 0) { numIterations = atoi(argv[1]); ydim_gpu = atoi(argv[2]); } const unsigned int line_size = LINESIZE; const unsigned int mem_size= TOTALSIZE*sizeof(float); const unsigned int mem_size_gpu = SIZE_GPU * sizeof(float); float *h_idata = NULL; float *h_odata = NULL; struct double_matrice container; struct timeval tv1,tv2,tcpu1,tcpu2; // Allocation of input & output matrices // h_idata = malloc(mem_size); h_odata = malloc(mem_size); container.in = h_idata + LINESIZE * (YDIM_GPU) + OFFSET; container.out = h_odata + LINESIZE * (YDIM_GPU) + OFFSET; container.ydim_cpu = YDIM_CPU; // Initialization of input & output matrices // srand(1234); for(unsigned int i = 0; i < TOTALSIZE; i++) { h_idata[i]=rand(); h_odata[i]=0.0; } /* Version cpu pour comparaison */ void * tmp_switch; float* reference = (float*) malloc(mem_size); float* reference_i = (float*) malloc(mem_size); for(unsigned int i = 0; i < TOTALSIZE; i++) { reference[i] = 0.0; reference_i[i] = h_idata[i]; } gettimeofday(&tcpu1,NULL); for(int i=0; i<numIterations; ++i) { stencil(reference + OFFSET, reference_i + OFFSET, YDIM); tmp_switch = reference; reference = reference_i; reference_i = tmp_switch; } if(numIterations%2) { tmp_switch = reference; reference = reference_i; reference_i = tmp_switch; } gettimeofday(&tcpu2,NULL); float timecpu=((float)TIME_DIFF(tcpu1,tcpu2)) / 1000; pthread_t thread; printf("nombre d'itérations: %d\n",numIterations); gettimeofday(&tv1, NULL); for(int i = 0; i<numIterations; i++) // Iterations are done inside the kernel { stencil_multi(container.out,container.in,container.ydim_cpu); tmp_switch = container.out; container.out = container.in; container.in = tmp_switch; } gettimeofday(&tv2, NULL); tmp_switch = d_odata; d_odata = d_idata; d_idata = tmp_switch; float time1=((float)TIME_DIFF(tv1,tv2)) / 1000; // Read back the results from the device to verify the output // printf("%f\t%f ms (%fGo/s)\t%f ms (%fGo/s)\n", 
timecpu/time1, time1, numIterations * 3*mem_size / time1 / 1000000, timecpu, numIterations * 3*mem_size / timecpu / 1000000); // Validate our results // unsigned int errors=0; float * h_fdata = h_odata; for(unsigned int i=0; i<TOTALSIZE; i++) { if((reference[i]-h_fdata[i])/reference[i] > 1e-6) { if(errors < 10) printf(" %u %f vs %f\n", i, h_fdata[i], reference[i]); errors++; } } if(errors) fprintf(stderr,"%d erreurs !\n", errors); else fprintf(stderr,"pas d'erreurs, cool !\n"); free(reference_i); free(reference); // Shutdown and cleanup // free(h_odata); free(h_idata); return 0; }
// Issues an occlusion query for this group's bounding box, or marks the
// pending query for discard.
//
// Nothing happens unless the spatial partition has occlusion enabled and
// LLPipeline::sUseOcclusion is greater than 1. Otherwise:
//  - if earlyFail() returns true, the group is flagged DISCARD_QUERY and its
//    OCCLUDED state is cleared;
//  - else, if no query is pending or the pending one is flagged for discard,
//    a query object is allocated on first use for the current camera, the
//    box is drawn between glBeginQueryARB/glEndQueryARB, and the group ends
//    up QUERY_PENDING with DISCARD_QUERY cleared.
//
// camera: passed to earlyFail() and used to choose which box fan to draw.
// shift:  optional offset added to the box centre (bounds[0]); may be NULL.
void LLOcclusionCullingGroup::doOcclusion(LLCamera* camera, const LLVector4a* shift)
{
    // Scope object for GL_STENCIL_TEST; lives for the whole function.
    LLGLDisable stencil(GL_STENCIL_TEST);
    if (mSpatialPartition->isOcclusionEnabled() && LLPipeline::sUseOcclusion > 1)
    {
        //move mBounds to the agent space if necessary
        // Work on a local copy; only bounds[0] is shifted.
        LLVector4a bounds[2];
        bounds[0] = mBounds[0];
        bounds[1] = mBounds[1];
        if(shift != NULL)
        {
            bounds[0].add(*shift);
        }

        // Don't cull hole/edge water, unless we have the GL_ARB_depth_clamp extension
        if (earlyFail(camera, bounds))
        {
            LLFastTimer t(FTM_OCCLUSION_EARLY_FAIL);
            setOcclusionState(LLOcclusionCullingGroup::DISCARD_QUERY);
            assert_states_valid(this);
            clearOcclusionState(LLOcclusionCullingGroup::OCCLUDED, LLOcclusionCullingGroup::STATE_MODE_DIFF);
            assert_states_valid(this);
        }
        else
        {
            if (!isOcclusionState(QUERY_PENDING) || isOcclusionState(DISCARD_QUERY))
            {
                { //no query pending, or previous query to be discarded
                    LLFastTimer t(FTM_RENDER_OCCLUSION);

                    // Allocate the per-camera query object on first use.
                    if (!mOcclusionQuery[LLViewerCamera::sCurCameraID])
                    {
                        LLFastTimer t(FTM_OCCLUSION_ALLOCATE);
                        mOcclusionQuery[LLViewerCamera::sCurCameraID] = getNewOcclusionQueryObjectName();
                    }

                    // Depth clamp all water to avoid it being culled as a result of being
                    // behind the far clip plane, and in the case of edge water to avoid
                    // it being culled while still visible.
                    bool const use_depth_clamp = gGLManager.mHasDepthClamp &&
                                                (mSpatialPartition->mDrawableType == LLDrawPool::POOL_WATER ||
                                                mSpatialPartition->mDrawableType == LLDrawPool::POOL_VOIDWATER);

                    // Scope object; it is given 0 instead of a capability when depth clamp is not used.
                    LLGLEnable clamp(use_depth_clamp ? GL_DEPTH_CLAMP : 0);

                    // Query target: GL_ANY_SAMPLES_PASSED when mHasOcclusionQuery2 is set
                    // (never on LL_DARWIN builds), otherwise GL_SAMPLES_PASSED_ARB.
#if !LL_DARWIN
                    U32 mode = gGLManager.mHasOcclusionQuery2 ? GL_ANY_SAMPLES_PASSED : GL_SAMPLES_PASSED_ARB;
#else
                    U32 mode = GL_SAMPLES_PASSED_ARB;
#endif

#if LL_TRACK_PENDING_OCCLUSION_QUERIES
                    sPendingQueries.insert(mOcclusionQuery[LLViewerCamera::sCurCameraID]);
#endif

                    {
                        LLFastTimer t(FTM_PUSH_OCCLUSION_VERTS);

                        //store which frame this query was issued on
                        mOcclusionIssued[LLViewerCamera::sCurCameraID] = gFrameCount;

                        {
                            LLFastTimer t(FTM_OCCLUSION_BEGIN_QUERY);
                            glBeginQueryARB(mode, mOcclusionQuery[LLViewerCamera::sCurCameraID]);
                        }

                        // Uses whichever shader is currently bound; one must be bound.
                        LLGLSLShader* shader = LLGLSLShader::sCurBoundShaderPtr;
                        llassert(shader);

                        // Box centre, then box size = bounds[1] + fudge.
                        shader->uniform3fv(LLShaderMgr::BOX_CENTER, 1, bounds[0].getF32ptr());
                        //static LLVector4a fudge(SG_OCCLUSION_FUDGE);
                        // NOTE(review): `vel` is declared here but not read anywhere in this
                        // function; `fudge` is built from the SG_OCCLUSION_FUDGE constant,
                        // not from the "SHOcclusionFudge" control -- confirm this is intended.
                        static LLCachedControl<F32> vel("SHOcclusionFudge",SG_OCCLUSION_FUDGE);
                        LLVector4a fudge(SG_OCCLUSION_FUDGE);
                        static LLVector4a fudged_bounds;
                        fudged_bounds.setAdd(fudge, bounds[1]);
                        shader->uniform3fv(LLShaderMgr::BOX_SIZE, 1, fudged_bounds.getF32ptr());

                        // The two branches issue identical draw calls; they differ only in
                        // the timer used and in the LLGLSquashToFarClip scope object, which
                        // exists only for void water drawn without depth clamp.
                        if (!use_depth_clamp && mSpatialPartition->mDrawableType == LLDrawPool::POOL_VOIDWATER)
                        {
                            LLFastTimer t(FTM_OCCLUSION_DRAW_WATER);

                            LLGLSquashToFarClip squash(glh_get_current_projection(), 1);
                            if (camera->getOrigin().isExactlyZero())
                            { //origin is invalid, draw entire box
                                gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, 0);
                                gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, b111*8);
                            }
                            else
                            {
                                gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, get_box_fan_indices(camera, bounds[0]));
                            }
                        }
                        else
                        {
                            LLFastTimer t(FTM_OCCLUSION_DRAW);
                            if (camera->getOrigin().isExactlyZero())
                            { //origin is invalid, draw entire box
                                gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, 0);
                                gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, b111*8);
                            }
                            else
                            {
                                gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, get_box_fan_indices(camera, bounds[0]));
                            }
                        }

                        {
                            LLFastTimer t(FTM_OCCLUSION_END_QUERY);
                            glEndQueryARB(mode);
                        }
                    }
                }

                // A query is now in flight; any earlier discard request is cleared.
                {
                    LLFastTimer t(FTM_SET_OCCLUSION_STATE);
                    setOcclusionState(LLOcclusionCullingGroup::QUERY_PENDING);
                    clearOcclusionState(LLOcclusionCullingGroup::DISCARD_QUERY);
                }
            }
        }
    }
}
// Parallel Tiling double test_1(){ double start_time = omp_get_wtime(); int write, read; // read and write buffers int t0, t1, x0, x1, dx0, dx1; // most values of the tile tuples int t, x; // indices into space (t,x) // for all t0 in t0..T by timeBand for( t0 = 1; t0 <= T; t0 += timeBand ) { // set and clamp t1 from t0 t1 = min(t0 + timeBand - 1, T); // Do A-tiles // set dx0 and dx1 to correct A-tile values dx0 = 1; dx1 = -1; // iterate over all x0 points for A-tiles #pragma omp parallel for private( x0, x1, write, read, t, x) schedule(dynamic, A_tiles_per_core) for( x0 = tiles_A_start; x0 <= upperBound; x0 += betweenTiles ){ x1 = x0 + width_max - 1; // set x1 from x0 // Set read and write buffer. // this is equivilent to t0 % 2 but assumed faster read = (t0 - 1) & 1; write = 1 - read; // if x0 is at or below lower bound (left edge tile) if( x0 <= lowerBound ) { //printf("%d, %d, %d, %d, %d, %d\n", lowerBound, 0, x1, dx1, t0, t1 ); // for t in t0 ... t1 for( t = t0; t<= t1; ++t ){ //#pragma omp parallel for private( x ) schedule(static) // for x in lowerBound ... x1'ish int minVal = min(x1 + dx1 * (t - t0), upperBound ); for( x = lowerBound; x <= minVal; ++x){ stencil( read, write, x ); // stencil computation }// for x // flip write buffer read = write; write = 1 - write; }// for t }// if x0 <= lowerBound // if x1 is at or above upper bound (right edge tile) else if( x1 >= upperBound ){ //printf("%d, %d, %d, %d, %d, %d\n", x0, dx0, upperBound, 0, t0, t1 ); // for t in t0...t1 for( t = t0; t<= t1; ++t ){ //#pragma omp parallel for private( x ) schedule(static) // for x in x0'ish ... upperbound for( x = max(x0 + dx0 * (t - t0), lowerBound); x <= upperBound; ++x){ stencil( read, write, x ); // stencil computation }// for x // flip write buffer read = write; write = 1 - write; }// for t }// else if x1 >= upperBound // otherwise regular ol' tile else { //printf("%d, %d, %d, %d, %d, %d\n", x0, dx0, x1, dx1, t0, t1 ); // for t in t0 ... 
t1 for( t = t0; t<= t1; ++t ){ //#pragma omp parallel for private( x ) schedule(static) // for x in x0'ish ... x1'ish int minVal = min(x1 + dx1 * (t - t0), upperBound ); for( x = max(x0 + dx0 * (t - t0), lowerBound); x <= minVal; ++x){ stencil( read, write, x ); // stencil computation }// for x // flip write buffer read = write; write = 1 - write; }// for t }// else }// for A-tiles // Do B-tiles // set dx0 and dx1 to correct B-tile values dx0 = -1; dx1 = 1; // iterate over x0 points for B-tiles #pragma omp parallel for private( x0, x1, write, read, t, x) schedule(dynamic,B_tiles_per_core) for( x0 = tiles_B_start; x0 <= upperBound; x0 += betweenTiles ){ x1 = x0 + width_min - 1; // set x1 from x0 // Set write buffer. // this is equivilent to (t0 - 1 )% 2, but assumed faster read = (t0 - 1) & 1; write = 1 - read; // if x1 is at or above upper bound (right edge tile) if( x1 >= upperBound ){ //printf("%d, %d, %d, %d, %d, %d\n", x0, dx0, upperBound, 0, t0, t1 ); // for t in t0 ... t1 for( t = t0; t <= t1; ++t ){ //#pragma omp parallel for private( x ) schedule(static) // for x in x0'ish ... upper bound for( x = max( x0 + dx0 * (t - t0), lowerBound); x <= upperBound; ++x){ stencil( read, write, x ); // stencil computation }// for x // flip write buffer read = write; write = 1 - write; }// for t }// if x1 >= upperBound // regular ol' tile else { //printf("%d, %d, %d, %d, %d, %d\n", x0, dx0, x1, dx1, t0, t1 ); // for t in t0 ... t1 for( t = t0; t<= t1; ++t ){ //#pragma omp parallel for private( x ) schedule(static) // for x in x0'ish ... x1'ish int minVal = min(x1 + dx1 * (t - t0), upperBound); for( x = max(x0 + dx0 * (t - t0), lowerBound); x <= minVal; ++x){ stencil( read, write, x ); // stencil computation }// for x // flip write buffer read = write; write = 1 - write; } // for t }// else } // for B-tiles }// for t0 double end_time = omp_get_wtime(); return (end_time - start_time); }