Exemplo n.º 1
0
// Writes generated D-language source to `os` through the stencil() template
// helper.
//
// Visible steps:
//   1. Derives `module_name` from the stem of `src_filename`.
//   2. Emits the "generated by Caper" notice, the `module` line and imports.
//   3. Unless options.external_token is set, emits a `Token` enum and a
//      `tokenLabel` function; each enum entry is options.token_prefix
//      followed by the token name.
//
// The first std::map parameter is unnamed and therefore unused here.
// NOTE(review): this excerpt ends inside the stencil() argument list, so the
// remainder of the function is not visible from here.
void generate_d(
    const std::string&                  src_filename,
    std::ostream&                       os,
    const GenerateOptions&              options,
    const std::map<std::string, Type>&,
    const std::map<std::string, Type>&  nonterminal_types,
    const std::vector<std::string>&     tokens,
    const action_map_type&              actions,
    const tgt::parsing_table&           table) {

    std::string module_name =
        boost::filesystem::path(src_filename).stem().string();

    // notice / URL / module / imports
    stencil(
        os, R"(
// This file was automatically generated by Caper.
// (http://jonigata.github.io/caper/caper.html)

module ${module_name};

import std.array;
import std.stdio;

)",
        {"module_name", module_name}
        );

    if (!options.external_token) {
        // token enumeration
        stencil(
            os, R"(
enum Token {
$${tokens}
}

string tokenLabel(Token t) {
    static string[] labels = [
$${labels}
    ];
    return labels[t];
}

)",
            {"tokens", [&](std::ostream& os){
                    for(const auto& token: tokens) {
                        stencil(
                            os, R"(
    ${prefix}${token},
)",
                            {"prefix", options.token_prefix},
                            {"token", token}
                            );
                    }
                }},
// Switches the stencil state used by the overdraw debug view.
//
// Nothing happens unless Properties::debugOverdraw is set and mFramebuffer
// is 0. When `clear` is true the scissor is disabled and the stencil is
// cleared first; `enable` then selects between debug-write and disabled.
void RenderState::debugOverdraw(bool enable, bool clear) {
    if (!Properties::debugOverdraw || mFramebuffer != 0) {
        return;
    }

    if (clear) {
        scissor().setEnabled(false);
        stencil().clear();
    }

    if (!enable) {
        stencil().disable();
        return;
    }
    stencil().enableDebugWrite();
}
Exemplo n.º 3
0
// Assembles a 4x4 identity element matrix into an ISTL system matrix for
// every element, applies the parallel operator to a vector of ones and
// compares the result against a stored reference diagonal.
TEST(TestISTLMatrix, AssembleMPI)
{
  InspectMatrixSIM sim(1);
  sim.read("src/LinAlg/Test/refdata/petsc_test.xinp");
  sim.opt.solver = SystemMatrix::ISTL;
  sim.preprocess();
  sim.initSystem(SystemMatrix::ISTL);

  // Element matrix: ones on the diagonal (1-based indexing).
  Matrix stencil(4,4);
  for (int d = 1; d <= 4; ++d)
    stencil(d,d) = 1.0;

  for (int iel = 1; iel <= sim.getSAM()->getNoElms(); ++iel)
    sim.getMatrix()->assemble(stencil, *sim.getSAM(), iel);

  sim.getMatrix()->beginAssembly();
  sim.getMatrix()->endAssembly();

  // now inspect the matrix
  const ProcessAdm& adm = sim.getProcessAdm();
  ISTL::Mat& A = static_cast<ISTLMatrix*>(sim.getMatrix())->getMatrix();
  ISTL::Vec ones(A.N());
  ISTL::Vec product(A.N());

  try {
    Dune::OwnerOverlapCopyCommunication<int,int> comm(*adm.getCommunicator());
    typedef Dune::ParallelLocalIndex<Dune::OwnerOverlapCopyAttributeSet::AttributeSet> LocalIndex;

    // Register every local equation with its global id; equations at or
    // above getMinEq() are flagged as owner, the others as overlap.
    comm.indexSet().beginResize();
    for (size_t i = 0; i < adm.dd.getMLGEQ().size(); ++i) {
      int gid = adm.dd.getGlobalEq(i+1);
      if (gid >= adm.dd.getMinEq())
        comm.indexSet().add(gid-1, LocalIndex(i, Dune::OwnerOverlapCopyAttributeSet::owner));
      else
        comm.indexSet().add(gid-1, LocalIndex(i, Dune::OwnerOverlapCopyAttributeSet::overlap));
    }
    comm.indexSet().endResize();
    comm.remoteIndices().setIncludeSelf(true);
    comm.remoteIndices().template rebuild<false>();

    ISTL::ParMatrixAdapter op(A, comm);

    ones = 1.0;
    op.apply(ones, product);
  } catch (Dune::ISTLError& e) {
    std::cerr << e << std::endl;
    ASSERT_TRUE(false);
  }

  IntVec expected = readIntVector("src/LinAlg/Test/refdata/petsc_matrix_diagonal.ref");
  for (size_t i = 0; i < adm.dd.getMLGEQ().size(); ++i)
    ASSERT_FLOAT_EQ(expected[adm.dd.getGlobalEq(i+1)-1], product[i]);
}
Exemplo n.º 4
0
/* Builds a Gregory patch for `face`.
 *
 * The corner, edge and interior stencils are computed on the face's ring and
 * evaluated into 20 control points, which are copied into the hull of a
 * triangle patch (3 edges) or quad patch (4 edges). Any other edge count
 * trips the assert and yields NULL. The returned patch is allocated with
 * `new`. */
Patch *SubdAccBuilder::run(SubdFace *face)
{
	SubdFaceRing ring(face, face->edge);
	GregoryAccStencil stencil(&ring);
	float3 hull_points[20];

	computeCornerStencil(&ring, &stencil);
	computeEdgeStencil(&ring, &stencil);
	computeInteriorStencil(&ring, &stencil);

	ring.evaluate_stencils(hull_points, stencil.stencil, 20);

	if(face->num_edges() == 3) {
		GregoryTrianglePatch *tri = new GregoryTrianglePatch();
		memcpy(tri->hull, hull_points, sizeof(hull_points));
		return tri;
	}

	if(face->num_edges() == 4) {
		GregoryQuadPatch *quad = new GregoryQuadPatch();
		memcpy(quad->hull, hull_points, sizeof(hull_points));
		return quad;
	}

	/* n-gons should have been split already */
	assert(0);
	return NULL;
}
/**
 * Initialises a newly created boundary vertex.
 *
 * The vertex is refined while the coarse grid level is below 3. Its stencil,
 * prolongation and restriction operators are all set to objects constructed
 * from 0.0, and its temporary AP / P data are cleared.
 */
void peano::applications::poisson::multigrid::mappings::SpacetreeGrid2SetupExperiment::createBoundaryVertex(
  peano::applications::poisson::multigrid::SpacetreeGridVertex&               fineGridVertex,
  const tarch::la::Vector<DIMENSIONS,double>&                                 fineGridX,
  const tarch::la::Vector<DIMENSIONS,double>&                                 fineGridH,
  peano::applications::poisson::multigrid::SpacetreeGridVertex const * const  coarseGridVertices,
  const peano::kernel::gridinterface::VertexEnumerator&                       coarseGridVerticesEnumerator,
  const peano::applications::poisson::multigrid::SpacetreeGridCell&           coarseGridCell,
  const tarch::la::Vector<DIMENSIONS,int>&                                    fineGridPositionOfVertex
) {
  logTraceInWith6Arguments( "createBoundaryVertex(...)", fineGridVertex, fineGridX, fineGridH, coarseGridVerticesEnumerator.toString(), coarseGridCell, fineGridPositionOfVertex );

  // Disabled alternative criterion (mesh-width based):
  //   if (tarch::la::volume(fineGridH) > _refinementThreshold) {
  //     fineGridVertex.refine();
  //   }
  const bool belowRefinementLevel = coarseGridVerticesEnumerator.getLevel() < 3;
  if (belowRefinementLevel) {
    fineGridVertex.refine();
  }

  // Operators constructed from 0.0.
  peano::toolbox::stencil::Stencil zeroStencil(0.0);
  fineGridVertex.setStencil(zeroStencil);

  peano::toolbox::stencil::ProlongationMatrix zeroProlongation(0.0);
  fineGridVertex.setP(zeroProlongation);

  peano::toolbox::stencil::RestrictionMatrix zeroRestriction(0.0);
  fineGridVertex.setR(zeroRestriction);

  fineGridVertex.clearTempAP();
  fineGridVertex.clearTempP();

  logTraceOutWith1Argument( "createBoundaryVertex(...)", fineGridVertex );
}
Exemplo n.º 6
0
Arquivo: stencil.c Projeto: 8l/pulpino
/* Stencil test driver.
 *
 * Fills A (N rows of M entries) and W (N entries), calls stencil() twice on
 * the same buffers, then compares h_R element-wise against RESULT_STENCIL.
 * Each mismatch is printed and counted; the count goes to print_summary().
 * The process exit code is always 0. */
int main()
{
  int i, j, k;
  int idx;
  int error = 0;

  printf("Start stencil\n");

  /* W[i] is written once per row, after that row of A has been filled. */
  for (i = 0; i < N; i++) {
    for (k = 0; k < M; k++) {
      A[i*M+k] = i+k+1;
    }
    W[i] = i+2;
  }

  for (j = 0; j < 2; j++) {
    stencil(A, h_R, W);
  }

  for (i = 0; i < N; i++) {
    for (k = 0; k < M; k++) {
      idx = i*M+k;
      if (RESULT_STENCIL[idx] == h_R[idx]) {
        continue;
      }
      error++;
      printf("Error occurred at i=%d k=%d; Computed result R=%d does not match expected Result=%d\n",i,k,h_R[idx],RESULT_STENCIL[idx]);
    }
  }

  print_summary(error);

  return 0;
}
Exemplo n.º 7
0
  void Visit(const AirspaceCircle& airspace) {
    RasterPoint screen_center = projection.GeoToScreen(airspace.GetCenter());
    unsigned screen_radius = projection.GeoToScreenDistance(airspace.GetRadius());
    GLEnable stencil(GL_STENCIL_TEST);

    {
      GLEnable blend(GL_BLEND);
      setup_interior(airspace);
      if (m_warnings.is_warning(airspace) ||
          m_warnings.is_inside(airspace) ||
          airspace_look.thick_pen.GetWidth() >= 2 * screen_radius) {
        // fill whole circle
        canvas.circle(screen_center.x, screen_center.y, screen_radius);
      } else {
        // draw a ring inside the circle
        Color color = airspace_look.colors[settings.colours[airspace.GetType()]];
        Pen pen_donut(airspace_look.thick_pen.GetWidth() / 2, color.WithAlpha(90));
        canvas.SelectHollowBrush();
        canvas.Select(pen_donut);
        canvas.circle(screen_center.x, screen_center.y,
                      screen_radius - airspace_look.thick_pen.GetWidth() / 4);
      }
    }

    // draw outline
    setup_outline(airspace);
    canvas.circle(screen_center.x, screen_center.y, screen_radius);
  }
Exemplo n.º 8
0
// Runs stencil() on the tile only when `flag` is non-zero and both `row`
// and `col` are greater than 2; otherwise does nothing.
void compute(int flag, TYPE orig[][UNROLL_C][(tile_size+2+UNROLL_R-1)/UNROLL_R][(tile_size+2+UNROLL_C-1)/UNROLL_C], TYPE sol[][tile_size+2], TYPE filter[f_size], size_t row, size_t col) {
#pragma HLS inline off
    const bool large_enough = (row > 2) && (col > 2);

    if (flag && large_enough) {
        stencil(orig, sol, filter, row, col);
    }
}
Exemplo n.º 9
0
  void VisitCircle(const AirspaceCircle &airspace) {
    RasterPoint screen_center = projection.GeoToScreen(airspace.GetCenter());
    unsigned screen_radius = projection.GeoToScreenDistance(airspace.GetRadius());
    GLEnable stencil(GL_STENCIL_TEST);

    if (!warning_manager.IsAcked(airspace) &&
        settings.classes[airspace.GetType()].fill_mode !=
        AirspaceClassRendererSettings::FillMode::NONE) {
      GLEnable blend(GL_BLEND);
      SetupInterior(airspace);
      if (warning_manager.HasWarning(airspace) ||
          warning_manager.IsInside(airspace) ||
          look.thick_pen.GetWidth() >= 2 * screen_radius ||
          settings.classes[airspace.GetType()].fill_mode ==
          AirspaceClassRendererSettings::FillMode::ALL) {
        // fill whole circle
        canvas.DrawCircle(screen_center.x, screen_center.y, screen_radius);
      } else {
        // draw a ring inside the circle
        Color color = settings.classes[airspace.GetType()].fill_color;
        Pen pen_donut(look.thick_pen.GetWidth() / 2, color.WithAlpha(90));
        canvas.SelectHollowBrush();
        canvas.Select(pen_donut);
        canvas.DrawCircle(screen_center.x, screen_center.y,
                      screen_radius - look.thick_pen.GetWidth() / 4);
      }
    }

    // draw outline
    if (SetupOutline(airspace))
      canvas.DrawCircle(screen_center.x, screen_center.y, screen_radius);
  }
Exemplo n.º 10
0
  void Visit(const AirspacePolygon& airspace) {
    if (!prepare_polygon(airspace.GetPoints()))
      return;

    bool fill_airspace = m_warnings.is_warning(airspace) ||
                         m_warnings.is_inside(airspace);
    GLEnable stencil(GL_STENCIL_TEST);

    if (!m_warnings.is_acked(airspace)) {
      if (!fill_airspace) {
        // set stencil for filling (bit 0)
        set_fillstencil();
        draw_prepared();
      }

      // fill interior without overpainting any previous outlines
      {
        setup_interior(airspace, !fill_airspace);
        GLEnable blend(GL_BLEND);
        draw_prepared();
      }

      if (!fill_airspace) {
        // clear fill stencil (bit 0)
        clear_fillstencil();
        draw_prepared();
      }
    }

    // draw outline
    setup_outline(airspace);
    draw_prepared();
  }
/* Verifies the previously computed result by recomputing it serially.
 *
 * The contents of space[T & 1] are copied aside, the space is
 * re-initialised, T time steps are run over [lowerBound, upperBound] with
 * alternating read/write buffers, and the recomputed values are compared
 * for exact equality with the saved copy.
 *
 * verbose: when true, prints "SUCCESS" or "FAILED" to stdout.
 * Returns true when every compared value matches. Returns false on the
 * first mismatch, or when the scratch buffer cannot be allocated. */
bool verifyResult( bool verbose ){
	assert( space[0] != NULL && space[1] != NULL );
	
	double* endSpace = (double*) malloc( (problemSize + 2) * sizeof(double) );
	if( endSpace == NULL ){
		/* Without the scratch copy nothing can be verified; report failure
		 * instead of dereferencing a null pointer below. */
		fprintf( stderr, "verifyResult: out of memory\n" );
		if( verbose ) printf( "FAILED\n" );
		return false;
	}
	
	/* Save the result under test (including the two ghost cells). */
	for( int x = 0; x < problemSize + 2; ++x ){
		endSpace[x] = space[T & 1][x];
	}
	
	/* Recompute serially from the initial state. */
	initSpace();
	int read = 0, write = 1;
	for( int t = 1; t <= T; ++t ){
		for( int x = lowerBound; x <= upperBound; ++x ){
			stencil(read, write, x);
		}
		read = write;
		write = 1 - write;
	}
	
	bool failed = false;
	
	/* Exact comparison is intentional: both runs are expected to perform
	 * the same floating-point operations on the same inputs. */
	for( int x = lowerBound; x <= upperBound; ++x ){
		if( endSpace[x] != space[T & 1][x] ){
			failed = true;
			if( verbose ) printf( "FAILED\n" );
			break;
		}
	}
	
	if( verbose && !failed ) printf( "SUCCESS\n" );
	
	free( endSpace );
	
	return !failed;
}
//--------------------------------------------------------------
void testApp::draw(){
    stringstream ss;
    ss << "FPS: " << ofGetFrameRate();
    ofDrawBitmapString(ss.str(), ofPoint(50,50));

    ofRectangle stencil( 0, 0,
                         500,500 );
    
    ofPushMatrix(); {
        ofTranslate(100, 100);
        
        squareMesh.draw();
        
        squareWorld.draw(stencil);
        
        
        ofPushMatrix(); {
            ofTranslate(500, 0);
            
            quadWorld.getWorldQuad().draw();
            quadWorld.draw(stencil);
            
        } ofPopMatrix();

    } ofPopMatrix();
}
Exemplo n.º 13
0
// Assembles the same all-ones 13x13 element matrix into a PETSc system
// (sim[0]) and an ISTL system (sim[1]), then checks that the PETSc Schur
// complement matches block 1 of the ISTL block preconditioner entry by entry.
TEST(TestISTLPETScMatrix, SchurComplement)
{
  ASMmxBase::Type = ASMmxBase::FULL_CONT_RAISE_BASIS1;
  ASMmxBase::geoBasis = 2;

  // Element matrix filled with ones (1-based indexing).
  Matrix stencil(13,13);
  for (size_t row = 1; row <= 13; ++row)
    for (size_t col = 1; col <= 13; ++col)
      stencil(row,col) = 1.0;

  std::array<InspectMatrixSIM,2> sim;
  for (size_t s = 0; s < 2; ++s) {
    const bool usePetsc = (s == 0);

    sim[s].read("src/LinAlg/Test/refdata/petsc_test_blocks_basis.xinp");
    sim[s].opt.solver = usePetsc ? SystemMatrix::PETSC : SystemMatrix::ISTL;
    sim[s].preprocess();
    sim[s].initSystem(usePetsc ? SystemMatrix::PETSC : SystemMatrix::ISTL);

    for (int iel = 1; iel <= sim[s].getSAM()->getNoElms(); ++iel)
      sim[s].getMatrix()->assemble(stencil, *sim[s].getSAM(), iel);

    sim[s].getMatrix()->beginAssembly();
    sim[s].getMatrix()->endAssembly();
  }

  // ISTL side: Schur block of the block preconditioner.
  const ProcessAdm& adm = sim[1].getProcessAdm();
  ISTL::Mat& A = static_cast<ISTLMatrix*>(sim[1].getMatrix())->getMatrix();
  ISTL::BlockPreconditioner block(A, adm.dd, "upper");
  ISTL::Mat& S = block.getBlock(1);

  // PETSc side: Schur complement built from the block matrices.
  PETScSolParams params(LinSolParams(), adm);
  params.setupSchurComplement(static_cast<PETScMatrix*>(sim[0].getMatrix())->getBlockMatrices());

  // check that matrices are the same
  for (size_t row = 0; row < S.N(); ++row) {
    PetscInt ncols;
    const PetscInt* cols;
    const PetscScalar* vals;
    MatGetRow(params.getSchurComplement(), row, &ncols, &cols, &vals);
    for (PetscInt k = 0; k < ncols; ++k)
      ASSERT_FLOAT_EQ(vals[k], S[row][cols[k]]);
    MatRestoreRow(params.getSchurComplement(), row, &ncols, &cols, &vals);
  }
}
Exemplo n.º 14
0
// Builds stencil settings for this op's fill type and asks the GPU's path
// renderer to stencil fPath into the current render target.
void GrStencilPathOp::onExecute(GrOpFlushState* state) {
    GrRenderTarget* renderTarget = state->drawOpArgs().renderTarget();
    SkASSERT(renderTarget);

    // The stencil settings depend on the target's stencil bit count.
    const int stencilBitCount = renderTarget->renderTargetPriv().numStencilBits();
    GrStencilSettings stencilSettings(GrPathRendering::GetStencilPassSettings(fFillType),
                                      fHasStencilClip,
                                      stencilBitCount);

    GrPathRendering::StencilPathArgs stencilArgs(fUseHWAA,
                                                 state->drawOpArgs().fProxy,
                                                 &fViewMatrix,
                                                 &fScissor,
                                                 &stencilSettings);
    state->gpu()->pathRendering()->stencilPath(stencilArgs, fPath.get());
}
Exemplo n.º 15
0
// Seconds of processor time elapsed since `since`, as reported by clock().
static double secondsSince ( clock_t since ) {
    return static_cast<double>( clock() - since ) / CLOCKS_PER_SEC;
}

// Timing driver for VTKStencil: fills a 10x10x10 flow field, then times
// stencil construction, file opening, iteration over the inner cells and
// the final write, printing each duration to stdout.
int main () {
    std::cout << "Starting VTK test" << std::endl;
    FlowField flowField ( 10, 10, 10 );

    clock_t start = clock();

    FLOAT velocity [3] = {1,1,1};

    // Pressure is set to the k index; velocity is (1,1,1) in every cell.
    for (int k = 0; k < flowField.getNz() + 3; k++ ){
        for (int j = 0; j < flowField.getNy() + 3; j++ ){
            for (int i = 0; i < flowField.getNx() + 3; i++ ){
                flowField.getPressure().getScalar(i,j,k) = static_cast<double>( k );
                flowField.getVelocity().setVector(velocity, i,j,k);
            }
        }
    }

    std::cout << "Initialization time: " << secondsSince( start )
        << std::endl;
    start = clock();

    Parameters parameters;
    parameters.dx = 1;
    parameters.dy = 1;
    parameters.dz = 1;

    VTKStencil stencil( "/tmp/some_file", parameters );

    std::cout << "Stencil creation time: " << secondsSince( start ) << std::endl;
    start = clock();

    stencil.openFile ( flowField, 5.0/3 );

    std::cout << "File-openning and grid data writing time: " << secondsSince( start ) << std::endl;
    start = clock();

    FieldIterator iterator( flowField, stencil );
    iterator.iterateInnerCells();

    std::cout << "Iteration time: " << secondsSince( start ) << std::endl;
    start = clock();

    stencil.write( flowField );
    std::cout << "Writing time: " << secondsSince( start ) << std::endl;

    stencil.closeFile();

}
// naive parallel iteration test suite
// Runs T time steps of the stencil over [lowerBound, upperBound], with the
// spatial loop parallelised by OpenMP, and returns the elapsed wall-clock
// time in seconds as measured by omp_get_wtime().
double test_1(){
    int read = 0;
    int write = 1;
    int t, x;

    const double start_time = omp_get_wtime();

    for( t = 1; t <= T; ++t ){
        #pragma omp parallel for private( x ) //schedule(dynamic)
        for( x = lowerBound; x <= upperBound; ++x ){
            stencil( read, write, x );
        }

        // The buffer just written becomes the next read buffer.
        read = write;
        write = 1 - read;
    }

    return omp_get_wtime() - start_time;
}
Exemplo n.º 17
0
// JS binding for canvas.getContext(contextId [, attributes]).
//
// Argument 0 is converted to a string and used as the context id. When WebGL
// is enabled and the id is "experimental-webgl" or "webkit-3d", a
// WebGLContextAttributes object is created and, if argument 1 is an object,
// each recognised property present on it is coerced to boolean and copied.
// Returns jsNull() when the canvas yields no context; otherwise the JS
// wrapper for the context, or the inspector's instrumented wrapper when one
// is produced.
JSValue JSHTMLCanvasElement::getContext(ExecState* exec)
{
    HTMLCanvasElement* canvas = static_cast<HTMLCanvasElement*>(impl());
    const UString& contextId = exec->argument(0).toString(exec)->value(exec);
    RefPtr<CanvasContextAttributes> attrs;
#if ENABLE(WEBGL)
    if (contextId == "experimental-webgl" || contextId == "webkit-3d") {
        attrs = WebGLContextAttributes::create();
        WebGLContextAttributes* webGLAttrs = static_cast<WebGLContextAttributes*>(attrs.get());
        if (exec->argumentCount() > 1 && exec->argument(1).isObject()) {
            JSObject* jsAttrs = exec->argument(1).getObject();
            // Each property is probed with hasProperty() before being read,
            // so absent properties leave the attribute's existing value.
            Identifier alpha(exec, "alpha");
            if (jsAttrs->hasProperty(exec, alpha))
                webGLAttrs->setAlpha(jsAttrs->get(exec, alpha).toBoolean(exec));
            Identifier depth(exec, "depth");
            if (jsAttrs->hasProperty(exec, depth))
                webGLAttrs->setDepth(jsAttrs->get(exec, depth).toBoolean(exec));
            Identifier stencil(exec, "stencil");
            if (jsAttrs->hasProperty(exec, stencil))
                webGLAttrs->setStencil(jsAttrs->get(exec, stencil).toBoolean(exec));
            Identifier antialias(exec, "antialias");
            if (jsAttrs->hasProperty(exec, antialias))
                webGLAttrs->setAntialias(jsAttrs->get(exec, antialias).toBoolean(exec));
            Identifier premultipliedAlpha(exec, "premultipliedAlpha");
            if (jsAttrs->hasProperty(exec, premultipliedAlpha))
                webGLAttrs->setPremultipliedAlpha(jsAttrs->get(exec, premultipliedAlpha).toBoolean(exec));
            Identifier preserveDrawingBuffer(exec, "preserveDrawingBuffer");
            if (jsAttrs->hasProperty(exec, preserveDrawingBuffer))
                webGLAttrs->setPreserveDrawingBuffer(jsAttrs->get(exec, preserveDrawingBuffer).toBoolean(exec));
        }
    }
#endif
    // attrs is still null here unless the WebGL branch above populated it.
    CanvasRenderingContext* context = canvas->getContext(ustringToString(contextId), attrs.get());
    if (!context)
        return jsNull();
    JSValue jsValue = toJS(exec, globalObject(), WTF::getPtr(context));
#if ENABLE(WEBGL)
    // With an inspector frontend attached, prefer the instrumented wrapper
    // for 3D contexts when the instrumentation returns one.
    if (context->is3d() && InspectorInstrumentation::hasFrontends()) {
        ScriptObject glContext(exec, jsValue.getObject());
        ScriptObject wrapped = InspectorInstrumentation::wrapWebGLRenderingContextForInstrumentation(canvas->document(), glContext);
        if (!wrapped.hasNoValue())
            return wrapped.jsValue();
    }
#endif
    return jsValue;
}
Exemplo n.º 18
0
  void VisitPolygon(const AirspacePolygon &airspace) {
    if (!PreparePolygon(airspace.GetPoints()))
      return;

    const AirspaceClassRendererSettings &class_settings =
      settings.classes[airspace.GetType()];

    bool fill_airspace = warning_manager.HasWarning(airspace) ||
      warning_manager.IsInside(airspace) ||
      class_settings.fill_mode ==
      AirspaceClassRendererSettings::FillMode::ALL;

    if (!warning_manager.IsAcked(airspace) &&
        class_settings.fill_mode !=
        AirspaceClassRendererSettings::FillMode::NONE) {
      GLEnable stencil(GL_STENCIL_TEST);

      if (!fill_airspace) {
        // set stencil for filling (bit 0)
        SetFillStencil();
        DrawPrepared();
        glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
      }

      // fill interior without overpainting any previous outlines
      {
        SetupInterior(airspace, !fill_airspace);
        GLEnable blend(GL_BLEND);
        DrawPrepared();
      }

      if (!fill_airspace) {
        // clear fill stencil (bit 0)
        ClearFillStencil();
        DrawPrepared();
        glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
      }
    }

    // draw outline
    if (SetupOutline(airspace))
      DrawPrepared();
  }
Exemplo n.º 19
0
/**
 * Computes the residual of a grid function against a right-hand side.
 *
 * The second derivatives p_xx and p_yy of `sourcegridfunction` are obtained
 * by applying a Stencil(3, h). The result is
 *   sqrt( sum_{i,j} (p_xx + p_yy - rhs)^2 / (I*J) )
 * over i = 1..dim[0]-2 and j = 1..dim[1]-2, with I = dim[0]-2, J = dim[1]-2.
 *
 * @param sourcegridfunction grid function whose derivatives are taken
 * @param rhs                right-hand side values, indexed [i][j]
 * @param h                  passed to the Stencil constructor
 * @return the square root of the accumulated, normalised squared residual
 */
RealType Solver::computeResidual(GridFunction& sourcegridfunction,
                                 GridFunctionType& rhs,
                                 const PointType& h){
    MultiIndexType dim = sourcegridfunction.GetGridDimension();

    // Read from the whole grid, write only to the points one cell in.
    MultiIndexType readBegin  (0,0);
    MultiIndexType readEnd    (dim[0]-1,dim[1]-1);
    MultiIndexType writeBegin (1,1);
    MultiIndexType writeEnd   (dim[0]-2,dim[1]-2);

    // Original author's open question: can this simply be passed on as
    // const, and how?
    Stencil stencil(3,h);

    // Second derivative in x-direction.
    GridFunction Fxx(dim);
    stencil.ApplyFxxStencilOperator(readBegin, readEnd, writeBegin, writeEnd, sourcegridfunction.GetGridFunction(), Fxx);

    // Second derivative in y-direction.
    GridFunction Fyy(dim);
    stencil.ApplyFyyStencilOperator(readBegin, readEnd, writeBegin, writeEnd, sourcegridfunction.GetGridFunction(), Fyy);

    // Accumulate the normalised squared point-wise residuals.
    RealType sumOfSquares = 0.0;
    for (IndexType i = 1; i <= dim[0]-2; i++)
    {
        for (IndexType j = 1; j <= dim[1]-2; j++)
        {
            RealType pointResidual = Fxx.GetGridFunction()[i][j]+ Fyy.GetGridFunction()[i][j] - rhs[i][j];
            sumOfSquares +=  pointResidual*pointResidual / (dim[0]-2) / (dim[1]-2);
        }
    }

    return sqrt(sumOfSquares);
}
Exemplo n.º 20
0
    // Creates a PETSc sparse matrix mapping Xh (trial) to Yh (test).
    //
    // The MPI matrix type is used when Yh's world communicator has more than
    // one process, the sequential type otherwise. The matrix is initialised
    // with the global and ghost-free local dof counts of both spaces and the
    // graph of the (test=Yh, trial=Xh) stencil.
    static sparse_matrix_ptrtype newMatrix( DomainSpace const& Xh,
                                            DualImageSpace const& Yh,
                                            size_type matrix_properties = NON_HERMITIAN )
    {
        auto pattern = stencil( _test=Yh,_trial=Xh );

        const bool isParallel = Yh->worldComm().globalSize()>1;
        sparse_matrix_ptrtype mat = isParallel
            ? sparse_matrix_ptrtype( new petscMPI_sparse_matrix_type( Yh->dof(),Xh->dof() ) )
            : sparse_matrix_ptrtype( new petsc_sparse_matrix_type( Yh->dof(),Xh->dof() ) );

        mat->setMatrixProperties( matrix_properties );
        mat->init( Yh->nDof(), Xh->nDof(),
                   Yh->nLocalDofWithoutGhost(), Xh->nLocalDofWithoutGhost(),
                   pattern->graph() );
        //Yh->nLocalDof(), Xh->nLocalDof() );
#if 0
        auto nSpace = DomainSpace::nSpaces;

        std::vector < std::vector<int> > is( nSpace );
        uint cptSpaces=0;

        //boost::tuple< typename DomainSpace::functionspace_vector_type, uint, std::vector < std::vector<int> > > hola;
        //        auto result = boost::make_tuple(Xh->functionSpaces(),cptSpaces,is);
        auto result = boost::make_tuple( cptSpaces,is );
        boost::fusion::fold( Xh->functionSpaces(), result, computeNDofForEachSpace() );

        for ( uint i = 0; i<nSpace; i++ )
        {
            //is[i].resize()
        }

#endif

        return mat;
    }
Exemplo n.º 21
0
/*
  create an interpolation matrix
  inputs:
    alpha   : k*dx
    M       : highest order bessel function to use
    upsample: ratio to upsample by (must not be negative)
  output:
    interpolation matrix as returned by interp_matrix(), or NULL when
    upsample is negative or a point buffer could not be obtained

  The output points form an (upsample+1) x (upsample+1) grid covering
  [-0.5, 0.5] in both x and y, with spacing 1/upsample.
*/
gsl_matrix *create_interp_matrix(double alpha, int M, int upsample) {
  point *points_in;
  point *points_out;
  int npoints_out;
  double x,y;
  int i, ix, iy;
  double step;
  double r_typical;
  gsl_matrix *interp;

  /* A negative ratio gives a negative step, so the grid walk below could
     never reach its upper bound. */
  if (upsample < 0) {
    return NULL;
  }

  points_in = stencil();
  /* NOTE(review): assumes stencil() signals failure with NULL -- confirm. */
  if (points_in == NULL) {
    return NULL;
  }

  step = 1.0/upsample;
  r_typical = 3*sqrt(0.5); // this works well for 4x4+2 stencil

  npoints_out = (upsample+1)*(upsample+1);

  points_out = (point *)malloc(npoints_out * sizeof(point));
  if (points_out == NULL) {
    /* Release the input stencil instead of writing through a null pointer. */
    free(points_in);
    return NULL;
  }

  /* Integer counters drive the loops so that exactly npoints_out entries are
     written whatever the floating-point rounding; x and y are still
     accumulated by repeated addition of step. */
  i = 0;
  x = -0.5;
  for (ix = 0; ix <= upsample; ix++) {
    y = -0.5;
    for (iy = 0; iy <= upsample; iy++) {
      points_out[i].x = x;
      points_out[i++].y = y;
      y += step;
    }
    x += step;
  }
  
  interp = interp_matrix(alpha, points_in, NUM_STENCIL_POINTS, points_out, npoints_out, M, r_typical);
  
  free(points_in);
  free(points_out);
  
  return interp;
}
Exemplo n.º 22
0
/* Computes a new traveltime at grid point i from its axis neighbours.
 *
 * For each of the three axes the larger of the two neighbouring traveltimes
 * (0 where the neighbour is outside the grid) is stored in x[j]. With
 * order > 1, stencil() is additionally called with the value two steps away
 * on the chosen side when that point is not SF_OUT and its time is not
 * smaller than the adjacent one. The three entries are then ordered by
 * descending value and handed to updaten2(), trying as many axes as have a
 * positive value first and then fewer. Returns the first result for which
 * updaten2() reports success, otherwise 0. */
static float qsolve2(int i)
/* find new traveltime at gridpoint i */
{
    int j, k, ix;
    float a, b, t, res;
    struct Upd *v[3], x[3], *xj;

    for (j=0; j<3; j++) {
	/* index of grid point i along axis j (s[j] is used as the stride,
	   n[j] as the axis length) */
	ix = (i/s[j])%n[j];
	
	/* a: traveltime of the backward neighbour, 0 at the lower boundary */
	if (ix > 0) { 
	    k = i-s[j];
	    a = ttime[k];
	} else {
	    a = 0.;
	}

	/* b: traveltime of the forward neighbour, 0 at the upper boundary */
	if (ix < n[j]-1) {
	    k = i+s[j];
	    b = ttime[k];
	} else {
	    b = 0.;
	}

	xj = x+j;
	xj->delta = rdx[j];

	/* keep the larger of the two neighbours (b when they are equal) */
	if (a > b) {
	    xj->stencil = xj->value = a;
	} else {
	    xj->stencil = xj->value = b;
	}

	/* higher-order case: also consult the point two steps away on the
	   side that was chosen; nothing is done when a == b */
	if (order > 1) {
	    if (a > b  && ix-2 >= 0) { 
		k = i-2*s[j];
		if (in[k] != SF_OUT && a <= (t=ttime[k]))
		    stencil(t,xj);
	    }
	    if (a < b && ix+2 <= n[j]-1) { 
		k = i+2*s[j];
		if (in[k] != SF_OUT && b <= (t=ttime[k]))
		    stencil(t,xj);
	    }
	}
    }

    /* order the three axis entries so that
       v[0]->value >= v[1]->value >= v[2]->value */
    if (x[0].value >= x[1].value) {
	if (x[1].value >= x[2].value) {
	    v[0] = x; v[1] = x+1; v[2] = x+2;
	} else if (x[2].value >= x[0].value) {
	    v[0] = x+2; v[1] = x; v[2] = x+1;
	} else {
	    v[0] = x; v[1] = x+2; v[2] = x+1;
	}
    } else {
	if (x[0].value >= x[2].value) {
	    v[0] = x+1; v[1] = x; v[2] = x+2;
	} else if (x[2].value >= x[1].value) {
	    v[0] = x+2; v[1] = x+1; v[2] = x;
	} else {
	    v[0] = x+1; v[1] = x+2; v[2] = x;
	}
    }
    
    /* file-scope v1 receives vv[i]; presumably read by updaten2() --
       TODO confirm */
    v1=vv[i];

    /* try the update with as many axes as have a positive value, falling
       back to fewer axes when updaten2() returns false */
    if(v[2]->value > 0) {   /* ALL THREE DIRECTIONS CONTRIBUTE */
	if (updaten2(3, &res, v) || 
	    updaten2(2, &res, v) || 
	    updaten2(1, &res, v)) return res;
    } else if(v[1]->value > 0) { /* TWO DIRECTIONS CONTRIBUTE */
	if (updaten2(2, &res, v) || 
	    updaten2(1, &res, v)) return res;
    } else if(v[0]->value > 0) { /* ONE DIRECTION CONTRIBUTES */
	if (updaten2(1, &res, v)) return res;
    }
	
    /* no update succeeded, or no axis had a positive value */
    return 0.;
}
double test_1(){
    double start_time = omp_get_wtime();
    int read=0, write = 1;

    // s is the number of non-pointy bit 2D slices of diamond tiling
    // that is available for the current tile size.
    int s = (tau/3) - 2;
    // subset_s is an input parameter indicating how many of those
    // slices we want to use in the repeated tiling pattern.
    // subset_s should be less than s and greater than or equal to 2.
    if (subset_s > s  || subset_s<2) {
        fprintf(stderr, "Error: need 2<=subset_s<=s\n");
        exit(-1);
    }
  
    // Set lower and upper bounds for spatial dimensions.
    // When did code gen have a non-inclusive upper bound.
    // Ian's upper bound is inclusive.
    int Li=1, Lj=1, Ui=upperBound+1, Uj=upperBound+1;
  
    // Loop over the tiling pattern.
    for (int toffset=0; toffset<T; toffset+=subset_s){
  
        // Loop over phases of tiles within repeated tile pattern.
        // This is like iterating over the A and B trapezoid tile types.
        for (int c0 = -2; c0 <= 0; c0 += 1){

            // Two loops over tiles within one phase.
            // All of the tiles within one phase can be done in parallel.

            // updates by Dave W, to the c1 and c2 loops, for OpenMP (from here to the end of the #if BOUNDING_BOX_FOR_PARALLEL_LOOPS
            //   hoist out min_c1 and max_c1, then use that to hoist a bounding box for c2
            //   initial version is just aiming for correct and parallel, without worrying about a loose boundingbox

            int c1_lb =
                max(
                    max(
                        floord(Lj + (tau/3) * c0 + (tau/3), tau), 
                        c0 + floord(-2 * T + Lj - 1, tau) + 1), 
                    floord(Lj + 1, tau)
                ); // end init block c1
            int c1_ub =
                min(
                    min(
                        floord(Uj + (tau/3) * c0 - ((tau/3)+2), tau) + 1, 
                        floord(T + Uj - 1, tau)), 
                    c0 + floord(Uj - 5, tau) + 2
                ); // end cond block c1

            // The two expressions below are the same as in the previous version, except that
            //  in the c2_lb_min_expr, I have replaced c1 with:
            //    c1_min_value where it appears with a positive coefficient, and
            //    c1_max_value where it appears with a negative coefficient.
            //  and in the c2_ub_max_expr, the opposite (i.e., c1 becomes c1_max_value where positive)
            // I will be embarrassed if I have done this wrong.
            ///  Note that I assume tau > 0
#define c2_lb_min_expr(c1_min_value, c1_max_value)              \
                    max( \
                        max( \
                            max( \
                                max( \
                                    max( \
                                        max( \
                                            c0 - 2 * c1_max_value + floord(-Ui + Lj + 1,tau),  \
                                            -c1_max_value + floord(-2 * Ui - Uj + tau * c0 + tau * c1_min_value - tau-3, tau*2)+1), \
                                        c1_min_value + floord(-Ui - 2 * Uj + 3, tau)), \
                                    floord(-Ui - Uj + 3, tau)),  \
                                c0 - c1_max_value + floord(-Ui - (tau/3) * c0 + ((tau/3)+1), tau)),  \
                            c0 - c1_max_value + floord(-T - Ui, tau) + 1),  \
                        -c1_max_value + floord(-Ui + 4, tau) - 1 \
                    ) /* end init block c2 */

#define c2_ub_max_expr(c1_min_value, c1_max_value)              \
                    min( \
                        min( \
                            min( \
                                min( \
                                    min( \
                                        min( \
                                            c0 - 2 * c1_min_value + floord(-Li + Uj - 2, tau) + 1,  \
                                            c0 - c1_min_value + floord(-Li - 2, tau) + 1),  \
                                        c0 - c1_min_value + floord(-Li - (tau/3) * c0 - ((tau/3)+1), tau) + 1), \
                                    floord(T - Li - Lj, tau)), \
                                -c1_min_value + floord(2 * T - Li, tau)),  \
                            c1_max_value + floord(-Li - 2 * Lj - 1, tau) + 1),  \
                        -c1_min_value + floord(-2 * Li - Lj + tau * c0 + tau * c1_max_value + (tau-1), tau*2) \
                    ) /* end cond block c2 */
#define c2_lb_expr(c1_value) c2_lb_min_expr(c1_value, c1_value)
#define c2_ub_expr(c1_value) c2_ub_max_expr(c1_value, c1_value)


#if BOUNDING_BOX_FOR_PARALLEL_LOOPS
            int c2_box_lb = c2_lb_min_expr(c1_lb, c1_ub);
            int c2_box_ub = c2_ub_max_expr(c1_lb, c1_ub);
#if PARALLEL
            // don't need to mention c1...c5 below, since they're scoped inside the for loops
#pragma omp parallel for shared(start_time, s, Li, Lj, Ui, Uj, toffset, c0, c1_lb, c1_ub, c2_box_lb, c2_box_ub, ) private(read, write) collapse(2)
#endif
            for (int c1 = c1_lb; c1 <= c1_ub; c1 += 1) {
                for (int c2 = c2_box_lb; c2 <= c2_box_ub; c2 += 1) if (c2 >= c2_lb_expr(c1) && c2 <= c2_ub_expr(c1)) {
#else
            for (int c1 = c1_lb; c1 <= c1_ub; c1 += 1) {
                for (int c2 = c2_lb_expr(c1); c2 <= c2_ub_expr(c1); c2 += 1) {
#endif
                  //fprintf(stdout, "(%d,%d,%d)\n", c0,c1,c2);
                    // Loop over subset_s time steps within tiling pattern
                    // and within tile c0,c1,c2.
                    // Every time the pattern is repeated, toffset will be
                    // subset_s bigger.
                    // The real t value is c3+toffset.  We are just using the
                    // tiling pattern from t=1 to t<=subset_s.
                    for (int c3 = 1; c3 <= min(T-toffset,subset_s); c3 += 1){
                        int t = c3+toffset;
                        // if t % 2  is 1, then read=0 and write=1
                        write = t & 1;
                        read = 1-write;

                        // x spatial dimension.
                        for (int c4 = 
                            max(
                                max(
                                    max(
                                        -tau * c1 - tau * c2 + 2 * c3 - (2*tau-2),
                                        -Uj - tau * c2 + c3 - (tau-2)),
                                   tau * c0 - tau * c1 - tau * c2 - c3),
                                 Li
                            ); // end init block c4
                        c4 <= 
                            min(
                                min(
                                    min(
                                        tau * c0 - tau * c1 - tau * c2 - c3 + (tau-1),
                                        -tau * c1 - tau * c2 + 2 * c3),
                                    -Lj - tau * c2 + c3),
                                Ui - 1
                            ); // end cond block c4
                        c4 += 1){

                            // y spatial dimension.
                            for (int c5 = 
                                max(
                                    max(
                                        tau * c1 - c3,
                                        Lj),
                                    -tau * c2 + c3 - c4 - (tau-1)
                                ); // end init block c5
                                c5 <= 
                                    min(
                                        min(
                                            Uj - 1,
                                            -tau * c2 + c3 - c4),
                                        tau * c1 - c3 + (tau-1)
                                ); // end cond block c5
                                c5 += 1){
                                //fprintf(stdout, "(%d,%d,%d,%d,%d,%d)\n", c0,c1,c2,c3,c4,c5);
                                stencil( read, write, c4, c5);
                            } // for c5
                        } // for c4
                    } // for c3
                } // for c2
            } // for c1
        } // for c0
    } // for toffset
    
    double end_time = omp_get_wtime();
    
    return (end_time - start_time);
}

// Benchmark entry point.
// Parses the command line, validates tau, initialises the problem, runs the
// tiled computation (test_1) and optionally prints the elapsed time and
// verifies the result against a serial run.
//
// Options: -n (don't print time), -c <cores>, -s <subset_s>, -p <problem size>,
//          -T <time steps>, -t <tau>, -h (help), -v (verify).
int main( int argc, char* argv[] ){
    setbuf(stdout, NULL); // set buffer to null, so prints ALWAYS print (for debug purposes mainly)
    
    bool verify = false;
    bool printtime = true;
    // Command line parsing
    // getopt() returns an int; storing it in a plain char breaks the "!= -1"
    // end-of-options test on platforms where char is unsigned.
    int c;
    while ((c = getopt (argc, argv, "nc:s:p:T:t:hv")) != -1){
        switch( c ) {
            case 'n':
                printtime=false;
                break;
                
            case 'c': // number of cores (threads)
                cores = parseInt( optarg );
                if( cores <= 0 ){
                    fprintf(stderr, "cores must be greater than 0: %d\n", cores);
                    exit(BAD_RUN_TIME_PARAMETERS);
                }
                break;
                
            case 's': // subset
                //globalSeed = parseInt( optarg );
                subset_s = parseInt( optarg );
                break;
                
            case 'p': // problem size
                problemSize = parseInt( optarg );
                if( problemSize <= 0 ){
                    fprintf(stderr, "problemSize must be greater than 0: %d\n", problemSize);
                    exit(BAD_RUN_TIME_PARAMETERS);
                }
                break;
                
            case 'T': // T (time steps)
                T = parseInt( optarg );
                if( T <= 0 ){    
                    fprintf(stderr, "T must be greater than 0: %d\n", T);
                    exit(BAD_RUN_TIME_PARAMETERS);
                }
                break;
            
            case 't': // tau
#if defined tau
                // tau was fixed at compile time; only accept -t if it agrees.
                fprintf(stderr, "don't use -t to set tau when you compiled with -Dtau=%d.\n", tau);
                if (parseInt(optarg) != tau)
                    exit(BAD_COMPILE_TIME_PARAMETERS);
#else
                tau = parseInt( optarg );
#endif
                break;
            
            case 'h': // help
                printf("usage: %s\n-n \t dont print time \n-p <problem size> \t problem size in elements \n-T <time steps>\t number of time steps\n-c <cores>\tnumber of threads\n-s <subset_s>\t tile parameter\n-t <tau>\t tile parameter\n-h\tthis dialogue\n-v\tverify output\n", argv[0]);
                exit(0);
            
            case 'v': // verify;
                verify = true;
                break;
            
            case '?': // unknown option or missing argument
                if (optopt == 'p')
                    fprintf (stderr, "Option -%c requires positive int argument: problem size.\n", optopt);
                else if (optopt == 'T')
                    fprintf (stderr, "Option -%c requires positive int argument: T.\n", optopt);
                else if (optopt == 's')
                    fprintf (stderr, "Option -%c requires int argument: subset_s.\n", optopt);
                else if (optopt == 'c')
                    fprintf (stderr, "Option -%c requires int argument: number of cores.\n", optopt);
                else if (isprint (optopt))
                    fprintf (stderr, "Unknown option `-%c'.\n", optopt);
                else
                    fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt);
                exit(0);
                
          default:
             exit(0);
          }
    }

    // The tiling expressions divide tau by 3, and this check additionally
    // enforces a minimum of 15.
    if( !( tau % 3 == 0 && tau >= 15 ) ){
#if defined tau
        fprintf(stderr, "tau must be a multiple of 3, and >= 15, but the program was compiled with -Dtau=%d, and thus can't run :-(\n", tau);
        exit(BAD_COMPILE_TIME_PARAMETERS);
#else
        fprintf(stderr, "tau must be a multiple of 3, and >= 15, but it's %d; re-run with a different -t value\n", tau);
        exit(BAD_RUN_TIME_PARAMETERS);
#endif
    }


    init();
    initSpace();
    
    double time = test_1();
    
    if( printtime ) {
        printf( "Time: %f\n", time );
    }

    if( verify ){
        verifyResult( true );
    }
    
    return 0;
}
// Checks the values currently held in space[T & 1] against a plain,
// untiled re-computation of all T time steps.
// The current result is copied aside, the grid is reset with initSpace(),
// the stencil is swept serially, and the two results are compared cell by
// cell with an exact comparison.
// verbose: when true, prints the first mismatch or "SUCCESS".
// returns true if valid result
bool verifyResult( bool verbose ){
    assert( space[0] != NULL && space[1] != NULL );

    // Snapshot of the result under test: (problemSize + 2) rows of
    // (problemSize + 2) doubles each.
    double** snapshot = (double**) malloc( (problemSize + 2) * sizeof(double*));
    if( snapshot == NULL ){
        printf( "Could not allocate x axis of verification array\n" );
        exit(0);
    }

    int row = 0;
    while( row < problemSize + 2 ){
        snapshot[row] = (double*) malloc( (problemSize + 2) * sizeof(double));
        if( snapshot[row] == NULL ){
            printf( "Could not allocate y axis of verification array\n" );
            exit(0);
        }
        ++row;
    }

    // Copy the columns lowerBound..upperBound of every row.
    for( row = 0; row < problemSize + 2; ++row ){
        int col = lowerBound;
        while( col <= upperBound ){
            snapshot[row][col] = space[ T & 1 ][row][col];
            ++col;
        }
    }

    // Start again from the initial state and compute the reference serially.
    initSpace();

    int src = 0;
    int dst = 1;
    for( int step = 1; step <= T; ++step ){
        for( int i = lowerBound; i <= upperBound; ++i ){
            for( int j = lowerBound; j <= upperBound; ++j ){
                stencil( src, dst, i, j);
            }
        }
        // swap the roles of the two buffers for the next time step
        src = dst;
        dst = 1 - dst;
    }

    // Compare; stop at the first differing cell.
    bool mismatch = false;
    for( int i = lowerBound; i <= upperBound && !mismatch; ++i ){
        for( int j = lowerBound; j <= upperBound; ++j ){
            if( snapshot[i][j] == space[ T & 1 ][i][j] ){
                continue;
            }
            mismatch = true;
            if( verbose ) printf( "FAILED! %f != %f at %d, %d\n", snapshot[i][j],space[ T & 1 ][i][j], i, j);
            break;
        }
    }

    if( verbose && !mismatch ) printf( "SUCCESS\n" );

    // Release the snapshot.
    for( row = 0; row < problemSize + 2; ++row ){
        free( snapshot[row] );
    }
    free( snapshot );

    return !mismatch;

}
void RenderState::dump() {
    blend().dump();
    meshState().dump();
    scissor().dump();
    stencil().dump();
}
Exemplo n.º 26
0
// The MAIN function, from here we start our application and run our Game loop
int main()
{
	// Init GLFW
	glfwInit();
	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
	glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
	glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
	glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);

	GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "Some OpenGL Testing", nullptr, nullptr); // Windowed
	glfwMakeContextCurrent(window);

	// Set the required callback functions
	glfwSetKeyCallback(window, key_callback);
	glfwSetCursorPosCallback(window, mouse_callback);
	glfwSetScrollCallback(window, scroll_callback);
	glfwSetMouseButtonCallback(window, mouse_button_click_callback);

	// Options
	glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_NORMAL);

	// Initialize GLEW to setup the OpenGL Function pointers
	glewExperimental = GL_TRUE;
	glewInit();

	// Define the viewport dimensions
	glViewport(0, 0, screenWidth, screenHeight);

	// Setup some OpenGL options
	glEnable(GL_DEPTH_TEST);
	glEnable(GL_STENCIL_TEST);
	glDepthFunc(GL_LESS); // Set to always pass the depth test (same effect as glDisable(GL_DEPTH_TEST))
	glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);

	// Setup and compile our shaders
	Shader shader("shaders/advanced.vs", "shaders/advanced.frag");
	Shader stencil("shaders/stencil.vs", "shaders/stencil.frag");

#pragma region "object_initialization"
	// Set the object data (buffers, vertex attributes)
	GLfloat cubeVertices[] = {
		// Positions          // Texture Coords
		-0.5f, -0.5f, -0.5f,  0.0f, 0.0f,
		0.5f, -0.5f, -0.5f,  1.0f, 0.0f,
		0.5f,  0.5f, -0.5f,  1.0f, 1.0f,
		0.5f,  0.5f, -0.5f,  1.0f, 1.0f,
		-0.5f,  0.5f, -0.5f,  0.0f, 1.0f,
		-0.5f, -0.5f, -0.5f,  0.0f, 0.0f,

		-0.5f, -0.5f,  0.5f,  0.0f, 0.0f,
		0.5f, -0.5f,  0.5f,  1.0f, 0.0f,
		0.5f,  0.5f,  0.5f,  1.0f, 1.0f,
		0.5f,  0.5f,  0.5f,  1.0f, 1.0f,
		-0.5f,  0.5f,  0.5f,  0.0f, 1.0f,
		-0.5f, -0.5f,  0.5f,  0.0f, 0.0f,

		-0.5f,  0.5f,  0.5f,  1.0f, 0.0f,
		-0.5f,  0.5f, -0.5f,  1.0f, 1.0f,
		-0.5f, -0.5f, -0.5f,  0.0f, 1.0f,
		-0.5f, -0.5f, -0.5f,  0.0f, 1.0f,
		-0.5f, -0.5f,  0.5f,  0.0f, 0.0f,
		-0.5f,  0.5f,  0.5f,  1.0f, 0.0f,

		0.5f,  0.5f,  0.5f,  1.0f, 0.0f,
		0.5f,  0.5f, -0.5f,  1.0f, 1.0f,
		0.5f, -0.5f, -0.5f,  0.0f, 1.0f,
		0.5f, -0.5f, -0.5f,  0.0f, 1.0f,
		0.5f, -0.5f,  0.5f,  0.0f, 0.0f,
		0.5f,  0.5f,  0.5f,  1.0f, 0.0f,

		-0.5f, -0.5f, -0.5f,  0.0f, 1.0f,
		0.5f, -0.5f, -0.5f,  1.0f, 1.0f,
		0.5f, -0.5f,  0.5f,  1.0f, 0.0f,
		0.5f, -0.5f,  0.5f,  1.0f, 0.0f,
		-0.5f, -0.5f,  0.5f,  0.0f, 0.0f,
		-0.5f, -0.5f, -0.5f,  0.0f, 1.0f,

		-0.5f,  0.5f, -0.5f,  0.0f, 1.0f,
		0.5f,  0.5f, -0.5f,  1.0f, 1.0f,
		0.5f,  0.5f,  0.5f,  1.0f, 0.0f,
		0.5f,  0.5f,  0.5f,  1.0f, 0.0f,
		-0.5f,  0.5f,  0.5f,  0.0f, 0.0f,
		-0.5f,  0.5f, -0.5f,  0.0f, 1.0f
	};
	GLfloat planeVertices[] = {
		// Positions
		5.0f,  -0.5f,  5.0f,  2.0f, 0.0f,
		-5.0f, -0.5f,  5.0f,  0.0f, 0.0f,
		-5.0f, -0.5f, -5.0f,  0.0f, 2.0f,

		5.0f,  -0.5f,  5.0f,  2.0f, 0.0f,
		-5.0f, -0.5f, -5.0f,  0.0f, 2.0f,
		5.0f,  -0.5f, -5.0f,  2.0f, 2.0f
	};
	// Setup cube VAO
	GLuint cubeVAO, cubeVBO;
	glGenVertexArrays(1, &cubeVAO);
	glGenBuffers(1, &cubeVBO);
	glBindVertexArray(cubeVAO);
	glBindBuffer(GL_ARRAY_BUFFER, cubeVBO);
	glBufferData(GL_ARRAY_BUFFER, sizeof(cubeVertices), &cubeVertices, GL_STATIC_DRAW);
	glEnableVertexAttribArray(0);
	glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)0);
	glEnableVertexAttribArray(1);
	glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat)));
	glBindVertexArray(0);
	// Setup plane VAO
	GLuint planeVAO, planeVBO;
	glGenVertexArrays(1, &planeVAO);
	glGenBuffers(1, &planeVBO);
	glBindVertexArray(planeVAO);
	glBindBuffer(GL_ARRAY_BUFFER, planeVBO);
	glBufferData(GL_ARRAY_BUFFER, sizeof(planeVertices), &planeVertices, GL_STATIC_DRAW);
	glEnableVertexAttribArray(0);
	glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)0);
	glEnableVertexAttribArray(1);
	glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), (GLvoid*)(3 * sizeof(GLfloat)));
	glBindVertexArray(0);

	// Load textures
	GLuint cubeTexture = loadTexture("media/container.jpg");
	GLuint floorTexture = loadTexture("media/awesomeface.png");

#pragma endregion

	// Game loop
	while (!glfwWindowShouldClose(window))
	{
		GLfloat currentFrame = glfwGetTime();
		deltaTime = currentFrame - lastFrame;
		lastFrame = currentFrame;

		// Check and call events
		glfwPollEvents();
		handle_input(window);

		// Clear the colorbuffer
		glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);

		// Set uniforms
		stencil.use();
		glm::mat4 model;
		glm::mat4 view = camera.get_view_matrix();
		glm::mat4 projection = glm::perspective(camera.m_zoom, (float)screenWidth / (float)screenHeight, 0.1f, 100.0f);
		glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "view"), 1, GL_FALSE, glm::value_ptr(view));
		glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "projection"), 1, GL_FALSE, glm::value_ptr(projection));
		shader.use();
		glUniformMatrix4fv(glGetUniformLocation(shader.Program, "view"), 1, GL_FALSE, glm::value_ptr(view));
		glUniformMatrix4fv(glGetUniformLocation(shader.Program, "projection"), 1, GL_FALSE, glm::value_ptr(projection));


		// Draw floor as normal, we only care about the containers. The floor should NOT fill the stencil buffer so we set its mask to 0x00
		glStencilMask(0x00);
		// Floor
		glBindVertexArray(planeVAO);
		glBindTexture(GL_TEXTURE_2D, floorTexture);
		model = glm::mat4();
		glUniformMatrix4fv(glGetUniformLocation(shader.Program, "model"), 1, GL_FALSE, glm::value_ptr(model));
		glDrawArrays(GL_TRIANGLES, 0, 6);
		glBindVertexArray(0);

		// == =============
		// 1st. Render pass, draw objects as normal, filling the stencil buffer
		glStencilFunc(GL_ALWAYS, 1, 0xFF);
		glStencilMask(0xFF);
		// Cubes
		glBindVertexArray(cubeVAO);
		glBindTexture(GL_TEXTURE_2D, cubeTexture);
		model = glm::translate(model, glm::vec3(-1.0f, 0.0f, -1.0f));
		glUniformMatrix4fv(glGetUniformLocation(shader.Program, "model"), 1, GL_FALSE, glm::value_ptr(model));
		glDrawArrays(GL_TRIANGLES, 0, 36);
		model = glm::mat4();
		model = glm::translate(model, glm::vec3(2.0f, 0.0f, 0.0f));
		glUniformMatrix4fv(glGetUniformLocation(shader.Program, "model"), 1, GL_FALSE, glm::value_ptr(model));
		glDrawArrays(GL_TRIANGLES, 0, 36);
		glBindVertexArray(0);

		// == =============
		// 2nd. Render pass, now draw slightly scaled versions of the objects, this time disabling stencil writing.
		// Because stencil buffer is now filled with several 1s. The parts of the buffer that are 1 are now not drawn, thus only drawing 
		// the objects' size differences, making it look like borders.
		glStencilFunc(GL_NOTEQUAL, 1, 0xFF);
		glStencilMask(0x00);
		glDisable(GL_DEPTH_TEST);
		stencil.use();
		GLfloat scale = 1.1;
		// Cubes
		glBindVertexArray(cubeVAO);
		glBindTexture(GL_TEXTURE_2D, cubeTexture);
		model = glm::mat4();
		model = glm::translate(model, glm::vec3(-1.0f, 0.0f, -1.0f));
		model = glm::scale(model, glm::vec3(scale, scale, scale));
		glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "model"), 1, GL_FALSE, glm::value_ptr(model));
		glDrawArrays(GL_TRIANGLES, 0, 36);
		model = glm::mat4();
		model = glm::translate(model, glm::vec3(2.0f, 0.0f, 0.0f));
		model = glm::scale(model, glm::vec3(scale, scale, scale));
		glUniformMatrix4fv(glGetUniformLocation(stencil.Program, "model"), 1, GL_FALSE, glm::value_ptr(model));
		glDrawArrays(GL_TRIANGLES, 0, 36);
		glBindVertexArray(0);
		glStencilMask(0xFF);
		glEnable(GL_DEPTH_TEST);

		// Swap the buffers
		glfwSwapBuffers(window);
	}

	glfwTerminate();
	return 0;
}
/**
 * Sets up a newly created inner vertex for the experiment.
 *
 * The vertex is refined while the coarse grid level is below 3. In the
 * Dim2 build it then receives the stencil assembled from 1D Laplace and
 * 1D mass stencils, the 5x5 prolongation and restriction weights, and a
 * right-hand side of 1.0. Other builds hit a "not implemented yet"
 * assertion.
 */
void peano::applications::poisson::multigrid::mappings::SpacetreeGrid2SetupExperiment::createInnerVertex(
  peano::applications::poisson::multigrid::SpacetreeGridVertex&               fineGridVertex,
  const tarch::la::Vector<DIMENSIONS,double>&                                 fineGridX,
  const tarch::la::Vector<DIMENSIONS,double>&                                 fineGridH,
  peano::applications::poisson::multigrid::SpacetreeGridVertex const * const  coarseGridVertices,
  const peano::kernel::gridinterface::VertexEnumerator&                       coarseGridVerticesEnumerator,
  const peano::applications::poisson::multigrid::SpacetreeGridCell&           coarseGridCell,
  const tarch::la::Vector<DIMENSIONS,int>&                                    fineGridPositionOfVertex
) {
  logTraceInWith6Arguments( "createInnerVertex(...)", fineGridVertex, fineGridX, fineGridH, coarseGridVerticesEnumerator.toString(), coarseGridCell, fineGridPositionOfVertex );

  // Shorthand for the stencil toolbox used throughout this function.
  namespace stencils = peano::toolbox::stencil;

  // Refinement is driven by the grid level only (a threshold on the cell
  // volume was used in an earlier variant).
  if (coarseGridVerticesEnumerator.getLevel() < 3) {
    fineGridVertex.refine();
  }

  stencils::Stencil stencil;

  #if defined(Dim2)
  // Sum of (Laplace in x) x (mass in y) and (mass in x) x (Laplace in y).
  stencil =
    stencils::StencilFactory::stencilProduct(
      stencils::StencilFactory::get1DLaplaceStencil(fineGridH(0)),
      stencils::StencilFactory::get1DMassStencil(fineGridH(1))
    )
    +
    stencils::StencilFactory::stencilProduct(
      stencils::StencilFactory::get1DMassStencil(fineGridH(0)),
      stencils::StencilFactory::get1DLaplaceStencil(fineGridH(1))
    );

  // Expected result: 8/3 in the centre entry, -1/3 in the eight others.
  assertionNumericalEquals(stencil(0), -1.0/3.0);
  assertionNumericalEquals(stencil(1), -1.0/3.0);
  assertionNumericalEquals(stencil(2), -1.0/3.0);
  assertionNumericalEquals(stencil(3), -1.0/3.0);
  assertionNumericalEquals(stencil(4),  8.0/3.0);
  assertionNumericalEquals(stencil(5), -1.0/3.0);
  assertionNumericalEquals(stencil(6), -1.0/3.0);
  assertionNumericalEquals(stencil(7), -1.0/3.0);
  assertionNumericalEquals(stencil(8), -1.0/3.0);

  #ifdef Asserts
  // Cross-check the element-wise assembly matrix derived from the stencil.
  stencils::ElementMatrix elementMatrix;
  stencils::ElementWiseAssemblyMatrix testMatrix = elementMatrix.getElementWiseAssemblyMatrix( stencil );

  assertionNumericalEquals(testMatrix(0,0),  2.0/3.0);
  assertionNumericalEquals(testMatrix(0,1), -0.5/3.0);
  assertionNumericalEquals(testMatrix(0,2), -0.5/3.0);
  assertionNumericalEquals(testMatrix(0,3), -1.0/3.0);

  assertionNumericalEquals(testMatrix(1,0), -0.5/3.0);
  assertionNumericalEquals(testMatrix(1,1),  2.0/3.0);
  assertionNumericalEquals(testMatrix(1,2), -1.0/3.0);
  assertionNumericalEquals(testMatrix(1,3), -0.5/3.0);

  assertionNumericalEquals(testMatrix(2,0), -0.5/3.0);
  assertionNumericalEquals(testMatrix(2,1), -1.0/3.0);
  assertionNumericalEquals(testMatrix(2,2),  2.0/3.0);
  assertionNumericalEquals(testMatrix(2,3), -0.5/3.0);

  assertionNumericalEquals(testMatrix(3,0), -1.0/3.0);
  assertionNumericalEquals(testMatrix(3,1), -0.5/3.0);
  assertionNumericalEquals(testMatrix(3,2), -0.5/3.0);
  assertionNumericalEquals(testMatrix(3,3),  2.0/3.0);
  #endif

  fineGridVertex.setStencil(stencil);

  // Prolongation weights, 5x5, row by row.
  stencils::ProlongationMatrix prolongation;
  tarch::la::assignList(prolongation) =
    1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0,
    2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
    3.0/9.0, 6.0/9.0, 9.0/9.0, 6.0/9.0, 3.0/9.0,
    2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
    1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0;
  fineGridVertex.setP(prolongation);

  // Restriction uses the same weights as the prolongation.
  stencils::RestrictionMatrix restriction;
  tarch::la::assignList(restriction) =
    1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0,
    2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
    3.0/9.0, 6.0/9.0, 9.0/9.0, 6.0/9.0, 3.0/9.0,
    2.0/9.0, 4.0/9.0, 6.0/9.0, 4.0/9.0, 2.0/9.0,
    1.0/9.0, 2.0/9.0, 3.0/9.0, 2.0/9.0, 1.0/9.0;
  fineGridVertex.setR(restriction);

  fineGridVertex.setRhs(1.0, fineGridH);

  #else
  assertionMsg( false, "not implemented yet" );
  #endif

  logTraceOutWith1Argument( "createInnerVertex(...)", fineGridVertex );
}
Exemplo n.º 28
0
/*
 * Runs the stencil numIterations times with the reference implementation
 * (stencil) and with stencil_multi, prints both timings, and compares the
 * output buffer of the second run against the reference.
 *
 * Usage: prog [<iterations> [<ydim_gpu>]]
 * Missing arguments keep their defaults (NB_ITER and the current value of
 * ydim_gpu).
 *
 * Returns 0 on completion, 1 if a buffer could not be allocated.
 */
int main(int argc, char** argv)
{

    int numIterations = NB_ITER;

    /* argv[0] is always counted in argc, so each optional argument must be
       checked individually before it is read. */
    if(argc > 1)
        numIterations = atoi(argv[1]);
    if(argc > 2)
        ydim_gpu = atoi(argv[2]);

    const unsigned int mem_size= TOTALSIZE*sizeof(float);

    struct double_matrice container;
    struct timeval tv1,tv2,tcpu1,tcpu2;

    // Allocation of input & output matrices, plus the two buffers used by the
    // reference computation
    //
    float *h_idata     = malloc(mem_size);
    float *h_odata     = malloc(mem_size);
    float *reference   = malloc(mem_size);
    float *reference_i = malloc(mem_size);

    if(h_idata == NULL || h_odata == NULL || reference == NULL || reference_i == NULL)
    {
        fprintf(stderr,"allocation mémoire impossible\n");
        free(reference_i);
        free(reference);
        free(h_odata);
        free(h_idata);
        return 1;
    }

    container.in  = h_idata + LINESIZE * (YDIM_GPU) + OFFSET;
    container.out = h_odata + LINESIZE * (YDIM_GPU) + OFFSET;
    container.ydim_cpu = YDIM_CPU;



    // Initialization of input & output matrices
    //
    srand(1234);
    for(unsigned int i = 0; i < TOTALSIZE; i++)
    {
        h_idata[i]=rand();
        h_odata[i]=0.0;
    }



    /* CPU reference version, used for comparison */

    void * tmp_switch;

    for(unsigned int i = 0; i < TOTALSIZE; i++)
    {
        reference[i]   = 0.0;
        reference_i[i] = h_idata[i];
    }

    gettimeofday(&tcpu1,NULL);

    for(int i=0; i<numIterations; ++i)
    {
        stencil(reference + OFFSET, reference_i + OFFSET, YDIM);
        /* the output of this iteration is the input of the next one */
        tmp_switch  = reference;
        reference   = reference_i;
        reference_i = tmp_switch;
    }

    /* After an odd number of swaps, swap once more so that `reference`
       designates the buffer it designated before the loop. */
    if(numIterations%2)
    {
        tmp_switch  = reference;
        reference   = reference_i;
        reference_i = tmp_switch;
    }
    gettimeofday(&tcpu2,NULL);

    float timecpu=((float)TIME_DIFF(tcpu1,tcpu2)) / 1000;



    printf("nombre d'itérations: %d\n",numIterations);

    gettimeofday(&tv1, NULL);

    for(int i = 0; i<numIterations; i++) // one stencil_multi call per iteration, swapping buffers in between
    {
        stencil_multi(container.out,container.in,container.ydim_cpu);

        tmp_switch = container.out;
        container.out = container.in;
        container.in = tmp_switch;

    }
    gettimeofday(&tv2, NULL);

    tmp_switch = d_odata;
    d_odata = d_idata;
    d_idata = tmp_switch;



    float time1=((float)TIME_DIFF(tv1,tv2)) / 1000;

    /* speed-up, then time and throughput of each version */
    printf("%f\t%f ms (%fGo/s)\t%f ms (%fGo/s)\n", timecpu/time1,
           time1, numIterations * 3*mem_size / time1 / 1000000,
           timecpu, numIterations * 3*mem_size / timecpu / 1000000);

    // Validate our results
    //
    unsigned int errors=0;

    float * h_fdata = h_odata;

    for(unsigned int i=0; i<TOTALSIZE; i++)
    {
        /* Relative error on magnitudes: a signed difference would let any
           value larger than the reference pass unnoticed. Multiplying
           instead of dividing also avoids a division by zero. */
        float diff  = reference[i]-h_fdata[i];
        float scale = reference[i];
        if(diff < 0)  diff  = -diff;
        if(scale < 0) scale = -scale;

        if(diff > 1e-6 * scale)
        {
            if(errors < 10) printf(" %u %f vs %f\n", i, h_fdata[i], reference[i]);
            errors++;
        }
    }
    if(errors)
        fprintf(stderr,"%u erreurs !\n", errors);
    else
        fprintf(stderr,"pas d'erreurs, cool !\n");
    free(reference_i);
    free(reference);

    // Shutdown and cleanup
    //
    free(h_odata);
    free(h_idata);

    return 0;
}
// Issues an occlusion query for this group's bounding box, or marks the
// group's pending query for discard when earlyFail() reports true.
//
// Nothing is done unless the spatial partition has occlusion enabled and
// LLPipeline::sUseOcclusion is greater than 1.
//
// camera - passed to earlyFail() and used to choose which box fan to draw.
// shift  - optional offset; when non-NULL it is added to bounds[0] only.
void LLOcclusionCullingGroup::doOcclusion(LLCamera* camera, const LLVector4a* shift)
{
	// NOTE(review): presumably a scoped object that keeps the stencil test
	// disabled for the duration of this function - confirm against LLGLDisable.
	LLGLDisable stencil(GL_STENCIL_TEST);
	if (mSpatialPartition->isOcclusionEnabled() && LLPipeline::sUseOcclusion > 1)
	{
		//move mBounds to the agent space if necessary
		// (bounds[0] is uploaded as BOX_CENTER and bounds[1] as the basis of
		// BOX_SIZE below, which is why only bounds[0] is shifted)
		LLVector4a bounds[2];
		bounds[0] = mBounds[0];
		bounds[1] = mBounds[1];
		if(shift != NULL)
		{
			bounds[0].add(*shift);
		}

		// Don't cull hole/edge water, unless we have the GL_ARB_depth_clamp extension
		if (earlyFail(camera, bounds))
		{
			// No query is issued: flag any outstanding query for discard and
			// clear the OCCLUDED state.
			LLFastTimer t(FTM_OCCLUSION_EARLY_FAIL);
			setOcclusionState(LLOcclusionCullingGroup::DISCARD_QUERY);
			assert_states_valid(this);
			clearOcclusionState(LLOcclusionCullingGroup::OCCLUDED, LLOcclusionCullingGroup::STATE_MODE_DIFF);
			assert_states_valid(this);
		}
		else
		{
			if (!isOcclusionState(QUERY_PENDING) || isOcclusionState(DISCARD_QUERY))
			{
				{ //no query pending, or previous query to be discarded
					LLFastTimer t(FTM_RENDER_OCCLUSION);

					// Lazily obtain a query object name for the current camera.
					if (!mOcclusionQuery[LLViewerCamera::sCurCameraID])
					{
						LLFastTimer t(FTM_OCCLUSION_ALLOCATE);
						mOcclusionQuery[LLViewerCamera::sCurCameraID] = getNewOcclusionQueryObjectName();
					}

					// Depth clamp all water to avoid it being culled as a result of being
					// behind the far clip plane, and in the case of edge water to avoid
					// it being culled while still visible.
					bool const use_depth_clamp = gGLManager.mHasDepthClamp &&
												(mSpatialPartition->mDrawableType == LLDrawPool::POOL_WATER ||
												mSpatialPartition->mDrawableType == LLDrawPool::POOL_VOIDWATER);

					LLGLEnable clamp(use_depth_clamp ? GL_DEPTH_CLAMP : 0);	

					// Query target: GL_ANY_SAMPLES_PASSED when mHasOcclusionQuery2 is
					// set (never on Darwin builds), otherwise GL_SAMPLES_PASSED_ARB.
#if !LL_DARWIN					
					U32 mode = gGLManager.mHasOcclusionQuery2 ? GL_ANY_SAMPLES_PASSED : GL_SAMPLES_PASSED_ARB;
#else
					U32 mode = GL_SAMPLES_PASSED_ARB;
#endif
					
#if LL_TRACK_PENDING_OCCLUSION_QUERIES
					sPendingQueries.insert(mOcclusionQuery[LLViewerCamera::sCurCameraID]);
#endif

					{
						LLFastTimer t(FTM_PUSH_OCCLUSION_VERTS);
						
						//store which frame this query was issued on
						mOcclusionIssued[LLViewerCamera::sCurCameraID] = gFrameCount;

						{
							LLFastTimer t(FTM_OCCLUSION_BEGIN_QUERY);
							glBeginQueryARB(mode, mOcclusionQuery[LLViewerCamera::sCurCameraID]);					
						}
					
						// Uses whichever shader is currently bound; one must be bound.
						LLGLSLShader* shader = LLGLSLShader::sCurBoundShaderPtr;
						llassert(shader);

						// Upload the box centre and the box size enlarged by the fudge value.
						shader->uniform3fv(LLShaderMgr::BOX_CENTER, 1, bounds[0].getF32ptr());
						//static LLVector4a fudge(SG_OCCLUSION_FUDGE);
						// NOTE(review): `vel` is declared but never read; `fudge` is built
						// from the SG_OCCLUSION_FUDGE constant, so the "SHOcclusionFudge"
						// setting has no effect here - confirm whether fudge(vel) was intended.
						static LLCachedControl<F32> vel("SHOcclusionFudge",SG_OCCLUSION_FUDGE);
						LLVector4a fudge(SG_OCCLUSION_FUDGE);
						static LLVector4a fudged_bounds;
						fudged_bounds.setAdd(fudge, bounds[1]);
						shader->uniform3fv(LLShaderMgr::BOX_SIZE, 1, fudged_bounds.getF32ptr());

						// The two branches below issue the same draw calls; the void-water
						// branch without depth clamp additionally holds an
						// LLGLSquashToFarClip object while drawing and uses its own timer.
						if (!use_depth_clamp && mSpatialPartition->mDrawableType == LLDrawPool::POOL_VOIDWATER)
						{
							LLFastTimer t(FTM_OCCLUSION_DRAW_WATER);

							LLGLSquashToFarClip squash(glh_get_current_projection(), 1);
							if (camera->getOrigin().isExactlyZero())
							{ //origin is invalid, draw entire box
								gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, 0);
								gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, b111*8);				
							}
							else
							{
								// draw only the fan selected for this camera position
								gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, get_box_fan_indices(camera, bounds[0]));
							}
						}
						else
						{
							LLFastTimer t(FTM_OCCLUSION_DRAW);
							if (camera->getOrigin().isExactlyZero())
							{ //origin is invalid, draw entire box
								gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, 0);
								gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, b111*8);				
							}
							else
							{
								// draw only the fan selected for this camera position
								gPipeline.mCubeVB->drawRange(LLRender::TRIANGLE_FAN, 0, 7, 8, get_box_fan_indices(camera, bounds[0]));
							}
						}


						{
							LLFastTimer t(FTM_OCCLUSION_END_QUERY);
							glEndQueryARB(mode);
						}
					}
				}

				// A fresh query is now in flight: mark it pending and drop any
				// discard request.
				{
					LLFastTimer t(FTM_SET_OCCLUSION_STATE);
					setOcclusionState(LLOcclusionCullingGroup::QUERY_PENDING);
					clearOcclusionState(LLOcclusionCullingGroup::DISCARD_QUERY);
				}
			}
		}
	}
}
// Parallel Tiling
// Runs the tiled stencil sweep over time steps 1..T and returns the elapsed
// wall-clock time as the difference of two omp_get_wtime() readings.
//
// Time is cut into bands of `timeBand` steps. Inside each band two families
// of tiles are swept, each family by its own `omp parallel for`:
//   * A-tiles begin `width_max` cells wide; per time step the left edge moves
//     one cell right and the right edge one cell left.
//   * B-tiles begin `width_min` cells wide; per time step the left edge moves
//     one cell left and the right edge one cell right.
// A tile edge that sits on the domain boundary stays pinned there (slope 0),
// and every x range is clamped to [lowerBound, upperBound].
double test_1(){
    const double began = omp_get_wtime();

    for (int t0 = 1; t0 <= T; t0 += timeBand) {
        // Last time step of this band, clamped to T.
        const int t1 = min(t0 + timeBand - 1, T);

        // ---- A-tiles ----
        #pragma omp parallel for schedule(dynamic, A_tiles_per_core)
        for (int x0 = tiles_A_start; x0 <= upperBound; x0 += betweenTiles) {
            // Each edge is described as base + slope * (t - t0).
            int leftBase   = x0;
            int leftSlope  = 1;
            int rightBase  = x0 + width_max - 1;
            int rightSlope = -1;

            if (x0 <= lowerBound) {
                // Left-edge tile: the left side is the domain boundary.
                leftBase  = lowerBound;
                leftSlope = 0;
            } else if (rightBase >= upperBound) {
                // Right-edge tile: the right side is the domain boundary.
                rightBase  = upperBound;
                rightSlope = 0;
            }

            for (int t = t0; t <= t1; ++t) {
                const int age = t - t0;
                // Buffers alternate every time step; the parity of (t - 1)
                // selects the one to read from.
                int src = (t - 1) & 1;
                int dst = 1 - src;

                const int first = max(leftBase + leftSlope * age, lowerBound);
                const int last  = min(rightBase + rightSlope * age, upperBound);
                for (int x = first; x <= last; ++x) {
                    stencil(src, dst, x);
                }
            }
        }

        // ---- B-tiles (started only after the A-tile loop has finished) ----
        #pragma omp parallel for schedule(dynamic, B_tiles_per_core)
        for (int x0 = tiles_B_start; x0 <= upperBound; x0 += betweenTiles) {
            const int leftBase  = x0;
            const int leftSlope = -1;
            int rightBase  = x0 + width_min - 1;
            int rightSlope = 1;

            if (rightBase >= upperBound) {
                // Right-edge tile: the right side is the domain boundary.
                rightBase  = upperBound;
                rightSlope = 0;
            }

            for (int t = t0; t <= t1; ++t) {
                const int age = t - t0;
                // Same buffer selection rule as for the A-tiles.
                int src = (t - 1) & 1;
                int dst = 1 - src;

                const int first = max(leftBase + leftSlope * age, lowerBound);
                const int last  = min(rightBase + rightSlope * age, upperBound);
                for (int x = first; x <= last; ++x) {
                    stencil(src, dst, x);
                }
            }
        }
    }

    return omp_get_wtime() - began;
}