Ejemplo n.º 1
0
int main()
{
   /*
     Runtime configuration, only when compiled with OpenMP support:
     turn dynamic thread adjustment off, request 3 threads per team and
     enable nested parallelism, warning when a request did not stick.
   */
#ifdef _OPENMP
   (void) omp_set_dynamic(FALSE);
   if (omp_get_dynamic() != 0)
   {
      printf("Warning: dynamic adjustment of threads has been set\n");
   }

   (void) omp_set_num_threads(3);
   (void) omp_set_nested(TRUE);
   if (omp_get_nested() == 0)
   {
      printf("Warning: nested parallelism not set\n");
   }
#endif

   /* Report the nesting state that the runtime actually ended up with. */
   {
      const char *nested_status = "not supported";
      if (omp_get_nested())
      {
         nested_status = "supported";
      }
      printf("Nested parallelism is %s\n", nested_status);
   }

   /*
     The thread number printed in the inner region is the number within
     the inner team, so inner threads spawned by different outer threads
     cannot be told apart from this output alone.
   */
#pragma omp parallel
   {
      const int outer_id = omp_get_thread_num();
      printf("Thread %d executes the outer parallel region\n", outer_id);

      /* Every outer thread opens its own inner team of 2 threads. */
      #pragma omp parallel num_threads(2)
      {
         const int inner_id = omp_get_thread_num();
         printf("  Thread %d executes the inner parallel region\n", inner_id);
      }
   }

   return 0;
}
Ejemplo n.º 2
0
// Entry point: plays `options.number_of_games` games in sequence and keeps a
// running tally of wins per bot name (plus draws), printing the tally after
// every game. A SIGINT caught by `sigint_handler` stops the series once the
// current game has finished.
//
// Returns 0 in all cases.
int main(int argc, char* argv[])
{
    signal(SIGINT, sigint_handler);
#if !defined(NDEBUG)
    std::cout << "\t> Running in DEBUG mode" << std::endl;
#endif

#if defined(OPENMP_FOUND)
    // Nested parallel regions are requested up front; the assert verifies
    // that the runtime honoured the request.
    omp_set_nested(true);
    std::cout << "\t> Running using OPENMP " << std::endl;
    std::cout << "\t\t> " << omp_get_max_threads() << " threads max" << std::endl;
    std::cout << "\t\t> " << omp_get_wtick()*1e9 << "ns tick" << std::endl;
    assert( omp_get_nested() );
#endif

//     test_random();
    // NOTE(review): rand() is not seeded in this function, so the seed may be
    // the same on every run — confirm whether that is intentional.
    Rng rng;
    rng.seed(rand());

    Options options = parse_options(argc, argv);

    typedef std::map<std::string, int> Wins;
    Wins wins;

    for (int kk=0; kk<options.number_of_games; kk++)
    {
        std::cout << std::endl << std::endl;
        std::cout << "****************************************" << std::endl;
        std::cout << "game " << kk << "/" << options.number_of_games << std::endl;

        const Game& game = play_game(options, rng);

        // A negative winner index is counted as a draw; otherwise the win is
        // credited to the hero's name when it is a real bot, else to "bot".
        const int winner = game.state.get_winner();
        if (winner < 0) wins["draw"]++;
        else {
            std::string winner_name = "bot";
            if (game.hero_infos[winner].is_real_bot())
                winner_name = game.hero_infos[winner].name;
            wins[winner_name]++;
        }

        // Report the number of games actually completed so far, not the
        // total requested: the summary is printed after every game and the
        // series may be cut short by SIGINT.
        const int games_played = kk + 1;

        std::cout << std::endl;
        std::cout << "after " << games_played << " games" << std::endl;
        for (Wins::const_iterator wi=wins.begin(), wie=wins.end(); wi!=wie; ++wi)
        {
            if (wi->first == "draw")
            {
                std::cout << "  " << wi->second << " draw" << std::endl;
                continue;
            }
            std::cout << "  " << wi->second << " victory for " << wi->first << std::endl;
        }

        if (sigint_already_caught) break;
    }

    return 0;
}
Ejemplo n.º 3
0
/*
 * Nested-parallelism test "nesting 002".
 *
 * Runs a parallel region nested inside another one, first written inline and
 * then through func_nesting(), and checks that the accumulated sum is 2*2 in
 * both cases. The variables thds, sum and errors, and the function
 * func_nesting(), are declared outside this block.
 *
 * Returns 0 on success and 1 on failure. Exits with status 0 without running
 * the test when only one thread is available or nesting cannot be enabled.
 */
int
main ()
{
    thds = omp_get_max_threads ();
    if (thds == 1) {
        printf ("should be run this program on multi threads.\n");
        exit (0);
    }
    /* Fix the team size at 2 and request nested parallelism. */
    omp_set_dynamic (0);
    omp_set_num_threads (2);
    omp_set_nested (1);
    if (omp_get_nested () == 0) {
        printf ("test skipped.\n");
        exit(0);
    }

    sum = 0;
    #pragma omp parallel
    {
        #pragma omp parallel
        {
            int	add;

            /*
             * When the inner region is serialized (team of one), each outer
             * thread adds 2 once; otherwise each of the inner threads adds 1.
             * With teams of 2 the expected total is 2*2 either way.
             */
            if (omp_get_num_threads () == 1) {
                add = 2;
                printf ("nested parallel is serialized.\n");
            } else {
                add = 1;
            }

            #pragma omp critical
            {
                sum += add;
            }
        }
    }
    if (sum != 2*2) {
        errors += 1;
    }


    /* Same check with the nested work performed inside func_nesting(). */
    sum = 0;
    #pragma omp parallel
    func_nesting ();
    if (sum != 2*2) {
        errors += 1;
    }


    if (errors == 0) {
        printf ("nesting 002 : SUCCESS\n");
        return 0;
    } else {
        printf ("nesting 002 : FAILED\n");
        return 1;
    }
}
/// Logs the OpenMP configuration and adjusts the thread count.
///
/// Prints the processor count, the maximum number of active levels (non-WIN32
/// builds only), the maximum thread count and the nested flag. When the
/// processor count differs from the maximum thread count, the thread count is
/// set to the processor count. On WIN32 builds each thread of a parallel
/// region is additionally pinned via SetThreadAffinityMask to the single bit
/// given by its thread id.
///
/// @return true on success, false if any exception was caught.
bool prepOpenMP()
{
    try
    {
        GDEBUG_STREAM("--> OpenMP info <--");
        GDEBUG_STREAM("--------------------------------------------------------");

        int numOpenMPProcs = omp_get_num_procs();
        GDEBUG_STREAM("GtPlusRecon, numOpenMPProcs : " << numOpenMPProcs);

        #ifndef WIN32
            int maxOpenMPLevels = omp_get_max_active_levels();
            GDEBUG_STREAM("GtPlusRecon, maxOpenMPLevels : " << maxOpenMPLevels);
        #endif // WIN32

        int maxOpenMPThreads = omp_get_max_threads();
        GDEBUG_STREAM("GtPlusRecon, maxOpenMPThreads : " << maxOpenMPThreads);

        if ( numOpenMPProcs != maxOpenMPThreads )
        {
            GDEBUG_STREAM("GtPlusRecon, numOpenMPProcs != maxOpenMPThreads , hyperthreading must be disabled ... ");
            omp_set_num_threads(numOpenMPProcs);
        }

        // omp_set_nested(1);
        int allowOpenMPNested = omp_get_nested();
        GDEBUG_STREAM("GtPlusRecon, allowOpenMPNested : " << allowOpenMPNested);

        #ifdef WIN32
            GDEBUG_STREAM("----------------------------------");
            GDEBUG_STREAM("GtPlus, set thread affinity ... ");

            /// lock the threads
            #pragma omp parallel default(shared)
            {
                int tid = omp_get_thread_num();

                // The shift must be done in DWORD_PTR width: shifting the int
                // literal 1 is undefined for tid >= 31 and would be truncated
                // before being widened to the mask type.
                const int maskBits = static_cast<int>(sizeof(DWORD_PTR) * 8);
                if ( tid < maskBits )
                {
                    DWORD_PTR mask = static_cast<DWORD_PTR>(1) << tid;
                    GDEBUG_STREAM("thread id : " << tid << " - mask : " << mask);
                    SetThreadAffinityMask( GetCurrentThread(), mask );
                }
                else
                {
                    // A single affinity mask cannot address this thread id;
                    // leave the thread unpinned rather than shift out of range.
                    GDEBUG_STREAM("thread id : " << tid << " - exceeds affinity mask width, not pinned");
                }
            }
        #endif // WIN32

        GDEBUG_STREAM("--------------------------------------------------------");
    }
    catch(...)
    {
        GERROR_STREAM("Errors in GtPlus prepOpenMP() ... ");
        return false;
    }

    return true;
}
Ejemplo n.º 5
0
Archivo: main.c Proyecto: dahlem/heat
/*
 * Prints the build-time and run-time settings of the application to stdout.
 *
 * Sections: (1) application settings (GSL, OpenMP, debug), (2) mesh settings,
 * (3) conjugate gradient settings and, when built with MPI, (4) MPI settings.
 * With HAVE_MPI defined only rank 0 prints the sections; the trailing blank
 * lines and the flush are executed by every caller.
 *
 * Reads the globals globalArgs and (with HAVE_MPI) mpiArgs.
 */
void print_settings()
{
#ifdef HAVE_MPI
    if (mpiArgs.rank == 0) {
#endif /* HAVE_MPI */
        fprintf(stdout, "(1) Application settings\n");

#ifdef HAVE_LIBGSL
        fprintf(stdout, "GSL configured        : true\n");
#else
        fprintf(stdout, "GSL configured        : false\n");
#endif /* HAVE_LIBGSL */

#ifdef HAVE_OPENMP
        fprintf(stdout, "OpenMP                : true\n");
        fprintf(stdout, "Max number of Threads : %d\n", omp_get_max_threads());
        fprintf(stdout, "Support Nesting (0/1) : %d\n", omp_get_nested());
#else
        fprintf(stdout, "OpenMP                : false\n");
#endif /* HAVE_OPENMP */

        /* NDEBUG is defined for non-debug builds, so debugging is on
           exactly when NDEBUG is NOT defined. */
#ifdef NDEBUG
        fprintf(stdout, "Debug                 : false\n\n");
#else
        fprintf(stdout, "Debug                 : true\n\n");
#endif /* NDEBUG */

        fprintf(stdout, "(2) Mesh settings\n");
        fprintf(stdout, "Space Dimension       : %d\n", globalArgs.s);
        fprintf(stdout, "Time Dimension        : %d\n", globalArgs.t);
        fprintf(stdout, "Delta                 : %1.8f\n", globalArgs.d);
        fprintf(stdout, "Input Range           : %2.2f <= x <= %2.2f; %2.2f <= y <= %2.2f\n\n",
                globalArgs.x0, globalArgs.x1, globalArgs.y0, globalArgs.y1);

        fprintf(stdout, "(3) Conjugate Gradient settings\n");
        fprintf(stdout, "Error Threshold       : %e\n\n", globalArgs.e);

#ifdef HAVE_MPI
        fprintf(stdout, "(4) MPI settings\n");
        fprintf(stdout, "Number Processors     : %d\n", mpiArgs.num_tasks);
    }
#endif /* HAVE_MPI */

    fprintf(stdout, "\n\n");
    fflush(stdout);
}
// Runs `f` once per partition inside a parallel region of `num_partitions`
// threads, giving each thread its own freshly constructed Exec instance.
//
// f              : callable invoked as f(thread_number, number_of_threads).
//                  F is not declared in this block; presumably a template
//                  parameter declared just above — TODO confirm.
// num_partitions : number of threads requested for the parallel region.
// partition_size : passed to the Exec constructor, used to size the scratch
//                  buffers, and set as the thread count via
//                  omp_set_num_threads() before f is called.
//
// When omp_get_nested() reports that nesting is off, f(0,1) is called once on
// the calling thread and no parallel region is opened.
void OpenMP::partition_master( F const& f
                             , int num_partitions
                             , int partition_size
                             )
{
  if (omp_get_nested()) {
    using Exec = Impl::OpenMPExec;

    // Remember the caller's instance so it can be restored after the region.
    Exec * prev_instance = Impl::t_openmp_instance;

    Exec::validate_partition( prev_instance->m_pool_size, num_partitions, partition_size );

    OpenMP::memory_space space;

    #pragma omp parallel num_threads(num_partitions)
    {
      // Each thread placement-constructs its own Exec in memory obtained
      // from `space`. NOTE(review): every thread assigns
      // Impl::t_openmp_instance, so it is presumably thread-local — confirm.
      void * const ptr = space.allocate( sizeof(Exec) );

      Impl::t_openmp_instance = new (ptr) Exec( partition_size );

      // Scratch sizes scale with the partition size, except the per-thread
      // local buffer which is a fixed 1024 bytes.
      size_t pool_reduce_bytes  =   32 * partition_size ;
      size_t team_reduce_bytes  =   32 * partition_size ;
      size_t team_shared_bytes  = 1024 * partition_size ;
      size_t thread_local_bytes = 1024 ;

      Impl::t_openmp_instance->resize_thread_data( pool_reduce_bytes
                                                 , team_reduce_bytes
                                                 , team_shared_bytes
                                                 , thread_local_bytes
                                                 );

      omp_set_num_threads(partition_size);
      f( omp_get_thread_num(), omp_get_num_threads() );

      // Manual teardown mirrors the placement new above: destroy the object,
      // then hand the raw storage back to `space`.
      Impl::t_openmp_instance->~Exec();
      space.deallocate( Impl::t_openmp_instance, sizeof(Exec) );
      Impl::t_openmp_instance = nullptr;
    }

    // Restore the instance that was active before the partitioned region.
    Impl::t_openmp_instance  = prev_instance;
  }
  else {
    // nested openmp not enabled
    f(0,1);
  }
}
Ejemplo n.º 7
0
/*
 * Prints information about the OpenMP execution environment.
 *
 * A parallel region is started and thread 0 alone queries and prints the
 * host name, processor count, team size, maximum thread count, whether the
 * code runs in parallel, and the dynamic and nested settings.
 *
 * The program always terminates through exit(0).
 */
int main (int argc, char *argv[])
{
  int nthreads, tid, procs, maxt, inpar, dynamic, nested;
  char name[50] = "";

  /* Start parallel region */
#pragma omp parallel private(nthreads, tid)
  {

    /* Obtain thread number */
    tid = omp_get_thread_num();

    /* Only master thread does this
       We could also use #pragma omp master
     */
    if (tid == 0)
      {
        printf("Thread %d getting environment info...\n", tid);

        /* Get host name. On failure fall back to an empty name, and always
           terminate the buffer explicitly: a truncated host name is not
           guaranteed to be NUL-terminated. */
        if (gethostname(name, sizeof(name)) != 0)
          name[0] = '\0';
        name[sizeof(name) - 1] = '\0';

        /* Get environment information */
        procs = omp_get_num_procs();
        nthreads = omp_get_num_threads();
        maxt = omp_get_max_threads();
        inpar = omp_in_parallel();
        dynamic = omp_get_dynamic();
        nested = omp_get_nested();

        /* Print environment information */
        printf("Hostname = %s\n", name);
        printf("Number of processors = %d\n", procs);
        printf("Number of threads = %d\n", nthreads);
        printf("Max threads = %d\n", maxt);
        printf("In parallel? = %d\n", inpar);
        printf("Dynamic threads enabled? = %d\n", dynamic);
        printf("Nested parallelism supported? = %d\n", nested);

      }

  }  /* Done */
  exit(0);
}
Ejemplo n.º 8
0
/*
 * Entry hook for the start of a parallel region.
 *
 * fn       : function handed to bomp_start_processing().
 * data     : argument handed on together with fn.
 * nthreads : requested thread count; 0 makes the global thread_limit ICV the
 *            team size (see below).
 *
 * When not already inside a parallel region, a new task ICV record is
 * created, its thread count is chosen, and processing is started. When
 * already inside one, nothing is started here; only the task's active_levels
 * counter is incremented. If the ICV record cannot be created the function
 * returns without starting anything.
 */
void GOMP_parallel_start(void (*fn)(void *),
                         void *data,
                         unsigned nthreads)
{
    debug_printf("GOMP_parallel_start(%p, %p, %u)\n", fn, data, nthreads);

    /* Identify the number of threads that can be spawned and start the processing */
    if (!omp_in_parallel()) {
        debug_printf("not in parallel\n");

        struct omp_icv_task *icv_task = bomp_icv_task_new();
        if (!icv_task) {
            debug_printf("no icv task\n");
            return;
        }

        /* Default team size is the current maximum thread count. */
        icv_task->active_levels = 1;
        icv_task->nthreads = omp_get_max_threads();
        debug_printf("omp_get_max_threads = %u\n", icv_task->nthreads);

        /* No explicit request, or dynamic adjustment with a request larger
           than the default: use the global thread_limit ICV instead. */
        if (nthreads == 0 || (icv_task->dynamic && icv_task->nthreads < nthreads)) {
            icv_task->nthreads = OMP_GET_ICV_GLOBAL(thread_limit);
            debug_printf("resetting to = %u\n", icv_task->nthreads);
        }

        bomp_icv_set_task(icv_task);
        debug_printf("icv task set %u\n", icv_task->nthreads);

        /* start processing */
        bomp_start_processing(fn, data, 0, icv_task->nthreads);
    } else {
        if (omp_get_nested()) {
            /* Nested parallelism is not handled: the assertion below always
               fails when assertions are enabled. */
            // handle nested parallelism
            assert(!"Handling nested paralellism\n");
        }

        /* we have already started enough threads */
        uint32_t active_levels = OMP_GET_ICV_TASK(active_levels);
        //debug_printf("setting active_levels to %u\n", active_levels+1);

        OMP_SET_ICV_TASK(active_levels, active_levels+1);
    }
}
Ejemplo n.º 9
0
Archivo: lib-1.c Proyecto: 0day-ci/gcc
/*
 * Self-checking test of OpenMP runtime library routines.
 *
 * Exercises simple locks, nestable locks, the dynamic and nested settings,
 * the thread-count queries (outside and inside a parallel region),
 * omp_get_num_procs, omp_in_parallel and the wall-clock timer routines.
 * Any unexpected result calls abort(); returns 0 when every check passes.
 */
int
main (void)
{
  double d, e;
  int l;
  omp_lock_t lck;
  omp_nest_lock_t nlck;

  /* Start timestamp, compared against a second reading at the end. */
  d = omp_get_wtime ();

  /* Simple lock: testing a held lock must fail, testing a free lock must
     succeed (and thereby acquire it).  */
  omp_init_lock (&lck);
  omp_set_lock (&lck);
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  if (! omp_test_lock (&lck))
    abort ();
  if (omp_test_lock (&lck))
    abort ();
  omp_unset_lock (&lck);
  omp_destroy_lock (&lck);

  /* Nestable lock: the value returned by omp_test_nest_lock is checked
     against the expected nesting depth after each acquire/release.  */
  omp_init_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 1)
    abort ();
  omp_set_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 3)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  if (omp_test_nest_lock (&nlck) != 2)
    abort ();
  omp_unset_nest_lock (&nlck);
  omp_unset_nest_lock (&nlck);
  omp_destroy_nest_lock (&nlck);

  /* The dynamic setting must read back as written.  */
  omp_set_dynamic (1);
  if (! omp_get_dynamic ())
    abort ();
  omp_set_dynamic (0);
  if (omp_get_dynamic ())
    abort ();

  /* The nested setting must read back as written.  */
  omp_set_nested (1);
  if (! omp_get_nested ())
    abort ();
  omp_set_nested (0);
  if (omp_get_nested ())
    abort ();

  /* Outside a parallel region: team size 1, thread number 0, and the
     maximum thread count follows omp_set_num_threads.  */
  omp_set_num_threads (5);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 5)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  omp_set_num_threads (3);
  if (omp_get_num_threads () != 1)
    abort ();
  if (omp_get_max_threads () != 3)
    abort ();
  if (omp_get_thread_num () != 0)
    abort ();
  /* Inside a parallel region: l collects any failure from any thread via
     the bitwise-or reduction.  */
  l = 0;
#pragma omp parallel reduction (|:l)
  {
    l = omp_get_num_threads () != 3;
    l |= omp_get_thread_num () < 0;
    l |= omp_get_thread_num () >= 3;
#pragma omp master
    l |= omp_get_thread_num () != 0;
  }
  if (l)
    abort ();

  if (omp_get_num_procs () <= 0)
    abort ();
  /* omp_in_parallel must be false here and true inside both regions.  */
  if (omp_in_parallel ())
    abort ();
#pragma omp parallel reduction (|:l)
  l = ! omp_in_parallel ();
#pragma omp parallel reduction (|:l) if (1)
  l = ! omp_in_parallel ();
  if (l)
    abort ();

  /* The wall clock must not have gone backwards since the start.  */
  e = omp_get_wtime ();
  if (d > e)
    abort ();
  d = omp_get_wtick ();
  /* Negative precision is definitely wrong,
     bigger than 1s clock resolution is also strange.  */
  if (d <= 0 || d > 1)
    abort ();

  return 0;
}
Ejemplo n.º 10
0
//------------------------------------------------------------------------------------------------------------------------------
// Benchmark driver.
//
// Usage: ./a.out  [log2_box_dim]  [target_boxes_per_rank]
// Exactly two arguments are required; otherwise the usage line is printed
// and the program exits with status 0.
//
// Steps, in order: query the OpenMP team size and nested flag, initialize
// MPI (when USE_MPI is defined), validate the arguments, choose the number
// of boxes per dimension, create and initialize the fine grid, build the
// multigrid hierarchy, run a warm-up pass and a timed pass of solves, print
// timings and the error against the true solution, finalize MPI.
//
// Returns 0; every early-exit path also uses exit(0).
int main(int argc, char **argv){
  int my_rank=0;
  int num_tasks=1;
  int OMP_Threads = 1;
  int OMP_Nested = 0;

  // Record the team size and nested setting as seen by the master thread.
  #ifdef _OPENMP
  #pragma omp parallel 
  {
    #pragma omp master
    {
      OMP_Threads = omp_get_num_threads();
      OMP_Nested  = omp_get_nested();
    }
  }
  #endif
    

  #ifdef USE_MPI
  // MPI_THREAD_SINGLE is requested without OpenMP, MPI_THREAD_FUNNELED with it.
  int    actual_threading_model = -1;
  int requested_threading_model = -1;
      requested_threading_model = MPI_THREAD_SINGLE;
    //requested_threading_model = MPI_THREAD_FUNNELED;
    //requested_threading_model = MPI_THREAD_SERIALIZED;
    //requested_threading_model = MPI_THREAD_MULTIPLE;
  //MPI_Init(&argc, &argv);
  #ifdef _OPENMP
      requested_threading_model = MPI_THREAD_FUNNELED;
    //requested_threading_model = MPI_THREAD_SERIALIZED;
    //requested_threading_model = MPI_THREAD_MULTIPLE;
  //MPI_Init_thread(&argc, &argv, requested_threading_model, &actual_threading_model);
  #endif
  MPI_Init_thread(&argc, &argv, requested_threading_model, &actual_threading_model);
  MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
//if(actual_threading_model>requested_threading_model)actual_threading_model=requested_threading_model;
  // Rank 0 reports the requested and the granted threading model.
  // NOTE(review): the second MPI_THREAD_MULTIPLE branch in each chain below
  // can never be reached because the first branch tests the same value.
  if(my_rank==0){
       if(requested_threading_model == MPI_THREAD_MULTIPLE  )printf("Requested MPI_THREAD_MULTIPLE, ");
  else if(requested_threading_model == MPI_THREAD_SINGLE    )printf("Requested MPI_THREAD_SINGLE, ");
  else if(requested_threading_model == MPI_THREAD_FUNNELED  )printf("Requested MPI_THREAD_FUNNELED, ");
  else if(requested_threading_model == MPI_THREAD_SERIALIZED)printf("Requested MPI_THREAD_SERIALIZED, ");
  else if(requested_threading_model == MPI_THREAD_MULTIPLE  )printf("Requested MPI_THREAD_MULTIPLE, ");
  else                                                       printf("Requested Unknown MPI Threading Model (%d), ",requested_threading_model);
       if(actual_threading_model    == MPI_THREAD_MULTIPLE  )printf("got MPI_THREAD_MULTIPLE\n");
  else if(actual_threading_model    == MPI_THREAD_SINGLE    )printf("got MPI_THREAD_SINGLE\n");
  else if(actual_threading_model    == MPI_THREAD_FUNNELED  )printf("got MPI_THREAD_FUNNELED\n");
  else if(actual_threading_model    == MPI_THREAD_SERIALIZED)printf("got MPI_THREAD_SERIALIZED\n");
  else if(actual_threading_model    == MPI_THREAD_MULTIPLE  )printf("got MPI_THREAD_MULTIPLE\n");
  else                                                       printf("got Unknown MPI Threading Model (%d)\n",actual_threading_model);
  }
  #ifdef USE_HPM // IBM HPM counters for BGQ...
  HPM_Init();
  #endif
  #endif // USE_MPI


  // Defaults; both are overwritten from the command line, which is mandatory.
  int log2_box_dim = 6;
  int target_boxes_per_rank = 1;

  if(argc==3){
           log2_box_dim=atoi(argv[1]);
     target_boxes_per_rank=atoi(argv[2]);
  }else{
    if(my_rank==0){printf("usage: ./a.out  [log2_box_dim]  [target_boxes_per_rank]\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(log2_box_dim<4){
    if(my_rank==0){printf("log2_box_dim must be at least 4\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(target_boxes_per_rank<1){
    if(my_rank==0){printf("target_boxes_per_rank must be at least 1\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }

  if(my_rank==0){
    if(OMP_Nested)fprintf(stdout,"%d MPI Tasks of %d threads (OMP_NESTED=TRUE)\n\n" ,num_tasks,OMP_Threads);
             else fprintf(stdout,"%d MPI Tasks of %d threads (OMP_NESTED=FALSE)\n\n",num_tasks,OMP_Threads);
  }
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
  // calculate the problem size...
  // Picks the largest bi in [1,999] such that bi^3 does not exceed the total
  // number of target boxes and the odd factor of (box_dim*bi) is at most
  // MAX_COARSE_DIM. boxes_in_i stays -1 when no bi qualifies.
  #ifndef MAX_COARSE_DIM
  #define MAX_COARSE_DIM 11
  #endif
  int64_t box_dim=1<<log2_box_dim;
  int64_t target_boxes = (int64_t)target_boxes_per_rank*(int64_t)num_tasks;
  int64_t boxes_in_i = -1;
  int64_t bi;
  for(bi=1;bi<1000;bi++){ // all possible problem sizes
    int64_t total_boxes = bi*bi*bi;
    if(total_boxes<=target_boxes){
      int64_t coarse_grid_dim = box_dim*bi;
      while( (coarse_grid_dim%2) == 0){coarse_grid_dim=coarse_grid_dim/2;}
      if(coarse_grid_dim<=MAX_COARSE_DIM){
        boxes_in_i = bi;
      }
    }
  }
  if(boxes_in_i<1){
    if(my_rank==0){printf("failed to find an acceptable problem size\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // create the fine level...
  #ifdef USE_PERIODIC_BC
  int bc = BC_PERIODIC;
  #else
  int bc = BC_DIRICHLET;
  #endif
  level_type fine_grid;
  int ghosts=stencil_get_radius();
  create_level(&fine_grid,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,bc,my_rank,num_tasks);
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  #ifdef USE_HELMHOLTZ
  double a=1.0;double b=1.0; // Helmholtz
  if(my_rank==0)fprintf(stdout,"  Creating Helmholtz (a=%f, b=%f) test problem\n",a,b);
  #else
  double a=0.0;double b=1.0; // Poisson
  if(my_rank==0)fprintf(stdout,"  Creating Poisson (a=%f, b=%f) test problem\n",a,b);
  #endif
  // h0 is the reciprocal of the number of cells along one dimension.
  double h0=1.0/( (double)boxes_in_i*(double)box_dim );
  initialize_problem(&fine_grid,h0,a,b); // calculate VECTOR_ALPHA, VECTOR_BETA, and VECTOR_UTRUE
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  if( ((a==0.0)||(fine_grid.alpha_is_zero==1) ) && (fine_grid.boundary_condition.type == BC_PERIODIC)){ 
    // Poisson w/ periodic BC's... 
    // nominally, u shifted by any constant is still a valid solution.  
    // However, by convention, we assume u sums to zero.
    double average_value_of_u = mean(&fine_grid,VECTOR_UTRUE);
    if(my_rank==0){fprintf(stdout,"  average value of u_true = %20.12e... shifting u_true to ensure it sums to zero...\n",average_value_of_u);}
    shift_vector(&fine_grid,VECTOR_UTRUE,VECTOR_UTRUE,-average_value_of_u);
  }
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  //apply_op(&fine_grid,VECTOR_F,VECTOR_UTRUE,a,b); // by construction, f = A(u_true)
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // With periodic boundaries a non-zero mean of f only triggers a warning;
  // the corrective shift is commented out.
  if(fine_grid.boundary_condition.type == BC_PERIODIC){
    double average_value_of_f = mean(&fine_grid,VECTOR_F);
    if(average_value_of_f!=0.0){
      if(my_rank==0){fprintf(stderr,"  WARNING... Periodic boundary conditions, but f does not sum to zero... mean(f)=%e\n",average_value_of_f);}
      //shift_vector(&fine_grid,VECTOR_F,VECTOR_F,-average_value_of_f);
    }
  }
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  mg_type all_grids;
  int minCoarseDim = 1;
  rebuild_operator(&fine_grid,NULL,a,b); // i.e. calculate Dinv and lambda_max
  MGBuild(&all_grids,&fine_grid,a,b,minCoarseDim); // build the Multigrid Hierarchy 
  double dtol=  0.0;double rtol=1e-10; // converged if ||b-Ax|| / ||b|| < rtol
//double dtol=1e-15;double rtol=  0.0; // converged if ||D^{-1}(b-Ax)|| < dtol
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // Two passes: doTiming==0 warms up and (with MPI) measures the time per
  // solve; doTiming==1 is the timed run. Without USE_MPI both passes run
  // the initial minSolves solves because timePerSolve is never updated.
     int     doTiming;
     int    minSolves = 10; // do at least minSolves MGSolves
  double timePerSolve = 0;
  for(doTiming=0;doTiming<=1;doTiming++){ // first pass warms up, second pass times

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if(doTiming)HPM_Start("FMGSolve()");
    #endif

    #ifdef USE_MPI
    double minTime   = 30.0; // minimum time in seconds that the benchmark should run
    double startTime = MPI_Wtime();
    if(doTiming==1){
      // NOTE(review): if the warm-up measured timePerSolve as 0 this divides
      // by zero — confirm whether that can occur in practice.
      if((minTime/timePerSolve)>minSolves)minSolves=(minTime/timePerSolve); // if one needs to do more than minSolves to run for minTime, change minSolves
    }
    #endif

    if(my_rank==0){
      if(doTiming==0){fprintf(stdout,"\n\n===== warming up by running %d solves ===============================\n",minSolves);}
                 else{fprintf(stdout,"\n\n===== running %d solves =============================================\n",minSolves);}
      fflush(stdout);
    }

    int numSolves =  0; // solves completed
    MGResetTimers(&all_grids);
    // Every solve restarts from a zeroed solution vector.
    while( (numSolves<minSolves) ){
      zero_vector(all_grids.levels[0],VECTOR_U);
      #ifdef USE_FCYCLES
      FMGSolve(&all_grids,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
      #else
       MGSolve(&all_grids,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
      #endif
      numSolves++;
    }

    #ifdef USE_MPI
    if(doTiming==0){
      double endTime = MPI_Wtime();
      timePerSolve = (endTime-startTime)/numSolves;
      MPI_Bcast(&timePerSolve,1,MPI_DOUBLE,0,MPI_COMM_WORLD); // after warmup, process 0 broadcasts the average time per solve (consensus)
    }
    #endif

    #ifdef USE_HPM // IBM performance counters for BGQ...
    if(doTiming)HPM_Stop("FMGSolve()");
    #endif
  }
  MGPrintTiming(&all_grids); // don't include the error check in the timing results
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  if(my_rank==0){fprintf(stdout,"calculating error...  ");}
  double fine_error = error(&fine_grid,VECTOR_U,VECTOR_UTRUE);
  if(my_rank==0){fprintf(stdout,"h = %22.15e  ||error|| = %22.15e\n\n",h0,fine_error);fflush(stdout);}
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // MGDestroy()
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  #ifdef USE_MPI
  #ifdef USE_HPM // IBM performance counters for BGQ...
  HPM_Print();
  #endif
  MPI_Finalize();
  #endif
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  return(0);
}
Ejemplo n.º 11
0
/*
 * Prints the team size, thread number and nested flag at three points:
 * in the serial part, inside an outer parallel region, and inside a
 * parallel region nested within it. Nesting is enabled and the team size
 * is set to 2 before the outer region, and to 2 again inside it.
 */
int main(int argc, char *argv[])
{
  omp_set_nested(1);
  omp_set_num_threads(2);

  {
    const int team = omp_get_num_threads();
    const int self = omp_get_thread_num();
    const int nest = omp_get_nested();
    printf("Master: Nthr %d   Thrid %d   Nested %d\n", team, self, nest);
  }

#pragma omp parallel
  {
    const int outer_team = omp_get_num_threads();
    const int outer_self = omp_get_thread_num();
    const int outer_nest = omp_get_nested();
    printf("Parallel 1: Nthr %d   Thrid %d   Nested %d\n", outer_team, outer_self, outer_nest);

    /* Team size for the region nested inside this thread. */
    omp_set_num_threads(2);

#pragma omp parallel
    {
      const int inner_team = omp_get_num_threads();
      const int inner_self = omp_get_thread_num();
      const int inner_nest = omp_get_nested();
      printf("Parallel 2: Nthr %d   Thrid %d   Nested %d\n", inner_team, inner_self, inner_nest);
    }
  }
}
Ejemplo n.º 12
0
/* Wrapper with a trailing-underscore name that forwards to omp_get_nested()
   and returns its result as a 32-bit integer.  */
int32_t
omp_get_nested_ (void)
{
  const int32_t nested_flag = omp_get_nested ();
  return nested_flag;
}
Ejemplo n.º 13
0
// Master side of a master/slave MPI training loop.
//
// argv[1] is a directory path prefix; "mpi.conf" is appended to it directly
// (no separator is inserted), and the resulting INI file supplies all
// settings. The master initializes the model parameters, sends them to every
// other rank, then repeatedly receives a gradient from any rank, updates the
// parameters through the SGD solver and sends them back, until
// "Master.max_iteration_number" sends have been made. It then collects the
// outstanding gradients, sends STOPTAG to every other rank and writes the
// parameters as raw floats to "Master.save_filename".
//
// Calls exit(1) when argv[1] is missing or the save file cannot be opened.
// NOTE(review): the receive loops block until another rank sends a gradient,
// so at least one slave rank appears to be required — confirm with the
// launcher.
void masterFunc (int argc, char ** argv) {
	/****************************************************************
	* Step 1: Setup and Initialization
	* Load conf, init model, allocate mem, init params, init solver
	* Load cross-validation data
	****************************************************************/

	// Step 1.1: Load configuration
	if (argc < 2) {
		printf("argc %d\n", argc);
		exit(1);
	}
	string dirPath = argv[1];
	boost::property_tree::ptree *confReader = new boost::property_tree::ptree();
	boost::property_tree::ini_parser::read_ini(dirPath+"mpi.conf", *confReader);
	
	string section = "Master.";
	// int validBatchSize = confReader->get<int>(section + "validation_batch_size");
	int nSendMax = confReader->get<int>(section + "max_iteration_number");

	// Step 1.2 Initialize model	
	section = "LSTM.";

	// OpenBLAS is limited to one thread; OpenMP gets the configured thread
	// count with nested parallelism switched off.
	openblas_set_num_threads(1);
	int max_openmp_threads = confReader->get<int>(section + "max_threads");
	omp_set_num_threads(max_openmp_threads);
	omp_set_nested(0);
    printf("MASTER openmp threads: max threads %d, nested %d\n", omp_get_max_threads(), omp_get_nested());
	
	RecurrentNN *rnn = new RNNLSTM(confReader, section);
	int paramSize = rnn->m_paramSize;
	printf("paramSize: %d\n", paramSize);

	// Step 1.3: Allocate master memory
	float *params = new float[paramSize];
	float *grad = new float[paramSize];

	// Step 1.4: Initialize params
	rnn->initParams(params);
	
	// Step 1.5: Initialize SGD Solver
	section = "SGD.";
	sgdBase *sgdSolver = initSgdSolver(confReader, section, paramSize);
	printf("MASTER: finish step 1\n");

	// Step 1.6: Load cross-validation data
	// section = "ValidationData.";
	// DataFactory *dataset = initDataFactory(confReader, section);
	// int numSample = dataset->getNumberOfData();
	// int dataSize  = dataset->getDataSize();
	// int labelSize = dataset->getLabelSize();

	// float *data  = new float[validBatchSize * dataSize];
	// float *label = new float[validBatchSize * labelSize];

	/****************************************************************
	* Step 2: Seed the slaves
	* (1) Broadcast paramSize to all slaves
	* (2) Send the same initial params with WORKTAG to all slaves
	****************************************************************/
	int nProc;
	MPI_Comm_size(MPI_COMM_WORLD, &nProc);
	int nSlave = nProc - 1;

	MPI_Bcast(&paramSize, 1, MPI_INT, ROOT, MPI_COMM_WORLD);
	
	// nSend counts parameter messages sent, nRecv gradient messages
	// received; their difference is the number of gradients still pending.
	int nSend = 0;
	int nRecv = 0;
	for (int rank = 1; rank < nProc; ++rank) {
		MPI_Send(params, paramSize, MPI_FLOAT, rank, WORKTAG, MPI_COMM_WORLD);
		nSend++;
	}
	printf("MASTER: finish step 2\n");

	/****************************************************************
	* Step 3: Paralleled training
	* Receive mini-batch grad from *ANY* slave
	* Update params based received grad
	* Re-send params to slave to process next mini-batch
	****************************************************************/
	
	MPI_Status status;
	// TEMP while loop condition
	while (nSend < nSendMax) {
		MPI_Recv(grad, paramSize, MPI_FLOAT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
		nRecv++;
		
		sgdSolver->updateParams(params, grad, status.MPI_SOURCE);
		
		// Send updated params to corresponding slave
		MPI_Send(params, paramSize, MPI_FLOAT, status.MPI_SOURCE, WORKTAG, MPI_COMM_WORLD);
		nSend++;
	}
	printf("MASTER: finish step 3\n");
	
	/****************************************************************
	* Step 4: Stop the slaves
	****************************************************************/
	
	// Step 4.1: Receive all gradients that were dispatched but not yet received
	while (nRecv < nSend) {
		MPI_Recv(grad, paramSize, MPI_FLOAT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);

		sgdSolver->updateParams(params, grad, status.MPI_SOURCE);

		nRecv++;
	}
	// Step 4.2: Send STOPTAG to all slaves
	for (int rank = 1; rank < nProc; ++rank) {
		MPI_Send(&rank, 1, MPI_INT, rank, STOPTAG, MPI_COMM_WORLD);
	}
	printf("MASTER: finish step 4\n");
	
	/****************************************************************
	* Step 5: Save trained parameters and clear things
	****************************************************************/
	section = "Master.";
	string saveFilename = confReader->get<string>(section + "save_filename");

	// The parameters are written as one raw binary block of paramSize floats.
	ofstream savefile (saveFilename.c_str(), ios::out|ios::binary);
	if (savefile.is_open()) {
		savefile.write ((char *)params, sizeof(float) * paramSize);
		savefile.close();
	} else {
		printf("Failed to open savefile\n");
		exit(1);
	}
	
	delete [] params;
	delete [] grad;

	delete confReader;
	delete sgdSolver;
	delete rnn;
}
Ejemplo n.º 14
0
/*
 * OpenMP smoke test, optionally combined with explicitly created pthreads.
 *
 * Any extra command-line argument sets Verbose. MainThread() is run on the
 * calling thread; when SPAWN_THREADS is defined two further threads running
 * _thread are created and joined. With OpenMP enabled the result is the
 * conjunction of ThreadOK[0..2]; without OpenMP the test reports success.
 *
 * Returns 0 on success, 1 when a ThreadOK entry is zero, and -1 when a
 * thread could not be created or joined.
 *
 * NOTE(review): the #endif directly after the signature closes a
 * preprocessor conditional that is opened above this block.
 */
int
test_openmp1(int argc, char *argv[])
#endif
{
  short OK;

  if (argc>1) Verbose = 1;

#ifdef _OPENMP
  /* The argument is nonzero, which requests that nested parallelism be
     enabled; the message reports what the runtime then returns. */
  omp_set_nested(-1);
  printf("%s%s%s\n", "Nested parallel blocks are ", omp_get_nested()?" ":"NOT ", "supported.");
#endif

  MainThread();

#ifdef SPAWN_THREADS
  {
    pthread_t a_thr;
    pthread_t b_thr;
    int status;

	memset(&a_thr, 0, sizeof(a_thr)); /* [i_a] fix valid MSVC complaint about unitialized a_thr / b_thr */
	memset(&b_thr, 0, sizeof(b_thr)); /* [i_a] fix valid MSVC complaint about unitialized a_thr / b_thr */

    /* NOTE(review): accessing a .p member assumes a pthread_t that is a
       struct with that field — confirm for the targeted pthreads library. */
    printf("%s:%d - %s - a_thr:%p - b_thr:%p\n",
           __FILE__,__LINE__,__FUNCTION__,a_thr.p,b_thr.p);

    /* The integer passed as the thread argument identifies the thread. */
    status = pthread_create(&a_thr, NULL, _thread, (void*) 1 );
    if ( status != 0 ) {
      printf("Failed to create thread 1\n");
      return (-1);
    }

    status = pthread_create(&b_thr, NULL, _thread, (void*) 2 );
    if ( status != 0 ) {
      printf("Failed to create thread 2\n");
      return (-1);
    }

    status = pthread_join(a_thr, NULL);
    if ( status != 0 ) {
      printf("Failed to join thread 1\n");
      return (-1);
    }
    printf("Joined thread1\n");

    status = pthread_join(b_thr, NULL);
    if ( status != 0 ) {
      printf("Failed to join thread 2\n");
      return (-1);
    }
    printf("Joined thread2\n");
  }
#endif // SPAWN_THREADS

  OK = 0;
  // Check that we have OpenMP before declaring things OK formally.
#ifdef _OPENMP
    OK = 1;
    {
      /* All three ThreadOK entries must be set for the test to pass. */
      short i;
      for (i=0;i<3;i++) OK &= ThreadOK[i];
    }
    if (OK) printf("OMP : All looks good\n");
    else printf("OMP : Error\n");
#else
    OK = 1;
    printf("OpenMP seems not enabled ...\n");
#endif

  return OK?0:1;
}
Ejemplo n.º 15
0
  const double*, const double*, const int*, const double*, const int*,
  const double*, double*, const int*);


// Runs one DGEMM call per batch entry over packed matrix data.
//
// size   number of batch entries (also used as the outer OpenMP team size
//        when MULTI_DGEMM_USE_NESTED is defined).
// nn     element count used as the extent of the LAST entry, whose end is
//        not given by a following index.
// idata  start index of each entry; entry i spans [idata[i], idata[i+1]).
//        Offsets into adata/bdata/cdata are taken relative to idata[0].
// adata, bdata, cdata
//        operand/result buffers; each entry is treated as an n-by-n matrix
//        where n is the rounded square root of the entry's element count.
//
// NOTE(review): DGEMM, LIBXSTREAM_* are declared outside this excerpt;
// presumably DGEMM is the BLAS routine (C := alpha*A*B + beta*C, here with
// alpha = beta = 1 and no transposition) -- confirm against the declaration.
// NOTE(review): the function body is cut off after the last line shown here.
LIBXSTREAM_TARGET(mic) void process(LIBXSTREAM_INVAL(size_t) size, LIBXSTREAM_INVAL(size_t) nn, const size_t* idata,
  const double* adata, const double* bdata, double* cdata)
{
  if (0 < LIBXSTREAM_GETVAL(size)) {
    static const double alpha = 1, beta = 1;
    static const char trans = 'N';
    const int isize = static_cast<int>(size);
    // All offsets below are relative to the first entry's start index.
    const size_t base = idata[0];

#if defined(_OPENMP) && defined(MULTI_DGEMM_USE_NESTED)
    // Split the available threads evenly across the batch entries; the
    // previous dynamic/nested settings are captured before being overridden.
    const int nthreads = omp_get_max_threads() / LIBXSTREAM_GETVAL(size);
    const int dynamic = omp_get_dynamic(), nested = omp_get_nested();
    omp_set_dynamic(0);
    omp_set_nested(1);
#   pragma omp parallel for schedule(dynamic,1) num_threads(LIBXSTREAM_GETVAL(size))
#endif
    for (int i = 0; i < isize; ++i) {
#if defined(_OPENMP) && defined(MULTI_DGEMM_USE_NESTED)
      // Thread budget for any parallel region started from this iteration.
      omp_set_num_threads(nthreads);
#endif
      LIBXSTREAM_ASSERT(base <= idata[i]);
      // i0/i1: first and one-past-last element of entry i; n2: its element count.
      const size_t i0 = idata[i], i1 = (i + 1) < isize ? idata[i+1] : (i0 + LIBXSTREAM_GETVAL(nn)), n2 = i1 - i0, offset = i0 - base;
      // Matrix order: square root of the element count, rounded to nearest.
      const int n = static_cast<int>(std::sqrt(static_cast<double>(n2)) + 0.5);
      DGEMM(&trans, &trans, &n, &n, &n, &alpha, adata + offset, &n, bdata + offset, &n, &beta, cdata + offset, &n);
    }

#if defined(_OPENMP) && defined(MULTI_DGEMM_USE_NESTED)
Ejemplo n.º 16
0
/*
 * Computes the Mandelbrot set with an OpenMP-parallel loop over columns and,
 * optionally, draws the result in an X11 window.
 *
 * Positional arguments:
 *   argv[1]  number of OpenMP threads
 *   argv[2]  left bound of the real axis
 *   argv[3]  right bound of the real axis
 *   argv[4]  lower bound of the imaginary axis
 *   argv[5]  upper bound of the imaginary axis
 *   argv[6]  width of the image in points
 *   argv[7]  height of the image in points
 *   argv[8]  "enable" to draw into an X window, anything else to skip drawing
 *   argv[9]  file the timing summary is appended to
 *
 * Returns 0 on success and -1 when the arguments are unusable, the X display
 * cannot be opened, or the summary file cannot be opened.
 *
 * Per-thread bookkeeping (rowCnt, thgap, thtv1/thtv2, thtimeStart/thtimeEnd),
 * the result grid rowData and xflag are file-level objects defined elsewhere.
 */
int main(int argc, char *argv[])
{
  /* gettimeofday() reports microseconds; this converts them to seconds. */
  const double USEC_PER_SEC = 1000000.0;

  /* Every argument up to argv[9] is dereferenced below. */
  if (argc < 10) {
    fprintf(stderr,
            "usage: %s threads xleft xright yleft yright width height enable|disable outfile\n",
            argc > 0 ? argv[0] : "mandelbrot");
    return -1;
  }

  Display *display = NULL;
  Window window = 0;  /* the drawing window */
  int screen;         /* which screen */
  GC gc = NULL;
  int status = 0;

  /* set window size */
  int width = atoi(argv[6]);
  int height = atoi(argv[7]);
  int xleft = atoi(argv[2]);
  int yleft = atoi(argv[4]);
  int xright = atoi(argv[3]);
  int yright = atoi(argv[5]);
  int NUM_THREADS = atoi(argv[1]);

  /* A non-positive team size or image extent cannot be processed. */
  if (NUM_THREADS < 1 || width < 1 || height < 1) {
    fprintf(stderr, "threads, width and height must be positive integers\n");
    return -1;
  }

  double xrange = xright - xleft;
  double yrange = yright - yleft;
  /* set window position */
  int x = 0;
  int y = 0;
  int NUM_PROCS = omp_get_num_procs();
  struct timeval tv1, tv2;
  double timeStart, timeEnd;
  gettimeofday(&tv1, NULL);
  /* Multiply in double so the product cannot overflow a narrow tv_sec. */
  timeStart = tv1.tv_sec * USEC_PER_SEC + tv1.tv_usec;

  printf("X Window is %sd\n", argv[8]);
  xflag = strcmp(argv[8], "enable");   /* 0 means "draw" */
  omp_set_num_threads(NUM_THREADS);
  omp_set_nested(1);
  printf("Total %d threads functioning among %d processors\n", NUM_THREADS, NUM_PROCS);
  int nest = omp_get_nested();
  printf("omp_nested is set to %d\n", nest);

  if (xflag == 0) {
    /* open connection with the server */
    display = XOpenDisplay(NULL);
    if (display == NULL) {
      fprintf(stderr, "cannot open display\n");
      return -1;
    }

    screen = DefaultScreen(display);

    /* border width in pixels */
    int border_width = 0;

    /* create window */
    window = XCreateSimpleWindow(display, RootWindow(display, screen), x, y, width, height, border_width,
                                 BlackPixel(display, screen), WhitePixel(display, screen));

    /* create graph */
    XGCValues values;
    long valuemask = 0;

    gc = XCreateGC(display, window, valuemask, &values);
    XSetForeground(display, gc, BlackPixel(display, screen));
    XSetBackground(display, gc, 0X0000FF00);
    XSetLineAttributes(display, gc, 1, LineSolid, CapRound, JoinRound);

    /* map(show) the window */
    XMapWindow(display, window);
    XSync(display, 0);
  }

  /* Working variables of the escape-time iteration. */
  Compl z, c;
  int repeats;
  double temp, lengthsq;
  int i, j;
  int tid;
  int cnt;

  for (cnt = 0; cnt < NUM_THREADS; cnt++) {
    rowCnt[cnt] = 0;
    thgap[cnt] = 0;
  }

  #pragma omp parallel num_threads(NUM_THREADS) private(tid, temp, lengthsq, z, c, repeats, i, j)
  {
    tid = omp_get_thread_num();
    printf("Thread %d!!\n", tid);
    /* Columns are dealt out round-robin, one at a time, to the threads. */
    #pragma omp for schedule(static, 1)
    for (i = 0; i < width; i++) {
      for (j = 0; j < height; j++) {
        gettimeofday(&thtv1[tid], NULL);
        thtimeStart[tid] = thtv1[tid].tv_sec * USEC_PER_SEC + thtv1[tid].tv_usec;
        z.real = 0.0;
        z.imag = 0.0;
        c.real = xleft + (double)i * (xrange / (double)width);
        c.imag = yleft + (double)j * (yrange / (double)height);
        repeats = 0;
        lengthsq = 0.0;

        /* Iterate z = z^2 + c until it escapes |z| >= 2 or the cap is hit. */
        while (repeats < 100000 && lengthsq < 4.0) {
          temp = z.real * z.real - z.imag * z.imag + c.real;
          z.imag = 2 * z.real * z.imag + c.imag;
          z.real = temp;
          lengthsq = z.real * z.real + z.imag * z.imag;
          repeats++;
        }
        /* NOTE(review): every slot written here is indexed by tid or by the
           thread's own (i, j); the critical section is kept only to preserve
           the original timing behaviour. */
        #pragma omp critical
        {
          rowData[i][j] = repeats;
          rowCnt[tid]++;
          gettimeofday(&thtv2[tid], NULL);
          thtimeEnd[tid] = thtv2[tid].tv_sec * USEC_PER_SEC + thtv2[tid].tv_usec;
          thgap[tid] += (thtimeEnd[tid] - thtimeStart[tid]) / USEC_PER_SEC;
        }
      }
    }
    #pragma omp barrier
  }

  /* Draw the graph */
  if (xflag == 0) {
    for (i = 0; i < width; i++) {
      for (j = 0; j < height; j++) {
        XSetForeground(display, gc, 1024 * 1024 * (rowData[i][j] % 256));
        XDrawPoint(display, window, gc, i, j);
      }
    }
    XFlush(display);
  }

  gettimeofday(&tv2, NULL);
  timeEnd = tv2.tv_sec * USEC_PER_SEC + tv2.tv_usec;
  double gap = (timeEnd - timeStart) / USEC_PER_SEC;
  printf("OOOOOOO Graph Drawing Done OOOOOO\n");
  printf("Threads : %d\n", NUM_THREADS);
  printf("Running time : %lf\n", gap);
  printf("\n");
  for (cnt = 0; cnt < NUM_THREADS; cnt++) {
    printf("Thread %d computed %d points consuming %lf seconds\n", cnt, rowCnt[cnt], thgap[cnt]);
  }
  printf("\n");

  /* Append the summary; a missing/unwritable file must not crash the run. */
  FILE *outFile = fopen(argv[9], "a");
  if (outFile == NULL) {
    perror(argv[9]);
    status = -1;
  } else {
    fprintf(outFile, "Threads : %d \n", NUM_THREADS);
    fprintf(outFile, "Running time : %lf\n\n", gap);
    fclose(outFile);
  }

  sleep(5);   /* leave the window on screen for a moment */
  return status;
}
Ejemplo n.º 17
0
/*
 * Checks omp_set_num_threads() inside a nested parallel region.
 *
 * With nesting enabled and dynamic adjustment disabled, a one-thread outer
 * region opens inner regions of 1..thds threads (thds is the initial
 * omp_get_max_threads()).  Each inner thread bumps buf[its id]; afterwards
 * buf[0..i-1] must be exactly 1 and buf[i..thds] must still be 0.
 *
 * Returns 0 and prints SUCCESS when no mismatch was counted (also when the
 * implementation reports nesting as unavailable), 1 and FAILED otherwise.
 */
int
main ()
{
  int	thds, *buf;

  int	errors = 0;


  thds = omp_get_max_threads ();
  if (thds == 1) {
    printf ("should be run this program on multi thread.\n");
    exit (0);
  }
  /* One slot per possible inner thread id plus a sentinel slot at [thds]. */
  buf = (int *) malloc (sizeof(int) * (thds + 1));
  if (buf == NULL) {
    printf ("can not allocate memory.\n");
    exit (1);
  }

  omp_set_dynamic (0);
  omp_set_nested (1);
  if (omp_get_nested () == 0) {
    printf ("nested parallelism is not implement.\n");
    goto END;
  }


  omp_set_num_threads (1);

  #pragma omp parallel 
  {
    int	i, j;

    if (omp_get_num_threads () != 1) {
      #pragma omp critical
      errors += 1;
    }
    if (omp_get_thread_num () != 0) {
      /* Guarded like every other update of the shared counter: this branch
	 is only reached when more than one thread is (wrongly) running. */
      #pragma omp critical
      errors += 1;
    }

    for (i=1; i<=thds; i++) {

      memset (buf, 0, sizeof(int) * (thds+1));
      omp_set_num_threads (i);

      #pragma omp parallel
      {
	int	id = omp_get_thread_num ();

	if (omp_get_num_threads () != i) {
	  #pragma omp critical
	  errors += 1;
	}
	buf[id] += 1;
      }

      /* Every id below i must have been hit exactly once ... */
      for (j=0; j<i; j++) {
	if (buf[j] != 1) {
	  #pragma omp critical
	  errors += 1;
	}	
      }
      /* ... and no id from i upwards may have been touched. */
      for (j=i; j<=thds; j++) {
	if (buf[j] != 0) {
	  #pragma omp critical
	  errors += 1;
	}	
      }
    }
  }


 END:
  free (buf);
  if (errors == 0) {
    printf ("omp_set_nested 002 : SUCCESS\n");
    return 0;
  } else {
    printf ("omp_set_nested 002 : FAILED\n");
    return 1;
  }
}
Ejemplo n.º 18
0
 /*
  * Renders an 800x800 black/white Mandelbrot image into "new1.ppm" and a
  * neighbour-averaged (blurred) copy into "blur.ppm", both as binary PPM (P6).
  *
  * The escape-time loop is parallelised over image rows with OpenMP.  The
  * blur replaces each pixel by the mean of its existing 8-neighbours (the
  * centre pixel itself is not part of the mean); neighbours that would fall
  * outside the image are skipped.
  *
  * Returns 0 on success, 1 if an output file cannot be opened or written.
  */
 int main()
 {
     enum { kWidth = 800, kHeight = 800, kChannels = 3 };

     /* static storage: two ~1.9 MB images are too large to put on the stack */
     static unsigned char image[kHeight][kWidth][kChannels];
     static unsigned char blurred[kHeight][kWidth][kChannels];

     omp_set_nested(1);
     printf("is nested :%d \n", omp_get_nested());

     /* screen (integer) coordinates */
     const int iXmax = kWidth;
     const int iYmax = kHeight;
     /* world (double) coordinates = parameter plane */
     const double CxMin = -2.5;
     const double CxMax = 1.5;
     const double CyMin = -2.0;
     const double CyMax = 2.0;
     const double PixelWidth = (CxMax - CxMin) / iXmax;
     const double PixelHeight = (CyMax - CyMin) / iYmax;
     /* colour component (R, G or B) is coded from 0 to 255: 24-bit RGB */
     const int MaxColorComponentValue = 255;
     const char *filename = "new1.ppm";
     const char *blurname = "blur.ppm";
     const char *comment = "# ";   /* a PPM comment starts with # */
     const int IterationMax = 200;
     /* bail-out value, radius of circle */
     const double EscapeRadius = 2;
     const double ER2 = EscapeRadius * EscapeRadius;
     int status = 0;
     int iY, i, j;

     /* create both files in binary mode and bail out early if that fails */
     FILE *fp = fopen(filename, "wb");
     if (fp == NULL) {
         perror(filename);
         return 1;
     }
     FILE *blurp = fopen(blurname, "wb");
     if (blurp == NULL) {
         perror(blurname);
         fclose(fp);
         return 1;
     }

     /* write ASCII header to the files */
     fprintf(fp,"P6\n %s\n %d\n %d\n %d\n",comment,iXmax,iYmax,MaxColorComponentValue);
     fprintf(blurp,"P6\n %s\n %d\n %d\n %d\n",comment,iXmax,iYmax,MaxColorComponentValue);

     /* Per-point state is declared inside the loop body so that each thread
        gets its own copy without copying uninitialised values around. */
     #pragma omp parallel for schedule(dynamic,10)
     for (iY = 0; iY < iYmax; iY++)
     {
         int iX;
         double Cy = CyMin + iY * PixelHeight;
         if (fabs(Cy) < PixelHeight / 2) Cy = 0.0;   /* main antenna */

         for (iX = 0; iX < iXmax; iX++)
         {
             const double Cx = CxMin + iX * PixelWidth;
             /* initial value of orbit = critical point Z = 0 */
             double Zx = 0.0, Zy = 0.0;
             double Zx2 = 0.0, Zy2 = 0.0;   /* Zx2 = Zx*Zx, Zy2 = Zy*Zy */
             int Iteration;
             unsigned char shade;

             for (Iteration = 0; Iteration < IterationMax && ((Zx2 + Zy2) < ER2); Iteration++)
             {
                 Zy = 2 * Zx * Zy + Cy;
                 Zx = Zx2 - Zy2 + Cx;
                 Zx2 = Zx * Zx;
                 Zy2 = Zy * Zy;
             }

             /* interior of the set = black, exterior = white */
             shade = (Iteration == IterationMax) ? 0 : 255;
             image[iY][iX][0] = shade;   /* red */
             image[iY][iX][1] = shade;   /* green */
             image[iY][iX][2] = shade;   /* blue */
         }
     }

     /* Blur: mean over the neighbours that actually exist. */
     for (i = 0; i < kHeight; i++)
     {
         for (j = 0; j < kWidth; j++)
         {
             int sum[kChannels] = {0, 0, 0};
             int count = 0;
             int di, dj, ch;

             for (di = -1; di <= 1; di++)
             {
                 const int ni = i + di;
                 if (ni < 0 || ni >= kHeight) continue;   /* above/below the image */
                 for (dj = -1; dj <= 1; dj++)
                 {
                     const int nj = j + dj;
                     if (nj < 0 || nj >= kWidth) continue;   /* left/right of the image */
                     if (di == 0 && dj == 0) continue;       /* centre is excluded */
                     for (ch = 0; ch < kChannels; ch++)
                         sum[ch] += image[ni][nj][ch];
                     count++;
                 }
             }

             /* count is at least 3 (corner pixel), so the division is safe;
                dividing keeps the result inside the 0..255 component range */
             for (ch = 0; ch < kChannels; ch++)
                 blurred[i][j][ch] = (unsigned char)(sum[ch] / count);
         }
     }

     /* Rows and pixels are contiguous, so each image is written in one call. */
     if (fwrite(image, 1, sizeof image, fp) != sizeof image) {
         perror(filename);
         status = 1;
     }
     if (fwrite(blurred, 1, sizeof blurred, blurp) != sizeof blurred) {
         perror(blurname);
         status = 1;
     }
     if (fclose(fp) != 0) {
         perror(filename);
         status = 1;
     }
     if (fclose(blurp) != 0) {
         perror(blurname);
         status = 1;
     }
     return status;
 }
Ejemplo n.º 19
0
/*
 * Checks which values of the dynamic, nested, run-sched and thread-count
 * settings are visible inside "#pragma omp target if (0)" regions, and that
 * changes made inside such a region do not show up in the enclosing code.
 * Any mismatch ends the program through abort(); a clean run returns 0.
 */
int
main ()
{
  /* *_o: the settings as they are before this program changes anything. */
  int d_o = omp_get_dynamic ();
  int n_o = omp_get_nested ();
  omp_sched_t s_o;
  int c_o;
  omp_get_schedule (&s_o, &c_o);
  int m_o = omp_get_max_threads ();
  /* Change every setting, then read back what is now in effect. */
  omp_set_dynamic (1);
  omp_set_nested (1);
  omp_set_schedule (omp_sched_static, 2);
  omp_set_num_threads (4);
  int d = omp_get_dynamic ();
  int n = omp_get_nested ();
  omp_sched_t s;
  int c;
  omp_get_schedule (&s, &c);
  int m = omp_get_max_threads ();
  if (!omp_is_initial_device ())
    abort ();
  /* The false if-clause is expected to keep the region on the initial
     device; this is verified with omp_is_initial_device() below. */
  #pragma omp target if (0)
  {
    omp_sched_t s_c;
    int c_c;
    omp_get_schedule (&s_c, &c_c);
    /* Inside the region the ORIGINAL (*_o) values are expected, not the
       ones set just above. */
    if (d_o != omp_get_dynamic ()
	|| n_o != omp_get_nested ()
	|| s_o != s_c
	|| c_o != c_c
	|| m_o != omp_get_max_threads ())
      abort ();
    /* Modify the settings inside the region; the check after the region
       requires that these changes are not seen outside. */
    omp_set_dynamic (0);
    omp_set_nested (0);
    omp_set_schedule (omp_sched_dynamic, 4);
    omp_set_num_threads (2);
    if (!omp_is_initial_device ())
      abort ();
  }
  if (!omp_is_initial_device ())
    abort ();
  /* Back outside: the values read before the region (d, n, s, c, m) must
     still be in effect. */
  omp_sched_t s_c;
  int c_c;
  omp_get_schedule (&s_c, &c_c);
  if (d != omp_get_dynamic ()
      || n != omp_get_nested ()
      || s != s_c
      || c != c_c
      || m != omp_get_max_threads ())
    abort ();
  /* Same sequence of checks again, this time with a teams construct
     directly inside the target region. */
  #pragma omp target if (0)
  #pragma omp teams
  {
    omp_sched_t s_c;
    int c_c;
    omp_get_schedule (&s_c, &c_c);
    /* Again the original values are expected, so the changes made in the
       first target region must not be visible here either. */
    if (d_o != omp_get_dynamic ()
	|| n_o != omp_get_nested ()
	|| s_o != s_c
	|| c_o != c_c
	|| m_o != omp_get_max_threads ())
      abort ();
    omp_set_dynamic (0);
    omp_set_nested (0);
    omp_set_schedule (omp_sched_dynamic, 4);
    omp_set_num_threads (2);
    if (!omp_is_initial_device ())
      abort ();
  }
  if (!omp_is_initial_device ())
    abort ();
  /* Final check that the enclosing code still sees its own settings. */
  omp_get_schedule (&s_c, &c_c);
  if (d != omp_get_dynamic ()
      || n != omp_get_nested ()
      || s != s_c
      || c != c_c
      || m != omp_get_max_threads ())
    abort ();
  return 0;
}