コード例 #1
0
ファイル: adt_calc_kernel.cpp プロジェクト: ioz9/OP2-Common
// Host stub for the "adt_calc" parallel loop (OpenMP, single precision).
//
// Fetches an execution plan from op_plan_get(), then walks the plan colour
// by colour and calls op_x86_adt_calc() once per block; the blocks of one
// colour are distributed over threads with an OpenMP parallel-for.
// Afterwards the elapsed wall-clock time and the plan's transfer counters
// are added to the record OP_kernels[1].
//
//   name        loop name, forwarded to op_plan_get() and stored in the record
//   set         set handed to op_plan_get()
//   arg0..arg5  loop arguments; all six go to op_plan_get(), while only the
//               data pointers of arg0, arg4 and arg5 are passed to the
//               kernel (cast to float*)
void op_par_loop_adt_calc(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4,
  op_arg arg5 ){

  int    nargs = 6;
  op_arg args[6];

  args[0] = arg0;
  args[1] = arg1;
  args[2] = arg2;
  args[3] = arg3;
  args[4] = arg4;
  args[5] = arg5;

  // per-argument index table handed to op_plan_get(): the first four
  // arguments carry index 0, the last two carry -1
  int    ninds   = 1;
  int    inds[6] = {0,0,0,0,-1,-1};

  if (OP_diags>2) {
    printf(" kernel routine with indirection: adt_calc \n");
  }

  // partition size: compile-time override if present, global default otherwise

  #ifdef OP_PART_SIZE_1
    int part_size = OP_PART_SIZE_1;
  #else
    int part_size = OP_part_size;
  #endif

  op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);

  // start the clock

  double cpu_start, cpu_end, wall_start, wall_end;
  op_timers_core(&cpu_start, &wall_start);

  // thread count (queried but not otherwise used in this routine)

#ifdef _OPENMP
  int nthreads = omp_get_max_threads( );
#else
  int nthreads = 1;
#endif

  // run the plan; first_block is the running number of blocks that belong
  // to the colours already processed

  int first_block = 0;

  for (int color=0; color < Plan->ncolors; color++) {
    int blocks_in_color = Plan->ncolblk[color];

#pragma omp parallel for
    for (int blk=0; blk<blocks_in_color; blk++) {
      op_x86_adt_calc( blk,
        (float *)arg0.data, Plan->ind_maps[0],
        Plan->loc_maps[0],
        Plan->loc_maps[1],
        Plan->loc_maps[2],
        Plan->loc_maps[3],
        (float *)arg4.data,
        (float *)arg5.data,
        Plan->ind_sizes,
        Plan->ind_offs,
        first_block,
        Plan->blkmap,
        Plan->offset,
        Plan->nelems,
        Plan->nthrcol,
        Plan->thrcol);
    }

    first_block += blocks_in_color;
  }

  // no reduction data to combine for this loop

  // stop the clock and update the kernel record

  op_timers_core(&cpu_end, &wall_end);
  op_timing_realloc(1);
  OP_kernels[1].name       = name;
  OP_kernels[1].count     += 1;
  OP_kernels[1].time      += wall_end - wall_start;
  OP_kernels[1].transfer  += Plan->transfer;
  OP_kernels[1].transfer2 += Plan->transfer2;
}
コード例 #2
0
// Host stub for the "adt_calc" parallel loop (OpenMP + halo exchange,
// double precision).
//
// Sequence of work:
//   1. for every OP_ARG_DAT argument, call exchange_halo() and, when it
//      returns 1, wait_all() on that argument;
//   2. obtain an execution plan from op_plan_get();
//   3. walk the plan colour by colour, calling op_x86_adt_calc() once per
//      block inside an OpenMP parallel-for;
//   4. call set_dirtybit() on every OP_ARG_DAT argument;
//   5. add elapsed wall-clock time and the plan's transfer counters to
//      OP_kernels[1].
//
//   name        loop name, forwarded to op_plan_get() and stored in the record
//   set         set handed to op_plan_get()
//   arg0..arg5  loop arguments; only the data pointers of arg0, arg4 and
//               arg5 are passed to the kernel (cast to double*)
void op_par_loop_adt_calc(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4,
  op_arg arg5 ){

  int    nargs = 6;
  op_arg args[6];

  args[0] = arg0;
  args[1] = arg1;
  args[2] = arg2;
  args[3] = arg3;
  args[4] = arg4;
  args[5] = arg5;

  // per-argument index table handed to op_plan_get()
  int    ninds   = 1;
  int    inds[6] = {0,0,0,0,-1,-1};

  // halo exchange, performed only when the loop has indirect arguments

  if (ninds > 0) {
    for (int a = 0; a < nargs; a++) {
      if (args[a].argtype != OP_ARG_DAT) continue;

      if (OP_diags==1) reset_halo(args[a]);

      // block on this argument only if exchange_halo() reported 1
      int halo_sent = exchange_halo(args[a]);
      if (halo_sent == 1) wait_all(args[a]);
    }
  }

  if (OP_diags>2) {
    printf(" kernel routine with indirection: adt_calc \n");
  }

  // partition size: compile-time override if present, global default otherwise

  #ifdef OP_PART_SIZE_1
    int part_size = OP_PART_SIZE_1;
  #else
    int part_size = OP_part_size;
  #endif

  op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);

  // start the clock (the halo exchange above is not included in the timing)

  double cpu_start, cpu_end, wall_start, wall_end;
  op_timers(&cpu_start, &wall_start);

  // thread count (queried but not otherwise used in this routine)

#ifdef _OPENMP
  int nthreads = omp_get_max_threads( );
#else
  int nthreads = 1;
#endif

  // run the plan; first_block is the running number of blocks that belong
  // to the colours already processed

  int first_block = 0;

  for (int color=0; color < Plan->ncolors; color++) {
    int blocks_in_color = Plan->ncolblk[color];

#pragma omp parallel for
    for (int blk=0; blk<blocks_in_color; blk++) {
      op_x86_adt_calc( blk,
        (double *)arg0.data, Plan->ind_maps[0],
        Plan->loc_maps[0],
        Plan->loc_maps[1],
        Plan->loc_maps[2],
        Plan->loc_maps[3],
        (double *)arg4.data,
        (double *)arg5.data,
        Plan->ind_sizes,
        Plan->ind_offs,
        first_block,
        Plan->blkmap,
        Plan->offset,
        Plan->nelems,
        Plan->nthrcol,
        Plan->thrcol);
    }

    first_block += blocks_in_color;
  }

  // flag every dataset argument through set_dirtybit()
  // (intended for data accessed with OP_INC, OP_WRITE or OP_RW)
  for (int a = 0; a < nargs; a++) {
    if (args[a].argtype == OP_ARG_DAT) {
      set_dirtybit(args[a]);
    }
  }

  // perform any global operations
  // - NONE

  // stop the clock and update the kernel record

  op_timers(&cpu_end, &wall_end);
  op_timing_realloc(1);
  OP_kernels[1].name       = name;
  OP_kernels[1].count     += 1;
  OP_kernels[1].time      += wall_end - wall_start;
  OP_kernels[1].transfer  += Plan->transfer;
  OP_kernels[1].transfer2 += Plan->transfer2;
}
コード例 #3
0
ファイル: adt_calc_kernel.cpp プロジェクト: xyuan/OP2-Common
// Host stub for the "adt_calc" parallel loop (OpenMP + MPI, single
// precision).
//
// Sequence of work:
//   1. start the halo exchanges with op_mpi_halo_exchanges(); its return
//      value is forwarded to the kernel as the last argument;
//   2. if the set is non-empty, obtain a plan from op_plan_get() and walk
//      it colour by colour, calling op_x86_adt_calc() once per block inside
//      an OpenMP parallel-for; op_mpi_wait_all() is called right before
//      colour Plan->ncolors_core is processed;
//   3. make sure op_mpi_wait_all() has been called exactly once, then call
//      op_mpi_set_dirtybit();
//   4. add elapsed wall-clock time (and, when a plan was used, its transfer
//      counters) to OP_kernels[1].
//
//   name        loop name, forwarded to op_plan_get() and stored in the record
//   set         set handed to op_mpi_halo_exchanges() and op_plan_get()
//   arg0..arg5  loop arguments; only the data pointers of arg0, arg4 and
//               arg5 are passed to the kernel (cast to float*)
void op_par_loop_adt_calc(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4,
  op_arg arg5 ){


  int    nargs   = 6;
  op_arg args[6];

  args[0] = arg0;
  args[1] = arg1;
  args[2] = arg2;
  args[3] = arg3;
  args[4] = arg4;
  args[5] = arg5;

  // per-argument index table handed to op_plan_get()
  int    ninds   = 1;
  int    inds[6] = {0,0,0,0,-1,-1};

  if (OP_diags>2) {
    printf(" kernel routine with indirection: adt_calc\n");
  }

  // get plan

  #ifdef OP_PART_SIZE_1
    int part_size = OP_PART_SIZE_1;
  #else
    int part_size = OP_part_size;
  #endif

  int set_size = op_mpi_halo_exchanges(set, nargs, args);

  // Records whether the exchanges started above have been waited on.
  // The in-loop wait only triggers when some colour index equals
  // Plan->ncolors_core; it is skipped entirely for an empty set or when no
  // such colour exists, so a fallback wait is issued after the loop.
  int halo_waited = 0;

  // initialise timers

  double cpu_t1, cpu_t2, wall_t1, wall_t2;
  op_timers_core(&cpu_t1, &wall_t1);

  if (set->size >0) {


    op_plan *Plan = op_plan_get(name,set,part_size,nargs,args,ninds,inds);
    // execute plan

    int block_offset = 0;

    for (int col=0; col < Plan->ncolors; col++) {
      // NOTE(review): presumably colours below ncolors_core touch no halo
      // data, which is why the wait is deferred until here - confirm
      // against the plan construction.
      if (col==Plan->ncolors_core) {
        op_mpi_wait_all(nargs, args);
        halo_waited = 1;
      }

      int nblocks = Plan->ncolblk[col];

#pragma omp parallel for
      for (int blockIdx=0; blockIdx<nblocks; blockIdx++) {
        op_x86_adt_calc( blockIdx,
           (float *)arg0.data,
           Plan->ind_map,
           Plan->loc_map,
           (float *)arg4.data,
           (float *)arg5.data,
           Plan->ind_sizes,
           Plan->ind_offs,
           block_offset,
           Plan->blkmap,
           Plan->offset,
           Plan->nelems,
           Plan->nthrcol,
           Plan->thrcol,
           set_size);
      }

      block_offset += nblocks;
    }

    // Plan is only in scope here, so its counters are recorded now
    op_timing_realloc(1);
    OP_kernels[1].transfer  += Plan->transfer;
    OP_kernels[1].transfer2 += Plan->transfer2;

  }

  // complete any halo exchange the loop above did not wait for, so that no
  // communication is still outstanding when the dirty bits are set and the
  // routine returns
  if (!halo_waited) {
    op_mpi_wait_all(nargs, args);
  }

  // combine reduction data

  op_mpi_set_dirtybit(nargs, args);

  // update kernel record

  op_timers_core(&cpu_t2, &wall_t2);
  op_timing_realloc(1);
  OP_kernels[1].name      = name;
  OP_kernels[1].count    += 1;
  OP_kernels[1].time     += wall_t2 - wall_t1;
}