示例#1
0
文件: papi.c 项目: jreybert/power
void check_events(int *events, int nb_events) {
  int i, retval;
  if ( nb_events > PAPI_num_counters() ) {
    fprintf(stderr, "Too many counters: %d\nMaximum available %d\n", nb_events, PAPI_num_counters());
  }
  for (i=0; i < nb_events; i++) {
    if ( (retval = PAPI_query_event(events[i])) != PAPI_OK) {
      PAPI_event_info_t info;
      PAPI_get_event_info(events[i], &info);
      fprintf(stderr,"No instruction counter for \"%s\" event\n%s\n", info.short_descr, PAPI_strerror(retval));
    }
  }
}
示例#2
0
void initBigSimTrace(int outputParams, int _outputTiming)
{
  CkpvInitialize(int, outputParameters);
  CkpvAccess(outputParameters) = outputParams;

  bgTraceCounter = 0;
#ifdef CMK_BIGSIM_CHARM
  if (!BgIsReplay()) outputTiming = 0;
  outputTiming = _outputTiming;
#endif
  CkpvInitialize(bool, insideTraceBracket);
  CkpvAccess(insideTraceBracket) = false;

  CkpvInitialize(double, start_time);
  CkpvInitialize(double, end_time);

  CkpvInitialize(FILE*, bgfp);
  CkpvAccess(bgfp) = NULL;
#ifdef CMK_BIGSIM_CHARM
  //   for bigsim emulation, write to files, one for each processor
  //   always write immediately, instead of store and dump at the end
  if (!outputTiming) {
  char fname[128];
  const char *subdir = "params";
  CmiMkdir(subdir);
  sprintf(fname, "%s/param.%d", subdir, CkMyPe());
  CkpvAccess(bgfp) = fopen(fname, "w");
  if (CkpvAccess(bgfp) == NULL) 
    CmiAbort("Failed to generated trace param file!");
  }
#endif
  //   for Mambo simulation, write to screen for now
//  CkpvAccess(bgfp) = stdout;
  if (CkpvAccess(outputParameters))  { 
    CkpvInitialize(StringPool, eventsPool);
    if (CkMyPe()==0) CmiPrintf("outputParameters enabled!\n");
#ifdef CMK_BIGSIM_CHARM
    BgRegisterUserTracingFunction(finalizeBigSimTrace);
#endif
  }


#ifdef BIG_SIM_PAPI
	CkPrintf("PAPI: number of available counters: %d\n", PAPI_num_counters());
	CkAssert(PAPI_num_counters() >= 0);
#endif

}
void start_papi() {
    values = calloc(en, sizeof(long long));

    if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
        fprintf(stderr, "PAPI is unsupported.\n");
        papi_supported = 0;
    }

    if (PAPI_num_counters() < en) {
        fprintf(stderr, "PAPI is unsupported.\n");
        papi_supported = 0;
    }
    
    if ((papi_err = PAPI_create_eventset(&eventSet)) != PAPI_OK) {
        fprintf(stderr, "Could not create event set: %s\n", PAPI_strerror(papi_err));
    }

    for (int i=0; i<en; ++i) {
        if ((papi_err = PAPI_add_event(eventSet, events[i])) != PAPI_OK ) {
            fprintf(stderr, "Could not add event: %s %s\n", event_names[i], PAPI_strerror(papi_err));
        }
    }

    /* start counters */

    if (papi_supported) {
        if ((papi_err = PAPI_start(eventSet)) != PAPI_OK) {
            fprintf(stderr, "Could not start counters: %s\n", PAPI_strerror(papi_err));
        }
    }
}
示例#4
0
void papi_init(int eventNumber){
	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
		fprintf(stderr, "PAPI is unsupported.\n");
		papi_supported = false;
	}

	if (PAPI_num_counters() < 5) {
		fprintf(stderr, "PAPI is unsupported.\n");
		papi_supported = false;
	}

	if ((papi_err = PAPI_create_eventset(&eventSet)) != PAPI_OK) {
		fprintf(stderr, "Could not create event set: %s\n", PAPI_strerror(papi_err));
	}

    /* force program to run on a single CPU */
    cpu_set_t my_set;        /* Define your cpu_set bit mask. */
    CPU_ZERO(&my_set);       /* Initialize it all to 0, i.e. no CPUs selected. */
	CPU_SET(0, &my_set);
	if (sched_setaffinity(0, sizeof(cpu_set_t), &my_set) != 0) {
		perror("sched_setaffinity error");
	}

	if ((papi_err = PAPI_add_event(eventSet, events[eventNumber])) != PAPI_OK ) {
		fprintf(stderr, "Could not add event: %s\n", PAPI_strerror(papi_err));
	}
}
示例#5
0
//
// This method should be placed at the start of instrumented code
//
void startPapiCounters(){
    initializeCounters(0);
#ifdef DBG
    printGEvents();
    printf("********* STARTING COUNTERS *************\n");
    //assert(NUM_EVENTS == _G_EVENT_COUNT);
#endif
    // initialize papi library and assert that it's successful
    _CALL_PAPI(PAPI_library_init( PAPI_VER_CURRENT ));    
    
    // check that all the events can be counted at once.
    int numCounters = PAPI_num_counters() ;
    assert( NUM_EVENTS <= numCounters );

    
#ifdef DBG
    printf("Number of hardware counters available on this machine: %d", numCounters);
#endif

    for ( int i = 0; i < NUM_EVENTS; i++ ) {
        char name[PAPI_MAX_STR_LEN];
        (void) _CALL_PAPI(PAPI_event_code_to_name( _G_EVENTS[i], name ));
        if(PAPI_query_event( _G_EVENTS[i] ) < PAPI_OK) {
            fprintf(stderr, "Event %s could not be counted on this machine.\n", name);
            abort();
        }
    }

    //*******  Start Counters ******
    (void) _CALL_PAPI(PAPI_start_counters(_G_EVENTS, NUM_EVENTS));
}
示例#6
0
文件: perf.c 项目: jiachengy/numa-db
void
perf_lib_init(const char *perfcfg, const char *perfout)
{
  int retval;

  retval = PAPI_library_init(PAPI_VER_CURRENT);
  assert(retval == PAPI_VER_CURRENT);

  retval = PAPI_thread_init(pthread_self);
  assert(retval == PAPI_OK);

  if (perfcfg)
    PERF_CONFIG = mystrdup(perfcfg);

  if (perfout)
    PERF_OUT = mystrdup(perfout);

  int max_counters = PAPI_num_counters();
  if (max_counters > MAX_PERF_EVENTS)
    max_counters = MAX_PERF_EVENTS;

  PERF_EVENT_NAMES = (char**)malloc(sizeof(char*) * max_counters);  
  assert(PERF_EVENT_NAMES != NULL);
  memset(PERF_EVENT_NAMES, 0x0, sizeof(char*) * max_counters);

  if (PERF_CONFIG) {
    char line[80];
    FILE *config = fopen(PERF_CONFIG, "r");
    assert(config != NULL);

    while (fgets(line, 80, config) != NULL && NUM_EVENTS < max_counters) {
      if (line[0]=='#')
        continue;
      PERF_EVENT_NAMES[NUM_EVENTS] = mystrdup(line);
      NUM_EVENTS++;
    }
    if (!feof(config))
      fprintf(stderr, "Too many counters added. Only take the first %d.\n", max_counters);

    fclose(config);
  }
  else { /* if no config file is specified, add default events only */
    NUM_EVENTS = sizeof(DEFAULT_EVENTS) / sizeof(char*);
    if (NUM_EVENTS > max_counters) {
      NUM_EVENTS = max_counters;
      fprintf(stderr, "Too many counters added. Only take the first %d.\n", max_counters);
    }
    
    for (int i = 0; i < NUM_EVENTS; i++)
      PERF_EVENT_NAMES[i] = mystrdup(DEFAULT_EVENTS[i]);
  }

  for (int i = 0; i != NUM_EVENTS; ++i)
    PERF_COUNTER_INITIALIZER.value[i] = 0;
}
示例#7
0
void
papi_init() 
{
  int max;

  /* Check PAPI sanity */
  if (PAPI_VER_CURRENT != PAPI_library_init(PAPI_VER_CURRENT))
    papi_eprintf("PAPI_library_init error.\n");

  max = PAPI_num_counters();
  PAPI_reset(max);
}
示例#8
0
文件: countloop.c 项目: di/school
int main(int argc, char *argv[]) {

     double a[MAXVSIZE], b[MAXVSIZE], c[MAXVSIZE];
     int i,n;
     long long before, after;


     if (PAPI_VER_CURRENT != 
		PAPI_library_init(PAPI_VER_CURRENT))
	ehandler("PAPI_library_init error.");

     const size_t EVENT_MAX = PAPI_num_counters();
        printf("# Max counters = %zd\n", EVENT_MAX);

     if (PAPI_OK != PAPI_query_event(PAPI_TOT_INS))
	        ehandler("Cannot count PAPI_TOT_INS.");

     if (PAPI_OK != PAPI_query_event(PAPI_FP_OPS))
	        ehandler("Cannot count PAPI_FP_OPS.");

     size_t EVENT_COUNT = 2;
     int events[] = { PAPI_TOT_INS, PAPI_FP_OPS };
     long long values[EVENT_COUNT];


     printf("Enter vector size:  ");
     scanf("%d",&n);

     for (i=0;i<n;i++) {
       a[i] = i;
       b[i] = n-i;
     }

     PAPI_start_counters(events, EVENT_COUNT);

     if(PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
               ehandler("Problem reading counters.");

     loop(c,a,b,n);

     if(PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
               ehandler("Problem reading counters.");

     printf("Number of instructions = %lld\n",values[0]);
     printf("Number of fp operations = %lld\n",values[1]);
     return 0;

}
int main () 
{
    float t0, t1;
    int iter, i, j;
    int events[2] = {PAPI_L1_DCM, PAPI_FP_OPS }, ret;
    long_long values[2];

    if (PAPI_num_counters() < 2) {
        fprintf(stderr, "No hardware counters here, or PAPI not supported.\n");
        exit(1);
    }
    for (i = 0; i < MX; i++) {
        if ((ad[i] = malloc(sizeof(double)*MX)) == NULL) {
            fprintf(stderr,"malloc failed\n");
            exit(1);
        }
    }
    for (j = 0; j < MX; j++) { 
        for (i = 0; i < MX; i++) {
            ad[i][j] = 1.0/3.0; /* Initialize the data */
        }
    }
    t0 = gettime();
    if ((ret = PAPI_start_counters(events, 2)) != PAPI_OK) {
        fprintf(stderr, "PAPI failed to start counters: %s\n", PAPI_strerror(ret));
        exit(1);
    }
    for (iter = 0; iter < NITER; iter++) {
        for (j = 0; j < MX; j++) {
            for (i = 0; i < MX; i++) {
                ad[i][j] += ad[i][j] * 3.0;
            }
        }
    }
    if ((ret = PAPI_read_counters(values, 2)) != PAPI_OK) {
        fprintf(stderr, "PAPI failed to read counters: %s\n", PAPI_strerror(ret));
        exit(1);
    }
    t1 = gettime();

    printf("Total software flops = %f\n",(float)TOT_FLOPS);
    printf("Total hardware flops = %lld\n",(float)values[1]);
    printf("MFlop/s = %f\n", (float)(TOT_FLOPS/MEGA)/(t1-t0));
    printf("L1 data cache misses is %lld\n", values[0]);
}
示例#10
0
void
papi_set_events(char *metric)
{
  const size_t n = 1;

  int max;
  long_long *papi_tmp;
  int papi_events[1];
  int code;

  max = PAPI_num_counters();

  if (n > max)
    papi_eprintf("Too many counters requested.\n");

  papi_tmp = malloc(sizeof(*papi_tmp) * n);

  PAPI_reset(max);

  PAPI_stop_counters(papi_tmp, n);

  if (PAPI_event_name_to_code(metric, &code) != PAPI_OK)
    papi_eprintf("Unknown PAPI event %s.\n", metric);

  if (code == 0)
    papi_eprintf("Unknown PAPI event %s.\n", metric);

  papi_events[0] = code;

  PAPI_start_counters(papi_events, n);

  if (PAPI_read_counters(papi_tmp, n) != PAPI_OK)
    papi_eprintf("Problem reading counters %s:%d.\n", __FILE__, __LINE__);

  free(papi_tmp);
}
示例#11
0
int main(int argc, char *argv[])
{
  int size, rank, world_rank, my_group;
  int num_lsms; // number of parallel LSMS instances
  int size_lsms; // number of atoms in a lsms instance
  int num_steps; // number of energy calculations
  int initial_steps; // number of steps before sampling starts
  int stepCount=0; // count the Monte Carlo steps executed
  double max_time; // maximum walltime for this run in seconds
  bool restrict_time = false;       // was the maximum time specified?
  bool restrict_steps = false; // or the max. numer of steps?
  int align; // alignment of lsms_instances
  
  double magnetization;
  double energy_accumulator; // accumulates the enegy to calculate the mean
  int energies_accumulated;


  int new_peid,new_root;
  static int op,flag;
  double *evec,*r_values;
  evec=(double *)shmalloc(sizeof(double)*3*size_lsms);
  r_values=(double *)shmalloc(sizeof(double)*(R_VALUE_OFFSET+3*(size_lsms+1)));




  energy_accumulator=0.0;
  energies_accumulated=0;

  double walltime_0,walltime;

  double restartWriteFrequency=30.0*60.0;
  double nextWriteTime=restartWriteFrequency;

  MPI_Comm local_comm;
  int *lsms_rank0;
  MPI_Status status;

  char prefix[40];
  char i_lsms_name[64];
  char gWL_in_name[64], gWL_out_name[64];
  char mode_name[64];
  char energy_calculation_name[64];
  char stupid[37];

  char step_out_name[64];
  char wl_step_out_name[128];
  char *wl_stepf=NULL;
  bool step_out_flag=false;
  std::ofstream step_out_file;
  typedef enum {Constant, Random, WangLandau_1d, ExhaustiveIsing, WangLandau_2d} EvecGenerationMode;
  typedef enum {MagneticMoment, MagneticMomentZ, MagneticMomentX, MagneticMomentY} SecondDimension;

  EvecGenerationMode evec_generation_mode = Constant;
  SecondDimension second_dimension = MagneticMoment;
  double ev0[3];

  bool return_moments_flag=true; // true-> return all magnetic moments from lsms run at each step.
  bool generator_needs_moment=false;

  typedef enum {OneStepEnergy, MultiStepEnergy, ScfEnergy} EnergyCalculationMode;
  EnergyCalculationMode energyCalculationMode = OneStepEnergy;
  int energyIndex=1; // index for the return value to use for the MC step (0: total energy, 1: band energy)

  ev0[0]=ev0[1]=0.0; ev0[2]=1.0;
  // size has to be align + size_lsms*num_lsms
  align=1;
  num_lsms=1;
  size_lsms=-1;
  my_group=-1;
  num_steps=1;
  initial_steps=0;

  sprintf(i_lsms_name,"i_lsms");
  gWL_in_name[0]=gWL_out_name[0]=0;
  mode_name[0]=0;
  energy_calculation_name[0]=0;

  // check command line arguments
  for(int i=0; i<argc; i++)
  {
    if(!strcmp("-num_lsms",argv[i])) num_lsms=atoi(argv[++i]);
    if(!strcmp("-size_lsms",argv[i])) size_lsms=atoi(argv[++i]);
    if(!strcmp("-align",argv[i])) align=atoi(argv[++i]);
    if(!strcmp("-num_steps",argv[i])) {num_steps=atoi(argv[++i]); restrict_steps=true;}
    if(!strcmp("-initial_steps",argv[i])) initial_steps=atoi(argv[++i]); 
    if(!strcmp("-walltime",argv[i])) {max_time=60.0*atof(argv[++i]); restrict_time=true;}
    if(!strcmp("-i",argv[i])) strncpy(i_lsms_name,argv[++i],64);
    if(!strcmp("-random_dir",argv[i])) {evec_generation_mode = Random;}
    if(!strcmp("-step_out",argv[i]))
    {strncpy(step_out_name,argv[++i],64); step_out_flag=true;
      return_moments_flag=true;}
    if(!strcmp("-wl_out", argv[i])) strncpy(gWL_out_name,argv[++i],64);
    if(!strcmp("-wl_in", argv[i])) strncpy(gWL_in_name,argv[++i],64);
    if(!strcmp("-mode", argv[i])) strncpy(mode_name,argv[++i],64);
    if(!strcmp("-energy_calculation",argv[i])) strncpy(energy_calculation_name,argv[++i],64);
  }

  if(!(restrict_steps || restrict_time)) restrict_steps=true;

  if(mode_name[0]!=0)
  {
    if(!strcmp("constant",mode_name)) evec_generation_mode = Constant;
    if(!strcmp("random",mode_name)) evec_generation_mode = Random;
    if(!strcmp("1d",mode_name)) evec_generation_mode = WangLandau_1d;
    if(!strcmp("ising",mode_name)) evec_generation_mode = ExhaustiveIsing;
    if(!strcmp("2d",mode_name)) evec_generation_mode = WangLandau_2d;
    if(!strcmp("2d-m",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMoment;}
    if(!strcmp("2d-x",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentX;}
    if(!strcmp("2d-y",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentY;}
    if(!strcmp("2d-z",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentZ;}
  }

  if(energy_calculation_name[0]!=0)
  {
    if(energy_calculation_name[0]=='o') { energyCalculationMode = OneStepEnergy; energyIndex=1; }
    if(energy_calculation_name[0]=='m') { energyCalculationMode = MultiStepEnergy; energyIndex=1; }
    if(energy_calculation_name[0]=='s') { energyCalculationMode = ScfEnergy; energyIndex=0; }
  }

#ifdef USE_PAPI
#define NUM_PAPI_EVENTS 4
  int hw_counters = PAPI_num_counters();
  if(hw_counters>NUM_PAPI_EVENTS) hw_counters=NUM_PAPI_EVENTS;
  int papi_events[NUM_PAPI_EVENTS]; // = {PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_FP_OPS,PAPI_VEC_INS};
  char *papi_event_name[] = {"PAPI_TOT_INS","PAPI_FP_OPS",
                             "RETIRED_SSE_OPERATIONS:DOUBLE_ADD_SUB_OPS:DOUBLE_MUL_OPS:DOUBLE_DIV_OPS:OP_TYPE",
                             "RETIRED_SSE_OPERATIONS:SINGLE_ADD_SUB_OPS:SINGLE_MUL_OPS:SINGLE_DIV_OPS:OP_TYPE"};
  // "RETIRED_INSTRUCTIONS",
  // "RETIRED_MMX_AND_FP_INSTRUCTIONS:PACKED_SSE_AND_SSE2",
  // "RETIRED_SSE_OPERATIONS:DOUBLE_ADD_SUB_OPS:DOUBLE_MUL_OPS:DOUBLE_DIV_OPS:1",
  // "RETIRED_SSE_OPERATIONS:SINGLE_ADD_SUB_OPS:SINGLE_MUL_OPS:SINGLE_DIV_OPS:1"
  // get events from names:
  for(int i=0; i<NUM_PAPI_EVENTS; i++)
  {
    if(PAPI_event_name_to_code(papi_event_name[i],&papi_events[i]) != PAPI_OK)
    {
      // printline("Error in obtaining PAPI event code for: "+ttos(papi_event_name[i]),
      //           std::cerr,parameters.myrankWorld);
      // printline("Skipping all following events",
      //           std::cerr,parameters.myrankWorld);
      if(hw_counters>i) hw_counters=i;
    }
  }
  long long papi_values[NUM_PAPI_EVENTS+4];
  // printline("PAPI: "+ttos(hw_counters)+" counters available",std::cout,parameters.myrankWorld);
  if(hw_counters>NUM_PAPI_EVENTS) hw_counters=NUM_PAPI_EVENTS;
  long long papi_real_cyc_0 = PAPI_get_real_cyc();
  long long papi_real_usec_0 = PAPI_get_real_usec();
  long long papi_virt_cyc_0 = PAPI_get_virt_cyc();
  long long papi_virt_usec_0 = PAPI_get_virt_usec();
  PAPI_start_counters(papi_events,hw_counters);
#endif


  lsms_rank0=(int *)malloc(sizeof(int)*(num_lsms+1));

  // initialize MPI:
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  world_rank=rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  walltime_0 = get_rtc();

#ifndef SVN_REV
#define SVN_REV "unknown"
#endif

// make sure 'return_moments_flag' is set correctly
  switch(evec_generation_mode)
  {
  case Constant : break;
  case Random : break;
  case WangLandau_1d :
    return_moments_flag = true;
    generator_needs_moment = true;
    break;
  case ExhaustiveIsing : break;
  case WangLandau_2d :
    return_moments_flag = true;
    generator_needs_moment = true;
    break;
  default: std::cout<<" ERROR: UNKNOWN EVEC GENERATION MODE\n"; exit(1);
  }

  if(rank==0)
  {
    std::cout<<"LSMS_3"<<std::endl;
    std::cout<<" SVN revision "<<SVN_REV<<std::endl<<std::endl;
#ifdef USE_PAPI
    std::cout<<" Using Papi counters"<<std::endl<<std::endl; 
#endif
    std::cout<<" Size of LSMS instances = "<<size_lsms<<" atoms\n";
    std::cout<<" Number of LSMS instances = "<<num_lsms<<std::endl;
    std::cout<<" LSMS Energy calculated using ";
    switch(energyCalculationMode)
    {
    case OneStepEnergy: std::cout<<"oneStepEnergy [frozen potential band energy]"<<std::endl; break;
    case MultiStepEnergy: std::cout<<"multiStepEnergy [frozen potential band energy with converged Fermi energy]"<<std::endl; break;
    case ScfEnergy: std::cout<<"scfEnergy [self-consistent total energy]"<<std::endl; break;
    default: std::cout<<"UNKNOWN ENERGY CALCULATION METHOD"<<std::endl; exit(1);
    }
    if(restrict_steps) std::cout<<" Number of gWL steps = "<<num_steps<<std::endl;
    if(restrict_time) std::cout<<" Maximum walltime = "<<max_time<<"s\n";
    std::cout<<" Processor alignment (process allocation quantization) = "<<align<<std::endl;
    switch(evec_generation_mode)
    {
    case Constant : std::cout<<" Constant moments direction along "
                             <<ev0[0]<<" "<<ev0[1]<<" "<<ev0[2]<<std::endl;
      break;
    case Random : std::cout<<" Random distribution of moments (no Wang-Landau)"<<std::endl;
      break;
    case WangLandau_1d : std::cout<<" Wang-Landau for one continuous variable (energy)"<<std::endl;
//      return_moments_flag = true;
//      generator_needs_moment = true;
      break;
    case ExhaustiveIsing : std::cout<<" Exhaustive Ising sampling"<<std::endl; break;
    case WangLandau_2d : std::cout<<" Wang-Landau for two continuous variable (energy, ";
      switch(second_dimension)
      {
      case MagneticMoment  : std::cout<<"magnitude of magnetization)"; break;
      case MagneticMomentX : std::cout<<"x component of magnetization)"; break;
      case MagneticMomentY : std::cout<<"y component of magnetization)"; break;
      case MagneticMomentZ : std::cout<<"z component of magnetization)"; break;
      }
      std::cout<<std::endl;
//      return_moments_flag = true;
//      generator_needs_moment = true;
      break;
    default: std::cout<<" ERROR: UNKNOWN EVEC GENERATION MODE\n"; exit(1);
    }
    if(step_out_flag) std::cout<<" Step output written to: "<<step_out_name<<std::endl;
    std::cout<<std::endl;

    if(step_out_flag && (evec_generation_mode==WangLandau_1d))
    {
      // step_out_flag=false;
      snprintf(wl_step_out_name,127,"wl1d_%s",step_out_name);
      wl_stepf=wl_step_out_name;
    }

    if(step_out_flag)
    {
      step_out_file.open(step_out_name);
      step_out_file<<"#";
      for(int i=0; i<argc; i++) step_out_file<<" "<<argv[i];
      step_out_file<<std::endl<<size_lsms<<std::endl;
    }
  }

  if(generator_needs_moment) return_moments_flag=true;

  if(num_lsms==1)
  {
    SHMEM_activeset local_comm;
    local_comm.rank=shmem_my_pe();
    local_comm.size=shmem_n_pes();
    local_comm.start_pe=0;
    local_comm.logPE_stride=0;
    LSMS lsms_calc(local_comm,i_lsms_name,"1_");
      
    if(rank==0)
    {
      std::cout<<"executing LSMS(C++) for "<<lsms_calc.numSpins()<<" atoms\n";
      std::cout<<"  LSMS version = "<<lsms_calc.version()<<std::endl;
    }

    if(energyCalculationMode==OneStepEnergy)
      std::cout<<"one step Energy = "<<lsms_calc.oneStepEnergy()<<std::endl;
    else if(energyCalculationMode==MultiStepEnergy)
      std::cout<<"multi-step Energy = "<<lsms_calc.multiStepEnergy()<<std::endl;
    else if(energyCalculationMode==ScfEnergy)
      std::cout<<"self-consistent Energy = "<<lsms_calc.scfEnergy()<<std::endl;
    else
    {
      printf("ERROR: Unknown energy calculation mode for lsms_calc in wl-lsms main!\n");
     // MPI_Abort(MPI_COMM_WORLD,5);
      exit(5);
    }
  }
  else
  {
    // build the communicators
    //int color=MPI_UNDEFINED;
    //Assuming user passes a power of two while using "-align"
    int s = align;
    int comm_size=(size-align)/num_lsms;
    int world_rank;
    for(int i=0; i<num_lsms; i++)
    {
      if((world_rank>=s) && (world_rank<s+comm_size)) 
      { 
        my_group=i; 
        //color=i; 
        new_peid=world_rank-s;
        new_root=s;
      }
      lsms_rank0[i]=s;
      s+=comm_size;
    }
    if(world_rank==0){ 
      //color=num_lsms;
      new_peid=0;
      comm_size=1;
      new_root=0;
    }

    //MPI_Comm_split(MPI_COMM_WORLD, color, 0, &local_comm);
    SHMEM_activeset local_comm;
    local_comm.rank=new_peid;
    local_comm.size=comm_size;
    local_comm.start_pe=new_root;
    local_comm.logPE_stride=0;

    std::cout<<"world_rank="<<world_rank<<" -> group="<<my_group<<std::endl;

      
    snprintf(prefix,38,"Group %4d: ",my_group);

    // now we get ready to do some calculations...

    if(my_group>=0)
    {
      double energy;
      double band_energy;
      int static i_values[10];
      double static r_values[10];
      static int op;


      //MPI_Comm_rank(local_comm, &rank);
      rank = local_comm.rank;
      snprintf(prefix,38,"%d_",my_group);
      // to use the ramdisk on jaguarpf:
      // snprintf(prefix,38,"/tmp/ompi/%d_",my_group);
      LSMS lsms_calc(local_comm,i_lsms_name,prefix);
      snprintf(prefix,38,"Group %4d: ",my_group);

      if(rank==0 && my_group==0)
      {
        std::cout<<prefix<<"executing LSMS(C++) for "<<lsms_calc.numSpins()<<" atoms\n";
        std::cout<<prefix<<"  LSMS version = "<<lsms_calc.version()<<std::endl;
      }

      // wait for commands from master
      bool finished=false;
      while(!finished)
      {
        if(rank==0)
        {
          //MPI_Recv(evec,3*size_lsms,MPI_DOUBLE,0,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          //op =status.MPI_TAG;
          if (lsms_rank0[0]==world_rank)
                shmem_barrier(0, lsms_rank0[0], 2, pSync1);

        }
        //MPI_Bcast(&op,1,MPI_INT,0,local_comm);
        shmem_broadcast32(&op, &op, 1, local_comm.start_pe, local_comm.start_pe, local_comm.logPE_stride, local_comm.size, pSync2); 

/* recognized opcodes:
   5: calculate energy

   recognized energy calculation modes:
   OneStepEnergy : calclulate frozen potential band energy in one step (don't converge Ef)
   use only if the Fermi energy will not change due to MC steps!
   The only method available in LSMS_1.9
   MultiStepEnergy : calculate frozen potential band energy after converging Fermi energy
   This should be the new default method. If the Fermi energy doesn't change
   multiStepEnergy only performs one step and should be equivalent to oneStepEnergy
   The tolerance for Ef convergence can be set with LSMS::setEfTol(Real).
   The default tolerance is set in the LSMS::LSMS constructor (currently 1.0e-6).
   The maximum number of steps is read from the LSMS input file 'nscf' parameter.
   ScfEnergy : this will calculate the selfconsistent total energy.
   The maximum number of steps is read from the LSMS input file 'nscf' parameter.
   NOT IMPLEMENTED YET!!!

   10: get number of sites
*/

        if(op==5)
        {
          lsms_calc.setEvec(evec);
          if(energyCalculationMode==OneStepEnergy)
            energy=lsms_calc.oneStepEnergy(&band_energy);
          else if(energyCalculationMode==MultiStepEnergy)
            band_energy=energy=lsms_calc.multiStepEnergy();
          else if(energyCalculationMode==ScfEnergy)
            energy=lsms_calc.scfEnergy(&band_energy);
          else
          {
            printf("ERROR: Unknown energy calculation mode for lsms_calc in wl-lsms main!\n");
            //MPI_Abort(MPI_COMM_WORLD,5);
            exit(5);
          }
          r_values[0]=energy;
          r_values[1]=band_energy;
          if(return_moments_flag)
          {
            lsms_calc.getMag(&r_values[R_VALUE_OFFSET]);
          }
          if(rank==0)
          {
            if(return_moments_flag)
            {
              //MPI_Send(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,0,1005,MPI_COMM_WORLD);
              shmem_double_put(r_values, r_values, R_VALUE_OFFSET+3*size_lsms, 0);

            } else {
              //MPI_Send(r_values,R_VALUE_OFFSET,MPI_DOUBLE,0,1005,MPI_COMM_WORLD);
              shmem_double_put(r_values, r_values, R_VALUE_OFFSET, 0);
            }
            shmem_fence();
            shmem_int_swap(&flag, world_rank, 0);

          }
              
        } else if(op==10) {
          i_values[0]=lsms_calc.numSpins();
          //MPI_Send(i_values,10,MPI_INT,0,1010,MPI_COMM_WORLD);
          shmem_int_put(i_values, i_values, 10, 0);
        } else {
          // printf("world rank %d: recieved exit\n",world_rank); 
          finished=true;
        }
      }

      shfree(evec);
      //shfree(r_values);
    }
    else if(world_rank==0)
    {
      int running;
      double **evecs;
      //double *r_values;
      //int i_values[10];
      int *init_steps;
      int total_init_steps;
      bool accepted;
        
      char *wl_inf=NULL;
      char *wl_outf=NULL;
      if(gWL_in_name) wl_inf=gWL_in_name;
      if(gWL_out_name) wl_outf=gWL_out_name;
        
      EvecGenerator *generator;

/*
      // get number of spins from first LSMS instance
      // temp r_values:
      r_values=(double *)malloc(sizeof(double)*10);
      MPI_Send(r_values,1,MPI_DOUBLE, lsms_rank0[0], 10, MPI_COMM_WORLD);
      free(r_values);
      MPI_Recv(i_values,10,MPI_INT,lsms_rank0[0],1010,MPI_COMM_WORLD,&status);
      if(i_values[0]!=size_lsms)
      {
        printf("Size specified for Wang-Landau and in LSMS input file don't match!\n");
        size_lsms=i_values[0];
      }
*/

      evecs=(double **)shmalloc(sizeof(double *)*num_lsms);
      init_steps=(int *)shmalloc(sizeof(int)*num_lsms);
      for(int i=0; i<num_lsms; i++)
      {
        evecs[i]=(double *)shmalloc(sizeof(double)*3*size_lsms);
        init_steps[i]=initial_steps;
      }
      total_init_steps=num_lsms*initial_steps;
        

      // Initialize the correct evec generator
      switch(evec_generation_mode)
      {
      case Random :  generator = new RandomEvecGenerator(size_lsms);
        break;
      case Constant: generator = new ConstantEvecGenerator(size_lsms, ev0, num_lsms);
        break;
     //case WangLandau_1d : generator = new WL1dEvecGenerator<std::mt19937>(size_lsms, num_lsms,
     //                                                                      evecs, wl_inf, wl_outf, wl_stepf);
     case WangLandau_1d : generator = new WL1dEvecGenerator<boost::mt19937>(size_lsms, num_lsms,
                                                                           evecs, wl_inf, wl_outf, wl_stepf);
        break;
      case ExhaustiveIsing : generator = new ExhaustiveIsing1dEvecGenerator(size_lsms, num_lsms,
                                                                            evecs, wl_inf, wl_outf);
        break;
      //case WangLandau_2d : generator = new WL2dEvecGenerator<std::mt19937>(size_lsms, num_lsms,
      //                                                                     evecs, wl_inf, wl_outf, wl_stepf);
      case WangLandau_2d : generator = new WL2dEvecGenerator<boost::mt19937>(size_lsms, num_lsms,
                                                                           evecs, wl_inf, wl_outf, wl_stepf);
        break;
      default: std::cerr<<"The code should never arrive here: UNKNOWN EVEC GENERATION MODE\n";
        exit(1);
      }

      for(int i=0; i<num_lsms; i++)
      {
        generator->initializeEvec(i,evecs[i]);
      }
      std::cout<<"This is the master node\n";
      // issue initial commands to all LSMS instances
      running=0;
      bool more_work=true;
      if(total_init_steps>0)
      {
        for(int i=0; i<num_lsms; i++)
        {
          std::cout<<"starting initial calculation in group "<<i<<std::endl;
          //MPI_Send(evecs[i], 3*size_lsms, MPI_DOUBLE, lsms_rank0[i], 5, MPI_COMM_WORLD);
          shmem_double_put(evec, evecs[i], 3*size_lsms, lsms_rank0[i]);
          shmem_int_p(&op, 5, lsms_rank0[i]);
          shmem_fence();


          num_steps--; running++; stepCount++;
          if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
        }
        shmem_barrier(0, lsms_rank0[0], 2, pSync1);
        // first deal with the initial steps:
        while(running>0)
        {
          //if(return_moments_flag)
          //  MPI_Recv(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          //else
          //  MPI_Recv(r_values,R_VALUE_OFFSET,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          
          shmem_int_wait(&flag,-1);

          running--;
          // std::cout<<"received energy E_tot ="<<r_values[0]<<std::endl;
          // std::cout<<"    band energy E_band="<<r_values[1]<<std::endl;
          if(total_init_steps>0)
          {
            //int r_group=(status.MPI_SOURCE-align)/comm_size;
            int r_group=(flag-align)/comm_size;
            std::cout<<"starting additional calculation in group "<<r_group<<std::endl;

            if(init_steps[r_group]>0)
            {
              more_work = !(generator->generateUnsampledEvec(r_group,evecs[r_group],r_values[energyIndex]));
              init_steps[r_group]--; total_init_steps--;
            }
                
            //MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 5, MPI_COMM_WORLD);
            shmem_double_put(r_values, evecs[r_group],  3*size_lsms, lsms_rank0[r_group]); //TODO check this
            shmem_fence();
                
            num_steps--; running++; stepCount++;
            if(restrict_steps && num_steps<=0) more_work=false;
            if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
            walltime = get_rtc() - walltime_0;
            if(restrict_time && walltime>=max_time) more_work=false;
            if(restrict_time) std::cout<<"      "<<max_time-walltime<<" seconds remaining\n";
          }
              
        }
      }
      more_work=true;
      running=0;
      for(int i=0; i<num_lsms; i++)
      {
        std::cout<<"starting main calculation in group "<<i<<std::endl;
        //MPI_Send(evecs[i], 3*size_lsms, MPI_DOUBLE, lsms_rank0[i], 5, MPI_COMM_WORLD);
        shmem_double_put(evec, evecs[i], 3*size_lsms, lsms_rank0[i]);
        shmem_int_p(&op, 5, lsms_rank0[i]);
        shmem_fence();
        num_steps--; running++; stepCount++;
        if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
      }
      shmem_barrier(0, lsms_rank0[0], 2, pSync1);
        
      generator->startSampling();
      // wait for results and issue new commands or wind down
      while(running>0)
      {
        //MPI_Recv(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
        shmem_int_wait(&flag,-1);

        running--;
        std::cout<<"received energy E_tot ="<<r_values[0]<<std::endl;
        std::cout<<"    band energy E_band="<<r_values[1]<<std::endl;
        // printf("from status.MPI_SOURCE=%d\n",status.MPI_SOURCE);
        energy_accumulator+=r_values[0]; energies_accumulated++;
        if(more_work)
        {
          int r_group=(status.MPI_SOURCE-align)/comm_size;
          std::cout<<"starting additional calculation in group "<<r_group<<std::endl;
              
          if(generator_needs_moment)
          {
            double m0,m1,m2;
            m0=0.0; m1=0.0; m2=0.0;
            for(int i=0; i<3*size_lsms; i+=3)
            {
              m0+=r_values[R_VALUE_OFFSET+i];
              m1+=r_values[R_VALUE_OFFSET+i+1];
              m2+=r_values[R_VALUE_OFFSET+i+2];
            }
            switch(second_dimension)
            {
            case  MagneticMoment : magnetization=std::sqrt(m0*m0+m1*m1+m2*m2); break;
            case  MagneticMomentX : magnetization=m0; break;
            case  MagneticMomentY : magnetization=m1; break;
            case  MagneticMomentZ : magnetization=m2; break;
            }
            if(generator->generateEvec(r_group,evecs[r_group],r_values[energyIndex],magnetization, &accepted))
              more_work=false;
          } else {
            if(generator->generateEvec(r_group,evecs[r_group],r_values[energyIndex], &accepted)) more_work=false;
          }

          //MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 5, MPI_COMM_WORLD);
          shmem_double_put(r_values, evecs[r_group],  3*size_lsms, lsms_rank0[r_group]); //TODO check this
          shmem_fence();

          num_steps--; running++; stepCount++;
          if(restrict_steps && num_steps<=0) more_work=false;
          if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
          walltime = get_rtc() - walltime_0;
          if(restrict_time && walltime>=max_time) more_work=false;
          if(restrict_time) std::cout<<"      "<<max_time-walltime<<" seconds remaining\n";
        }
        else
        {
          // send an exit message to this instance of LSMS
          int r_group=(status.MPI_SOURCE-align)/comm_size;

          MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 2, MPI_COMM_WORLD);
        }

        if(step_out_flag && accepted)
        {
          step_out_file<<"# iteration "<<energies_accumulated<<std::endl;
          step_out_file.precision(15);
          step_out_file<<energies_accumulated<<std::endl;
          step_out_file<<r_values[0]<<"  "<<r_values[1]<<std::endl;
          for(int j=0; j<3*size_lsms; j+=3)
          {
            step_out_file<<r_values[j+R_VALUE_OFFSET]<<"  "<<r_values[j+R_VALUE_OFFSET+1]
                         <<"  "<<r_values[j+R_VALUE_OFFSET+2]<<std::endl;
          }
        }
        // write restart file every restartWriteFrequency seconds
        if(walltime>nextWriteTime)
        {
          generator->writeState("WLrestart.jsn");
          nextWriteTime+=restartWriteFrequency;
        }

      }
      generator->writeState("WLrestart.jsn");
/*
  if(evec_generation_mode==WangLandau_1d)
  (static_cast<WL1dEvecGenerator<std::mt19937> *>(generator))->writeState("WLrestart.state");
  if(evec_generation_mode==ExhaustiveIsing)
  (static_cast<ExhaustiveIsing1dEvecGenerator *>(generator))->writeState("WLrestart.state");
*/
      for(int i=0; i<num_lsms; i++) free(evecs[i]);
      shfree(evecs);
      //shfree(r_values);
    }
  }

  if(world_rank==0)
  {
    if(step_out_flag)
    {
      step_out_file<<"# end\n-1\n"
                   <<energy_accumulator/double(energies_accumulated)<<std::endl;
      step_out_file.close();
    }
    std::cout<<"Finished all scheduled calculations. Freeing resources.\n";
    std::cout<<"Energy mean = "<<energy_accumulator/double(energies_accumulated)<<"Ry\n";
  }


  if(num_lsms>1)
  {
    // make sure averyone arrives here:
    MPI_Bcast(stupid,37,MPI_CHAR,0,MPI_COMM_WORLD);

    if(world_rank==0)
    {
      MPI_Comm_free(&local_comm);
    }
    else if(my_group>=0)
    {
      MPI_Comm_free(&local_comm);
    }
  }



  if(world_rank==0)
  {
    double walltime = get_rtc() - walltime_0;
    std::cout<<" WL-LSMS finished in "<<walltime<<" seconds.\n";
    std::cout<<" Monte-Carlo steps / walltime = "
             <<double(stepCount)/walltime<<"/sec\n";
  }

#ifdef USE_PAPI
  PAPI_stop_counters(papi_values,hw_counters);
  papi_values[hw_counters  ] = PAPI_get_real_cyc()-papi_real_cyc_0;
  papi_values[hw_counters+1] = PAPI_get_real_usec()-papi_real_usec_0;
  papi_values[hw_counters+2] = PAPI_get_virt_cyc()-papi_virt_cyc_0;
  papi_values[hw_counters+3] = PAPI_get_virt_usec()-papi_virt_usec_0;
  long long accumulated_counters[NUM_PAPI_EVENTS+4];
/*
  for(int i=0; i<hw_counters; i++)
  {
  printline(ttos(papi_event_name[i])+" = "+ttos(papi_values[i]),
  std::cout,parameters.myrankWorld);
  }
  printline("PAPI real cycles : "+ttos(papi_values[hw_counters]),
  std::cout,parameters.myrankWorld);
  printline("PAPI real usecs : "+ttos(papi_values[hw_counters+1]),
  std::cout,parameters.myrankWorld);
  printline("PAPI user cycles : "+ttos(papi_values[hw_counters+2]),
  std::cout,parameters.myrankWorld);
  printline("PAPI user usecs : "+ttos(papi_values[hw_counters+3]),
  std::cout,parameters.myrankWorld);
*/
  
  //MPI_Reduce(papi_values,accumulated_counters,hw_counters+4,
  //           MPI_LONG,MPI_SUM,0,MPI_COMM_WORLD);

  shmem_long_sum_to_all(accumulated_counters, papi_values, hw_counters+4,
      comm.pestart, comm.logPE_stride, comm.size, pWrk_i, pSync2);



  if(world_rank==0)
  {
    for(int i=0; i<hw_counters; i++)
    {
      std::cout<<"Accumulated: "<<(papi_event_name[i])<<" = "<<(accumulated_counters[i])<<"\n";
    }
    std::cout<<"PAPI accumulated real cycles : "<<(accumulated_counters[hw_counters])<<"\n";
    std::cout<<"PAPI accumulated user cycles : "<<(accumulated_counters[hw_counters+2])<<"\n";
    double gflops_papi = ((double)accumulated_counters[1])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gflops_hw_double = ((double)accumulated_counters[2])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gflops_hw_single = ((double)accumulated_counters[3])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gips = ((double)accumulated_counters[0])/(1000.0*(double)papi_values[hw_counters+1]);
    std::cout<<"PAPI_FP_OPS real GFLOP/s : "<<(gflops_papi)<<"\n";
    std::cout<<"PAPI hw double real GFLOP/s : "<<(gflops_hw_double)<<"\n";
    std::cout<<"PAPI hw single real GFLOP/s : "<<(gflops_hw_single)<<"\n";
    std::cout<<"PAPI real GINST/s : "<<(gips)<<"\n";
  }
#endif


  //MPI_Finalize();
  return 0;
}
示例#12
0
文件: timeblockmm.c 项目: di/school
main(int argc, char *argv[])
{
float **a,**b,**c;
int n,n1,n2;
int i,j;
//double t0,t1;
struct timeval t0,t1;
long mtime, seconds, useconds;

// Using PAPI - from countloop.c
if (PAPI_VER_CURRENT !=
    PAPI_library_init(PAPI_VER_CURRENT))
    ehandler("PAPI_library_init error.");

const size_t EVENT_MAX = PAPI_num_counters();
// Suppressing output
//    printf("# Max counters = %zd\n", EVENT_MAX);

if (PAPI_OK != PAPI_query_event(PAPI_TOT_INS))
    ehandler("Cannot count PAPI_TOT_INS.");

if (PAPI_OK != PAPI_query_event(PAPI_FP_OPS))
    ehandler("Cannot count PAPI_FP_OPS.");

if (PAPI_OK != PAPI_query_event(PAPI_L1_DCM))
    ehandler("Cannot count PAPI_L1_DCM.");

size_t EVENT_COUNT = 3;
int events[] = { PAPI_TOT_INS, PAPI_FP_OPS, PAPI_L1_DCM };
long long values[EVENT_COUNT];

// Take size from args, not prompt
// printf("Enter n:  ");  scanf("%d",&n);  printf("n = %d\n",n);
n = atoi(argv[1]);

//printf("Enter n1:  ");  scanf("%d",&n1);  printf("n1 = %d\n",n1);
//printf("Enter n2:  ");  scanf("%d",&n2);  printf("n2 = %d\n",n2);

// To conform to the other matrix functions
n1 = floor(sqrt(n));
n2 = n1;
n = n1*n2;
//printf("n = %d X %d = %d\n",n1,n2,n);
a = matrix(1,n,1,n);
for (i=1;i<=n;i++) 
    for (j=1;j<=n;j++) 
        a[i][j] = i+j;

b = matrix(1,n,1,n);
for (i=1;i<=n;i++) 
    for (j=1;j<=n;j++) 
        b[i][j] = i-j;

//#ifdef PRINT
//print_matrix(a,1,n,1,n);
//printf("\n"); */
//print_matrix(b,1,n,1,n);
//printf("\n"); */
//#endif

//t0 = get_seconds();
//c = matrix_prod(n,n,n,n,a,b);
//t1 = get_seconds();
//printf("Time for matrix_prod = %f sec\n",t1-t0);

//t0 = get_seconds();
gettimeofday(&t0, NULL);
// Start PAPI
PAPI_start_counters(events, EVENT_COUNT);

if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
    ehandler("Problem reading counters.");

c = block_prod(n1,n1,n1,n2,n2,n2,a,b);

if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
    ehandler("Problem reading counters.");

//t1 = get_seconds();
//printf("Time for block_prod = %f sec\n",t1-t0);
gettimeofday(&t1, NULL);
seconds = t1.tv_sec - t0.tv_sec;
useconds = t1.tv_usec - t0.tv_usec;
mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5;
//printf("Time for matrix_prod = %f sec\n",t1-t0);
printf("%d\t%lld\t%lld\t%lld\t%ld\n", n, values[0], values[1],
    values[2], mtime);
}
int main(int argc, char **argv) {

   int quiet;
   int events[1],i;
   long long counts[1];

   int retval,num_counters;

   char test_string[]="Testing PAPI_L1_DCA predefined event...";

//   quiet=test_quiet();

   retval = PAPI_library_init(PAPI_VER_CURRENT);
   if (retval != PAPI_VER_CURRENT) {
//      if (!quiet) 
	printf("Error PAPI_library_init: %d\n",retval);
//      test_fail(test_string);
   }

   retval = PAPI_query_event(PAPI_L1_DCA);
   if (retval != PAPI_OK) {
//      if (!quiet) 
	printf("PAPI_L1_DCA not available\n");
//      test_fail(test_string);
   }


   num_counters=PAPI_num_counters();

   events[0]=PAPI_L1_DCA;


   /*******************************************************************/
   /* Test if the C compiler uses a sane number of data cache acceess */
   /*******************************************************************/

#define ARRAYSIZE 1000

   double array[ARRAYSIZE];
   double aSumm = 0.0;

   if (!quiet) printf("Write test:\n");
   PAPI_start_counters(events,1);
   
   for(i=0; i<ARRAYSIZE; i++) { 
      array[i]=(double)i;
   }
     
   PAPI_stop_counters(counts,1);

//   if (!quiet) {
      printf("\tL1 D accesseses: %lld\n",counts[0]);
      printf("\tShould be roughly: %d\n",ARRAYSIZE);
//   }

   PAPI_start_counters(events,1);
   
   for(i=0; i<ARRAYSIZE; i++) { 
       aSumm += array[i]; 
   }
     
   PAPI_stop_counters(counts,1);

//   if (!quiet) {
      printf("Read test (%lf):\n",aSumm);
      printf("\tL1 D accesseses: %lld\n",counts[0]);
      printf("\tShould be roughly: %d\n",ARRAYSIZE);
//   }

   PAPI_shutdown();

//   test_pass(test_string);

   return 0;
}
示例#14
0
void ipm_hpm_start() {
    int i,j,k,rv;


#ifndef HPM_DISABLED

#ifdef HPM_PAPI
    char errstring[PAPI_MAX_STR_LEN];
    char event_name[PAPI_MAX_STR_LEN];
#endif

    if(task.hpm_eventset < 0) {
	return;
    }

    for(i=0;i<MAXSIZE_REGION;i++) {
	for(j=0;j<MAXSIZE_NEVENTSETS;j++) {
	    for(k=0;k<MAXSIZE_HPMCOUNTERS;k++) { 
		task.hpm_count[i][j][k] = 0;
	    }
	}
    }

#ifdef HPM_PAPI
/* Initialize the low level PAPI library */

    rv = PAPI_library_init(PAPI_VER_CURRENT);
    if(rv != PAPI_VER_CURRENT) {
	printf("IPM: %d papi_error library_init in hpm_start rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	perror("PAPI_library_init");
    } 
    if(rv == PAPI_OK) {
	if(task.flags & DEBUG && task.mpi_rank==0) {
	    printf("IPM: %d PAPI_library_init in hpm_start rv=%d \"%s\"\n",
		   task.mpi_rank,
		   rv,
		   PAPI_strerror(rv));
	}
    }

#ifdef CPU_PPC450D
/* then we are on a bluegene P*/
    if (node_rank != 0 ) return;
#endif

    rv = PAPI_num_counters();
    if(rv < 0) {
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	printf("IPM: %d papi_error num_counters in hpm_start rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
    }

    if ((hwinfo = PAPI_get_hardware_info()) == NULL) {
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	printf("IPM: %d papi_error PAPI_get_hardware_info in hpm_start rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
    } else {
	/* do something clever wrt. formal machine description */
    }

    rv = PAPI_create_eventset(&papi_eventset[0]);
    if(rv != PAPI_OK) {
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	printf("IPM: %d papi_error create_eventset in hpmstart rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
    }

    if(0) {
	printf("PAPI: JIE_DEBUG:: rank %d task.hpm_eventset is %d.\n",
	       task.mpi_rank, task.hpm_eventset);
	fflush(stdout);
    }

    for(i=0;i<MAXSIZE_HPMCOUNTERS;i++) {
       
	if (papi_event[task.hpm_eventset][i] != -1) {
	    rv = PAPI_query_event(papi_event[task.hpm_eventset][i]);

	    if (rv != PAPI_OK) {
		PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
		PAPI_event_code_to_name(papi_event[task.hpm_eventset][i],event_name);
		printf("IPM: %d papi_error query_event %s %d \"%s\"\n",
		       task.mpi_rank, 
		       event_name,
		       rv,
		       PAPI_strerror(rv));
	    }
	   
	    rv = PAPI_add_event(papi_eventset[0], papi_event[task.hpm_eventset][i]);
	    if(rv != PAPI_OK) {
		PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
		PAPI_event_code_to_name(papi_event[task.hpm_eventset][i],event_name);
		printf("IPM: %d papi_error add_event %s %d \"%s\"\n",
		       task.mpi_rank, 
		       event_name,
		       rv,
		       PAPI_strerror(rv));
	    }

	    if(0) {
		PAPI_event_code_to_name(papi_event[task.hpm_eventset][i],event_name);
		printf("PAPI: JIE_DEBUG:: rank %d add event %s.\n",
		       task.mpi_rank, event_name);
		fflush(stdout);
	    }
	}
    }
    rv = PAPI_start(papi_eventset[0]);
    if (rv != PAPI_OK) {
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	printf("IPM: %d papi_error: start in hpm_start rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
	task.flags &= ~IPM_HPM_ACTIVE;
    }

#endif

#ifdef HPM_PMAPI
#ifdef AIX51
    rv = pm_init(PM_VERIFIED|PM_UNVERIFIED|PM_CAVEAT|PM_GET_GROUPS, &pmapi_info, &pmapi_groups_info);
#else 
    rv = pm_initialize(PM_VERIFIED|PM_UNVERIFIED|PM_CAVEAT|PM_GET_GROUPS, &pmapi_info, &pmapi_groups_info,PM_CURRENT);
#endif
 
    if(rv) {
	printf("IPM: %d pmapi_error: pm_initialize \n",
	       task.mpi_rank);
	pm_error("IPM: pmapi : pm_initialize",rv);
    }

    for(i=0;i<pmapi_info.maxpmcs;i++)
	pmapi_prog.events[i]=COUNT_NOTHING;
    pmapi_prog.mode.w          = 0;
    pmapi_prog.mode.b.user     = 1;
    pmapi_prog.mode.b.proctree = 1;

#ifndef POWER3

#ifdef CPU_POWER4
    pmapi_prog.mode.b.is_group  = 1;
    if(task.hpm_eventset == 0) { pmapi_prog.events[0]       = 60; }
    if(task.hpm_eventset == 1) { pmapi_prog.events[0]       = 59; }
    if(task.hpm_eventset == 2) { pmapi_prog.events[0]       =  5; }
    if(task.hpm_eventset == 3) { pmapi_prog.events[0]       = 58; }
    if(task.hpm_eventset == 4) { pmapi_prog.events[0]       = 53; }
#endif

#ifdef CPU_POWER5
    pmapi_prog.mode.b.is_group  = 1;
    if(task.hpm_eventset == 0) { pmapi_prog.events[0]       = 137; }
#endif

#ifdef CPU_POWER6
    pmapi_prog.mode.b.is_group  = 1;
/* use all the pm_hpm* groups 186 - 195 */
    pmapi_prog.events[0] = 186 + task.hpm_eventset;
#endif

#else 

    for(i=0;i<MAXSIZE_HPMCOUNTERS;i++) {
	pmapi_prog.events[i] = pmapi_event[task.hpm_eventset][i];
    }
#endif

    rv = pm_set_program_mythread(&pmapi_prog);
    if(rv) {
	printf("IPM: %d pmapi_error: pm_set_program_mythread\n",
	       task.mpi_rank);
	pm_error("IPM: pmapi : pm_set_program_mythread",rv);
    }
    rv = pm_start_mythread();
    if(rv) {
	printf("IPM: %d pmapi_error: pm_start_mythread\n",
	       task.mpi_rank);
	pm_error("IPM: pmapi : pm_start_mythread",rv);
	task.flags &= ~IPM_HPM_ACTIVE;
    }
    rv = pm_reset_data_mythread();
    if(rv) {
	printf("IPM: %d pmapi_error: pm_reset_data_mythread\n",
	       task.mpi_rank);
    }
#endif


#endif
    return;
}
示例#15
0
文件: bench_rank.c 项目: jpflori/m4ri
int main(int argc, char **argv) {
  int opts = global_options(&argc, &argv);
  int data_len;

#ifdef HAVE_LIBPAPI
  int papi_counters = PAPI_num_counters();
  if (papi_counters < papi_array_len) {
    fprintf(stderr, "%s: Warning: there are only %d hardware counters available!\n", progname, papi_counters);
    papi_array_len = papi_counters;
  }
  if (papi_test(papi_events, papi_array_len))
    exit(1);

  for (int nv = 0; nv <= papi_array_len; ++nv)
    loop_calibration[nv] = 100000000;

  data_len = papi_array_len + 1;
#else
  data_len = 2;
#endif
  if (opts < 0 || argc < 2 || argc > 5) {
    print_help_and_exit();
  }

  struct elim_params params;
  params.m = atoi(argv[1]);
  if (argc >= 3)
    params.n = atoi(argv[2]);
  else
    params.n = params.m;

  if (argc >= 4)
    params.algorithm = argv[3];
  else
    params.algorithm = "ple";
  if (argc >= 5)
    params.r = atoi(argv[4]);
  else
    params.r = MIN(params.m, params.n);

  srandom(17);
  unsigned long long data[16];

  for (int i = 0; i < 4; ++i)
    run_nothing((void*)&params, data, &data_len);

  run_bench(run, (void*)&params, data, data_len);

  double cc_per_op = ((double)data[1])/ ( (double)params.m * (double)params.n * powl((double)params.r,0.807) );

  printf("m: %5d, n: %5d, last r: %5d, cpu cycles: %12llu, cc/(mnr^0.807): %.5lf, ", params.m, params.n, params.r, data[1], cc_per_op);
  print_wall_time(data[0] / 1000000.0);
  printf(", ");
  print_cpu_time(data[1] / (double)cpucycles_persecond());
  printf("\n");
#ifdef HAVE_LIBPAPI
  for (int n = 1; n < data_len; ++n) {
    double tmp = ((double)data[n]) / powl((double)params.n,2.807);
    printf("%20s (%20llu) per bit (divided by n^2.807): %15.5f\n", papi_event_name(papi_events[n - 1]), data[n], tmp);
  }
#endif
}
示例#16
0
文件: timemmm.c 项目: di/school
main(int argc, char *argv[])
{
    float **a,**b,**c;
    int n;
    int NB;
    int i,j;
    int x;
//double t0,t1;
    struct timeval t0,t1;
    long mtime, seconds, useconds;

// Using PAPI - from countloop.c
    if (PAPI_VER_CURRENT !=
            PAPI_library_init(PAPI_VER_CURRENT))
        ehandler("PAPI_library_init error.");

    const size_t EVENT_MAX = PAPI_num_counters();
// Suppressing output
//    printf("# Max counters = %zd\n", EVENT_MAX);

    if (PAPI_OK != PAPI_query_event(PAPI_TOT_INS))
        ehandler("Cannot count PAPI_TOT_INS.");

    if (PAPI_OK != PAPI_query_event(PAPI_FP_OPS))
        ehandler("Cannot count PAPI_FP_OPS.");

    if (PAPI_OK != PAPI_query_event(PAPI_L1_DCM))
        ehandler("Cannot count PAPI_L1_DCM.");

    size_t EVENT_COUNT = 3;
    int events[] = { PAPI_TOT_INS, PAPI_FP_OPS, PAPI_L1_DCM };
    long long values[EVENT_COUNT];

// Take size from args, not prompt
// printf("Enter n:  ");  scanf("%d",&n);  printf("n = %d\n",n);
    n = atoi(argv[1]);
    NB = atoi(argv[2]);

    a = matrix(1,n,1,n);
    for (i=1; i<=n; i++)
        for (j=1; j<=n; j++)
            a[i][j] = i+j;

    b = matrix(1,n,1,n);
    for (i=1; i<=n; i++)
        for (j=1; j<=n; j++)
            b[i][j] = i-j;


//t0 = get_seconds();
    gettimeofday(&t0, NULL);
// Start PAPI
    PAPI_start_counters(events, EVENT_COUNT);

    if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
        ehandler("Problem reading counters.");
//for (x=0;x<1000;x++){
    c = matrix_prod(n,n,n,n,a,b,NB);
//}
    if (PAPI_OK != PAPI_read_counters(values, EVENT_COUNT))
        ehandler("Problem reading counters.");

//t1 = get_seconds();
    gettimeofday(&t1, NULL);
    seconds = t1.tv_sec - t0.tv_sec;
    useconds = t1.tv_usec - t0.tv_usec;
    mtime = ((seconds) * 1000 + useconds/1000.0) + 0.5;
//printf("Time for matrix_prod = %f sec\n",t1-t0);
    printf("%d\t%lld\t%lld\t%lld\t%ld\n", n, values[0], values[1],
           values[2], mtime);
}
示例#17
0
void ipm_pthread_hpm_init() 
{
    int i,j,k,rv;
  
#ifdef HPM_PAPI
    char errstring[PAPI_MAX_STR_LEN];
    char event_name[PAPI_MAX_STR_LEN];
#endif
  
    //task.flags |= DEBUG; 

    if(task.hpm_eventset < 0) {
	return;
    }
  
    for(i=0;i<MAXSIZE_REGION;i++) {
	for(j=0;j<MAXSIZE_NEVENTSETS;j++) {
	    for(k=0;k<MAXSIZE_HPMCOUNTERS;k++) { 
		task.hpm_count[i][j][k] = 0;
	    }
	}
    }
  
#ifdef HPM_PAPI
    for( i=0; i<MAXSIZE_NTHREADS; i++ )
    {
	papi_eventset[i]=PAPI_NULL;
    }

  
    /* Initialize the low level PAPI library */
  
    rv = PAPI_library_init(PAPI_VER_CURRENT);
    if(rv != PAPI_VER_CURRENT) {
	printf("IPM: %d papi_error library_init in hpm_start rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	perror("PAPI_library_init");
    } 
  
  
    rv = PAPI_thread_init(pthread_self);
    if( rv != PAPI_OK )
	if(task.flags & DEBUG && task.mpi_rank==0) {
	    printf("IPM: %d PAPI_thread_init in hpm_start rv=%d \"%s\"\n",
		   task.mpi_rank,
		   rv,
		   PAPI_strerror(rv));
	    PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	    perror("PAPI_thread_init");
	}


    rv = PAPI_num_counters();

    if(rv < 0) {
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	printf("IPM: %d papi_error num_counters in hpm_start rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
    }

    if ((hwinfo = PAPI_get_hardware_info()) == NULL) {
	PAPI_perror(rv, errstring, PAPI_MAX_STR_LEN);
	printf("IPM: %d papi_error PAPI_get_hardware_info in hpm_start rv=%d \"%s\"\n",
	       task.mpi_rank,
	       rv,
	       PAPI_strerror(rv));
    } else {
	/* do something clever wrt. formal machine description */
    }
  
    return;
}
示例#18
0
int main(int argc, char *argv[]) {


	float rtime1, rtime2, ptime1, ptime2, mflops;
	long long flpops;

	unsigned long int tid;
	int num_hwcntrs = 0;
	int fip = 0, retval;
	float real_time, proc_time;
	long long flpins;

	int i;
	unsigned int EventSet = PAPI_NULL; 
    int count = 0, err_count = 0;


    PAPI_event_info_t info;

    long long ( values2[2] )[2];
    long long min, max;
    int PAPI_event, mythreshold = THRESHOLD;
    char event_name1[PAPI_MAX_STR_LEN];
    const PAPI_hw_info_t *hw_info = NULL;
    int num_events, mask;
    int num_flops = NUM_FLOPS;
    long long elapsed_us, elapsed_cyc;



tests_quiet( argc, argv );  /* Set TESTS_QUIET variable */



    retval = PAPI_library_init( PAPI_VER_CURRENT );
    if ( retval != PAPI_VER_CURRENT )
      test_fail( __FILE__, __LINE__, "PAPI_library_init", retval );

  retval = PAPI_create_eventset( &EventSet );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_create_eventset", retval );

	/* Get hardware info */
  hw_info = PAPI_get_hardware_info(  );
  if ( hw_info == NULL )
      test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 );

  EventSet = 	add_two_nonderived_events( &num_events, &PAPI_event, &mask );

  printf("Using %#x for the overflow event\n",PAPI_event);

  if ( PAPI_event == PAPI_FP_INS ) {
      mythreshold = THRESHOLD;
  }
  else {
		#if defined(linux)
      mythreshold = ( int ) hw_info->cpu_max_mhz * 20000;
		#else
      mythreshold = THRESHOLD * 2;
		#endif
  }

  retval = PAPI_start( EventSet );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_start", retval );

  do_flops( NUM_FLOPS );

	/* stop the calibration run */
  retval = PAPI_stop( EventSet, values2[0] );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_stop", retval );


	/* set up overflow handler */
  retval = PAPI_overflow( EventSet, PAPI_event, mythreshold, 0, handler );
  if ( retval != PAPI_OK ) {
      test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );
  }

	/* Start overflow run */
  retval = PAPI_start( EventSet );
  if ( retval != PAPI_OK ) {
      test_fail( __FILE__, __LINE__, "PAPI_start", retval );
  }

  do_flops( num_flops );

	/* stop overflow run */
  retval = PAPI_stop( EventSet, values2[1] );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_stop", retval );
  retval = PAPI_overflow( EventSet, PAPI_event, 0, 0, handler );
  if ( retval != PAPI_OK )
      test_fail( __FILE__, __LINE__, "PAPI_overflow", retval );

  if ( !TESTS_QUIET ) {
      if ( ( retval =
         PAPI_event_code_to_name( PAPI_event, event_name1 ) ) != PAPI_OK )
         test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval );

     printf( "Test case: Overflow dispatch of 2nd event in set with 2 events.\n" );
     printf( "---------------------------------------------------------------\n" );
     printf( "Threshold for overflow is: %d\n", mythreshold );
     printf( "Using %d iterations\n", num_flops );
     printf( "-----------------------------------------------\n" );

     printf( "Test type    : %16d%16d\n", 1, 2 );
     printf( OUT_FMT, event_name1, ( values2[0] )[1], ( values2[1] )[1] );
     printf( OUT_FMT, "PAPI_TOT_CYC", ( values2[0] )[0], ( values2[1] )[0] );
     printf( "Overflows    : %16s%16d\n", "", total );
     printf( "-----------------------------------------------\n" );
 }

 retval = PAPI_cleanup_eventset( EventSet );
 if ( retval != PAPI_OK )
  test_fail( __FILE__, __LINE__, "PAPI_cleanup_eventset", retval );

retval = PAPI_destroy_eventset( &EventSet );
if ( retval != PAPI_OK )
  test_fail( __FILE__, __LINE__, "PAPI_destroy_eventset", retval );

if ( !TESTS_QUIET ) {
  printf( "Verification:\n" );
#if defined(linux) || defined(__ia64__) || defined(_POWER4)
  num_flops *= 2;
#endif
  if ( PAPI_event == PAPI_FP_INS || PAPI_event == PAPI_FP_OPS ) {
     printf( "Row 1 approximately equals %d %d\n", num_flops, num_flops );
 }
 printf( "Column 1 approximately equals column 2\n" );
 printf( "Row 3 approximately equals %u +- %u %%\n",( unsigned ) ( ( values2[0] )[1] / ( long long ) mythreshold ),( unsigned ) ( OVR_TOLERANCE * 100.0 ) );
}

min =
( long long ) ( ( ( double ) values2[0][1] * ( 1.0 - OVR_TOLERANCE ) ) /
  ( double ) mythreshold );
max =
( long long ) ( ( ( double ) values2[0][1] * ( 1.0 + OVR_TOLERANCE ) ) /
  ( double ) mythreshold );
printf( "Overflows: total(%d) > max(%lld) || total(%d) < min(%lld) \n", total,
  max, total, min );
if ( total > max || total < min )
  test_fail( __FILE__, __LINE__, "Overflows", 1 );



printf("Initial thread id is: %lu\n",tid);

	/* Initialize the PAPI library and get the number of counters available */

if ((num_hwcntrs = PAPI_num_counters()) <= 0)  
  handle_error(1);



  /*  The installation supports PAPI, but has no counters */
if ((num_hwcntrs = PAPI_num_counters()) == 0 )
    fprintf(stderr,"Info:: This machine does not provide hardware counters.");

printf("This system has %d available counters.\n", num_hwcntrs);

if (num_hwcntrs > 2)
  num_hwcntrs = 2;

	 /* Start counting events */




if (PAPI_start_counters(Events, num_hwcntrs) != PAPI_OK)
  handle_error(1);

if (argc != 8) {
  printf("\nError :: Ejecutar como : a.out archivo_BD Num_elem archivo_queries Num_queries N_THREADS numero_K Dimension_objetos\n");
  return 0;
}
TOPK = atoi(argv[6]);
DIM = atoi(argv[7]);
double **DB;
	double **Consultas; //Cola de consultas
	int N_QUERIES, N_DB;
	char str_f[256];
	double dato[DIM];
	int j;
	FILE *f_dist, *fquery;
	Elem *heap, e_temp,*answer;
	int *acum, N_THREADS;


	//N_THREADS es el nro. de threads con el que se lanzará la región paralela
	N_THREADS = atoi(argv[5]);
	//N_QUERIES es el nro. de consultas
	N_QUERIES = atoi(argv[4]);
	N_DB = atoi(argv[2]);

	printf("\nN_QUERIES = %d\nN_THREADS = %d\n", N_QUERIES, N_THREADS);
	fflush(stdout);

	acum = (int *) malloc(sizeof (int)*N_THREADS);
	for (i = 0; i < N_THREADS; i++)
		acum[i] = 0;

	sprintf(str_f, "%s", argv[1]);
	printf("\nAbriendo %s... ", argv[1]);
	fflush(stdout);
	f_dist = fopen(str_f, "r");
	printf("OK\n");
	fflush(stdout);


	Consultas = (double **) malloc(sizeof (double *)*N_QUERIES);
	for (i = 0; i < N_QUERIES; i++)
		Consultas[i] = (double *) malloc(sizeof (double)*DIM);

	DB = (double **) malloc(sizeof (double *)*N_DB);
	for (i = 0; i < N_DB; i++)
		DB[i] = (double *) malloc(sizeof (double)*DIM);

	answer = (Elem *)malloc(sizeof(Elem)*N_QUERIES*TOPK);

	printf("\nCargando DB... ");
	fflush(stdout);
	for (i = 0; i < N_DB; i++) {
		//Usar leedato_cophir() cuando se utilice la BD Cophir para no tener problemas con las ","
		//if (leedato_cophir(dato, f_dist) == ERROR || feof(f_dist))
		if (leedato(dato, f_dist) == ERROR || feof(f_dist)) {
			printf("\n\nERROR :: N_DB mal establecido\n\n");
			fflush(stdout);
			fclose(f_dist);
			break;
		}
		copiavalor(DB[i], dato);
	}
	fclose(f_dist);
	printf("OK\n");
	fflush(stdout);

	if ((fquery = fopen(argv[3], "r")) == NULL)
		printf("Error al abrir para lectura el archivo de qeuries: %s\n", argv[3]);
	else
		printf("Abriendo  para lectura %s\n", argv[3]);
	printf("\nCargando Consultas... ");
	fflush(stdout);
	for (i = 0; i < N_QUERIES; i++) {
		//Usar leedato_cophir() cuando se utilice la BD Cophir para no tener problemas con las ","
		//if (leedato_cophir(dato, fquery) == ERROR || feof(fquery))
		if (leedato(dato, fquery) == ERROR || feof(fquery)) {
			printf("\n\nERROR :: N_QUERIES mal establecido, Menos queries que las indicadas\n\n");
			fflush(stdout);
			fclose(fquery);
			break;
		}
		copiavalor(Consultas[i], dato);
	}
	fclose(fquery);
	printf("OK\n");
	fflush(stdout);

	PAPI_start_counters((int*) Events, NUM_EVENTS);
	omp_set_num_threads(N_THREADS);

	elapsed_us = PAPI_get_real_usec(  );

	elapsed_cyc = PAPI_get_real_cyc(  );

	retval =
	PAPI_thread_init( ( unsigned
		long ( * )( void ) ) ( omp_get_thread_num ) );
	if ( retval != PAPI_OK ) {
		if ( retval == PAPI_ECMP )
			test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval );
		else
			test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval );
	}

#pragma omp parallel shared(Consultas, DB, N_QUERIES, N_DB, N_THREADS, acum, DIM)
	{
		float real_time;
		struct timeval t1, t2;
		int i, j;
		Elem *heap, e_temp;
		double d;
		int n_elem = 0;
		int trid = omp_get_thread_num(); //ID del thread
		int procs = omp_get_num_threads(); //Nro. total de threads
		double suma = 0;

		suma = 0;
		heap = (Elem *) malloc(sizeof (Elem) * TOPK);

#pragma omp barrier

#pragma omp master
		{
			gettimeofday(&t1, 0);
		}

		//Cada hilo accede a un subconjunto de las consultas. Cada hio accede de manera circular al arreglo de consultas.
        for (i = trid; i < N_QUERIES; i += procs) {
         n_elem = 0;
         for (j = 0; j < N_DB; j++) {

            d = distancia(Consultas[i], DB[j]);
				//Si la distancia del objeto a la consulta es menor que la raíz del heap, entonces se inserta en el heap. La raíz siempre mantiene la mayor de las distancias

            if(n_elem<TOPK){
               e_temp.dist = d;
               e_temp.ind = j;
               inserta2(heap, &e_temp, &n_elem);
           }
           if (n_elem==TOPK){
               if (d < topH(heap, &n_elem)) {
                  e_temp.dist = d;
                  e_temp.ind = j;
					//Si el heap no está lleno, se inserta el elemento
                  if (n_elem < TOPK)
                     inserta2(heap, &e_temp, &n_elem);
						//Si el heap está lleno, se inserta el elemento nuevo y se saca el que era antes de mayor de distancia. popush2() hace las operaciones de sacar el elemento mayor e insertar el nuevo.
                 else
                     popush2(heap, &n_elem, &e_temp);
             }}
         }

			//En este punto del código se tienen los K elemntos más cercanos a la consulta en 'heap'. Se pueden extraer con extraer2()
         for (j = 0; j < TOPK ; j++) {
           extrae2(heap, &n_elem, &e_temp);
           answer[i*TOPK+j].ind = e_temp.ind;
           answer[i*TOPK+j].dist = e_temp.dist;
       }
			//Realizamos una operación con los resultados para que el compilador no evite hacer instrucciones que considere que el usuario no utiliza. Simplemente cada hilo suma las distancias de los elementos mas cercanos a la consulta 
   }
   Thread( 1000000 * ( tid + 1 ) );



   fflush(stdout);

#pragma omp barrier

#pragma omp master
   {   

    if ( fip > 0 ) {
		/* Setup PAPI library and begin collecting data from the counters */
       if ( fip == 1 ) {
          if ( ( retval =
             PAPI_flips( &real_time, &proc_time, &flpins,
                &mflops ) ) < PAPI_OK )
             test_fail( __FILE__, __LINE__, "PAPI_flips", retval );
     } else {
      if ( ( retval =
         PAPI_flops( &real_time, &proc_time, &flpins,
            &mflops ) ) < PAPI_OK )
         test_fail( __FILE__, __LINE__, "PAPI_flops", retval );
 }

 gettimeofday(&t2, 0);
 real_time = (t2.tv_sec - t1.tv_sec) + (float) (t2.tv_usec - t1.tv_usec) / 1000000;

 Salida_Multihilo = fopen("Salida_Multihilo.txt", "w");
 for (i = 0; i < N_QUERIES; ++i){
  fprintf(Salida_Multihilo, "Consulta id:: %d\n",i);
  for (j = 0; j < TOPK; ++j){
     fprintf(Salida_Multihilo,"ind = %d :: dist = %f\n",answer[(i*TOPK)+j].ind,answer[(i*TOPK)+j].dist);
 }
 fprintf(Salida_Multihilo, "---------------------------------\n");
}
fclose(Salida_Multihilo);

printf("\n\nK = %d", TOPK);
printf("\nReal Time = %f segundos.\n", real_time);
fflush(stdout);


if ( fip == 1 ) {
  if ( ( retval =
     PAPI_flips( &real_time, &proc_time, &flpins,
        &mflops ) ) < PAPI_OK )
     test_fail( __FILE__, __LINE__, "PAPI_flips", retval );
} else {
  if ( ( retval =
     PAPI_flops( &real_time, &proc_time, &flpins,
        &mflops ) ) < PAPI_OK )
     test_fail( __FILE__, __LINE__, "PAPI_flops", retval );
}

if ( !TESTS_QUIET ) {
  if ( fip == 1 ) {
     printf( "Real_time: %f Proc_time: %f Total flpins: ", real_time,
        proc_time );
 } else {
     printf( "Real_time: %f Proc_time: %f Total flpops: ", real_time,
        proc_time );
 }
 printf( LLDFMT, flpins );
 printf( " MFLOPS: %f\n", mflops );
}
}

}
free(heap);



	}//end pragma omp parallel

	elapsed_cyc = PAPI_get_real_cyc(  ) - elapsed_cyc;
	elapsed_us = PAPI_get_real_usec(  ) - elapsed_us;

	if ( !TESTS_QUIET ) {
		printf( "Master real usec   : \t%lld\n", elapsed_us );
		printf( "Master real cycles : \t%lld\n", elapsed_cyc );
	}

	const PAPI_hw_info_t *hwinfo = NULL;
	const PAPI_mh_tlb_info_t *mhinfo = NULL;
	const  PAPI_mh_cache_info_t *mhcacheinfo = NULL;
	const PAPI_mh_level_t *mhlevel = NULL;


	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT)
		exit(1);
	if ((hwinfo = PAPI_get_hardware_info()) == NULL)
		exit(1);	
	if ((mhinfo = PAPI_get_hardware_info()) == NULL)
		exit(1);
	if ((mhcacheinfo = PAPI_get_hardware_info()) == NULL)
		exit(1);
	if ((mhlevel = PAPI_get_hardware_info()) == NULL)
		exit(1);

	printf("\n\nA continuación información actual del equipo\n\n");

	printf("MH Type %d - Num entries %d  - Associativity %d \n",mhinfo->type, mhinfo->num_entries, mhinfo->associativity);
	printf("Cache MH type %d size %d line size %d num_lines %d Associativity %d\n\n",mhcacheinfo->type, mhcacheinfo->size,mhcacheinfo->line_size, mhcacheinfo->num_lines, mhcacheinfo->associativity);



    retval=papi_print_header("Available PAPI preset and user defined events plus hardware information.\n",&hwinfo );


    printf("Total hardware flops = %lld\n",(float)values[1]);
    printf("L2 data cache misses is %lld\n", values[0]);






    retval = PAPI_stop_counters(values, NUM_EVENTS);
    return 0;
}
示例#19
0
/**
 * Called by the CBTF collector service in order to start data collection.
 */
void cbtf_collector_start(const CBTF_DataHeader* header)
{
/**
 * Start sampling.
 *
 * Starts hardware counter (HWC) sampling for the thread executing this
 * function. Initializes the appropriate thread-local data structures and
 * then enables the sampling counter.
 *
 * @param arguments    Encoded function arguments.
 */
    /* Create and access our thread-local storage */
#ifdef USE_EXPLICIT_TLS
    TLS* tls = malloc(sizeof(TLS));
    Assert(tls != NULL);
    CBTF_SetTLS(TLSKey, tls);
#else
    TLS* tls = &the_tls;
#endif
    Assert(tls != NULL);


    tls->defer_sampling=false;

#ifndef NDEBUG
    IsCollectorDebugEnabled = (getenv("CBTF_DEBUG_COLLECTOR") != NULL);
    IsCollectorDetailsDebugEnabled = (getenv("CBTF_DEBUG_COLLECTOR_DETAILS") != NULL);
#if defined (HAVE_OMPT)
    IsOMPTDebugEnabled = (getenv("CBTF_DEBUG_COLLECTOR_OMPT") != NULL);
#endif
#endif

    /* Decode the passed function arguments */
    // Need to handle the arguments...
    CBTF_hwcsamp_start_sampling_args args;
    memset(&args, 0, sizeof(args));
    args.sampling_rate = 100;

    /* First set defaults */
    int hwcsamp_rate = 100;
    char* hwcsamp_papi_event = "PAPI_TOT_CYC,PAPI_TOT_INS";

#if defined (CBTF_SERVICE_USE_OFFLINE)
    char* hwcsamp_event_param = getenv("CBTF_HWCSAMP_EVENTS");
    if (hwcsamp_event_param != NULL) {
        hwcsamp_papi_event=hwcsamp_event_param;
    }

    const char* sampling_rate = getenv("CBTF_HWCSAMP_RATE");
    if (sampling_rate != NULL) {
        hwcsamp_rate=atoi(sampling_rate);
    }
    args.collector = 1;
    args.experiment = 0;
    tls->data.interval = (uint64_t)(1000000000) / (uint64_t)(hwcsamp_rate);;
#endif


    /* Initialize the actual data blob */
    memcpy(&tls->header, header, sizeof(CBTF_DataHeader));
    initialize_data(tls);


    /* We can not assign mpi rank in the header at this point as it may not
     * be set yet. assign an integer tid value.  omp_tid is used regardless of
     * whether the application is using openmp threads.
     * libmonitor uses the same numbering scheme as openmp.
     */
    tls->header.omp_tid = monitor_get_thread_num();
    tls->header.id = strdup(cbtf_collector_unique_id);
    tls->header.time_begin = CBTF_GetTime();


#ifndef NDEBUG
	if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] ENTER cbtf_collector_start\n",tls->header.pid,tls->header.omp_tid);
	}
#endif
    if(hwcsamp_papi_init_done == 0) {
#ifndef NDEBUG
	if (IsCollectorDebugEnabled) {
	    fprintf(stderr,"[%ld,%d] cbtf_collector_start: initialize papi\n",tls->header.pid,tls->header.omp_tid);
	}
#endif
	CBTF_init_papi();
	tls->EventSet = PAPI_NULL;
	tls->data.clock_mhz = (float) hw_info->mhz;
	hwcsamp_papi_init_done = 1;
    } else {
	tls->data.clock_mhz = (float) hw_info->mhz;
    }


    /* PAPI SETUP */
    CBTF_Create_Eventset(&tls->EventSet);

    int rval = PAPI_OK;

#ifndef NDEBUG
    if (IsCollectorDebugEnabled) {
       fprintf(stderr, "PAPI Version: %d.%d.%d.%d\n", PAPI_VERSION_MAJOR( PAPI_VERSION ),
                        PAPI_VERSION_MINOR( PAPI_VERSION ),
                        PAPI_VERSION_REVISION( PAPI_VERSION ),
                        PAPI_VERSION_INCREMENT( PAPI_VERSION ) );
       fprintf(stderr,"System has %d hardware counters.\n", PAPI_num_counters());
    }
#endif

/* In Component PAPI, EventSets must be assigned a component index
 * before you can fiddle with their internals. 0 is always the cpu component */
#if (PAPI_VERSION_MAJOR(PAPI_VERSION)>=4)
    rval = PAPI_assign_eventset_component( tls->EventSet, 0 );
    if (rval != PAPI_OK) {
        CBTF_PAPIerror(rval,"CBTF_Create_Eventset assign_eventset_component");
        return;
    }
#endif

    /* NOTE: if multiplex is turned on, papi internaly uses a SIGPROF handler.
     * Since we are sampling potentially with SIGPROF or now SIGRTMIN and we
     * prefer to limit our events to 6, we do not need multiplexing.
     */
    if (getenv("CBTF_HWCSAMP_MULTIPLEX") != NULL) {
#if !defined(RUNTIME_PLATFORM_BGP) 
	rval = PAPI_set_multiplex( tls->EventSet );
	if ( rval == PAPI_ENOSUPP) {
	    fprintf(stderr,"CBTF_Create_Eventset: Multiplex not supported\n");
	} else if (rval != PAPI_OK)  {
	    CBTF_PAPIerror(rval,"CBTF_Create_Eventset set_multiplex");
	}
#endif
    }

    /* TODO: check return values of direct PAPI calls
     * and handle them as needed.
     */
    /* Rework the code here to call PAPI directly rather than
     * call any OPENSS helper functions due to inconsitent
     * behaviour seen on various lab systems
     */
    int eventcode = 0;
    rval = PAPI_OK;
    if (hwcsamp_papi_event != NULL) {
	char *tfptr, *saveptr=NULL, *tf_token;
	tfptr = strdup(hwcsamp_papi_event);
	for (tf_token = strtok_r(tfptr, ",", &saveptr);
	     tf_token != NULL;
	     tf_token = strtok_r(NULL, ",", &saveptr) ) {

	    PAPI_event_name_to_code(tf_token,&eventcode);
	    rval = PAPI_add_event(tls->EventSet,eventcode);

	    if (rval != PAPI_OK)  {
		CBTF_PAPIerror(rval,"CBTF_Create_Eventset PAPI_event_name_to_code");
	    }
	}

	if (tfptr) free(tfptr);
	
    } else {
	PAPI_event_name_to_code("PAPI_TOT_CYC",&eventcode);
	rval = PAPI_add_event(tls->EventSet,eventcode);
	PAPI_event_name_to_code("PAPI_TOT_INS",&eventcode);
	rval = PAPI_add_event(tls->EventSet,eventcode);
    }

#if defined (HAVE_OMPT)
    /* these are ompt specific.*/
    /* initialize the flags and counts for idle,wait_barrier.  */
    tls->thread_idle =  tls->thread_wait_barrier = tls->thread_barrier = false;
#endif

    /* Begin sampling */
    tls->header.time_begin = CBTF_GetTime();
    CBTF_Start(tls->EventSet);
    CBTF_Timer(tls->data.interval, hwcsampTimerHandler);
}
示例#20
0
int main(int argc, char **argv) {

  int    	size,rank, left, right, you, ndata=127,ndata_max=127,seed;
  int           rv;
  long long int i,j,k;
  unsigned long long int  nflop=0,nmem=1,nsleep=0,nrep=1, myflops;
  char 		*env_ptr;
  double 	*sbuf, *rbuf,*x;
  MPI_Status    *s;
  MPI_Request   *r;
  time_t	ts;


#ifdef HPM

   if((rv = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT )
   {
      fprintf(stderr, "Error: %d %s\n",rv, errstring);
      exit(1);
   }

   if ((num_hwcntrs = PAPI_num_counters()) < PAPI_OK)
   {
      printf("There are no counters available. \n");
      exit(1);
   }

  if ( (rv = PAPI_start_counters(events, 2)) != PAPI_OK) {
    fprintf(stdout, "ERROR PAPI_start_counters rv=%d\n", rv);
    exit(rv);
   }

#endif
   seed = time(&ts);

   flags |= DOMPI;
   while(--argc && argv++) {
  if(!strcmp("-v",*argv)) {
    flags |= DOVERBOSE;
  } else if(!strcmp("-n",*argv)) {
    --argc; argv++;
    nflop = atol(*argv);
  } else if(!strcmp("-N",*argv)) {
    --argc; argv++;
    nrep = atol(*argv);
  } else if(!strcmp("-d",*argv)) {
    --argc; argv++;
    ndata_max = ndata = atol(*argv);
  } else if(!strcmp("-m",*argv)) {
    --argc; argv++;
    nmem = atol(*argv);
  } else if(!strcmp("-s",*argv)) {
    --argc; argv++;
    nsleep = atol(*argv);
  } else if(!strcmp("-spray",*argv)) {
    flags |= DOSPRAY;
  } else if(!strcmp("-c",*argv)) {
    flags |= CORE;
  } else if(!strcmp("-r",*argv)) {
    flags |= REGION;
  } else if(!strcmp("-stair",*argv)) {
    flags |= STAIR_RANK;
  } else if(!strcmp("-stair_region",*argv)) {
    flags |= STAIR_REGION;
  } else if(!strcmp("-nompi",*argv)) {
    flags &= ~DOMPI;
  }
 }
 
 if(flags & DOMPI) {
  MPI_Init(&argc,&argv);

/*
  MPI_Init(&argc,&argv);
*/
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
 }
 

 if(nmem) {
 nmem = (nmem*1024*1024/sizeof(double));
 x = (double *)malloc((size_t)(nmem*sizeof(double)));
 for(j=0;j<nrep;j++) {
 for(i=0;i<nmem;i++) {
  x[i] = i;
 }
 for(i=0;i<nmem;i++) {
  x[i] = i*x[i];
 }
 if(x[nmem-1]*x[nmem-1] < 0) {
  printf("trickster\n");
 }
 }
 if(0) free((char *)x);
}
 
#ifdef IPM
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(1,"region_zzzzzzzzzzzZz"); 
#endif
 if(nflop) {
  x = (double *)malloc((size_t)(10*sizeof(double)));
  j = k = 0;
  for(i=0;i<10;i++) {
   x[i] = 1.0;
  }
if(flags & STAIR_RANK) { 
 myflops = (rank*nflop)/size;
} else {
 myflops = nflop;
}
  for(i=0;i<nflop;i++) {
   x[j] = x[j]*x[k];
   j = ((i%9)?(j+1):(0));
   k = ((i%8)?(k+1):(0));
  }
  free((char *)x);
 }

 if(nsleep) {
  sleep(nsleep);
 }
#ifdef IPM
  if(flags & REGION && rank > -1 ) MPI_Pcontrol(-1,"region_zzzzzzzzzzzZz"); 
#endif
 
 if(nmem<nflop) nmem=nflop;
 
 if(nflop>1) printf("FLOPS = %lld BYTES = %lld\n", nflop, nmem);
 
 fflush(stdout);
 
 if(flags & CORE) {
  for(i=0;;i++) {
   x[i] = x[i*i-1000];
  }
 }



  env_ptr = getenv("IPM_SOCKET");
  if(env_ptr) {
   printf("IPM: %d IPM_SOCKET in app %s\n", rank, env_ptr);
  }
  
 if(flags & DOMPI) {
  s = (MPI_Status *)malloc((size_t)(sizeof(MPI_Status)*2*size));
  r = (MPI_Request *)malloc((size_t)(sizeof(MPI_Request)*2*size));


  sbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  rbuf = (double *)malloc((size_t)(ndata_max*sizeof(double)));
  for(i=0;i<ndata_max;i++) { sbuf[i] = rbuf[i] = i; }

  MPI_Bcast(&seed,1,MPI_INT,0,MPI_COMM_WORLD);
  srand48(seed);

  for(i=0;i<nrep;i++) {
   MPI_Bcast(sbuf,ndata_max,MPI_DOUBLE,0,MPI_COMM_WORLD);
  }

  if(size>1) {
  if(!rank) {left=size-1;} else { left = rank-1;}
  if(rank == size-1) { right=0;} else {right=rank+1;}
  you =  (rank < size/2)?(rank+size/2):(rank-size/2);


  for(i=0;i<nrep;i++) {
   if(flags & DOSPRAY) {
    ndata = (long int)(drand48()*ndata_max)+1;
   }
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,right,1,rbuf,ndata,MPI_DOUBLE,left,1,MPI_COMM_WORLD,s);
   MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_a"); 
#endif
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Sendrecv(sbuf,ndata,MPI_DOUBLE,left,1,rbuf,ndata,MPI_DOUBLE,right,1,MPI_COMM_WORLD,s);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);

  MPI_Isend(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD, r);
  MPI_Recv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD, s);
  MPI_Wait(r,s);

  MPI_Irecv(rbuf,ndata,MPI_DOUBLE,MPI_ANY_SOURCE,0,MPI_COMM_WORLD,r);
  MPI_Send(sbuf,ndata,MPI_DOUBLE,you,0,MPI_COMM_WORLD);
  MPI_Wait(r,s);

  
  for(j=0;j<size;j++) {
   MPI_Isend(sbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD, r+j);
   MPI_Irecv(rbuf+j%ndata_max,1,MPI_DOUBLE,j,4,MPI_COMM_WORLD,r+size+j);
  }
  MPI_Waitall(2*size,r,s);
/*
  for(j=0;j<size;j++) {
   printf("rep %d stat %d %d %d\n",i, j, s[j].MPI_SOURCE, s[j+size].MPI_SOURCE);
  }
*/

#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_a"); 
#endif

#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_b"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_b"); 
#endif

 if(1) {
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_c"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_c"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_d"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_d"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_e"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_e"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_f"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_f"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_g"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_g"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_h"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_h"); 
#endif
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(1,"region_i"); 
#endif
  MPI_Allreduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 0,  MPI_COMM_WORLD);
  MPI_Reduce(sbuf,rbuf,ndata-1,MPI_DOUBLE, MPI_SUM, 1,  MPI_COMM_WORLD);
#ifdef IPM
  if(flags & REGION) MPI_Pcontrol(-1,"region_i"); 
#endif
 }

  }
  }


  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Finalize();
  }

#ifdef HPM
   if ((rv=PAPI_stop_counters(values, 2)) != PAPI_OK) {
    fprintf(stdout, "ERROR PAPI_stop_counters rv=%d\n", rv);
    exit(rv);
   }
   printf("PAPI: total instruction/cycles  %lld/%lld %.3e \n", values[0], values[1], values[0]/(values[1]*1.0) );
#endif 

  return 0;   
}
示例#21
0
文件: bench_putget.c 项目: coti/POSH
int main( int argc, char** argv) {

    int rank, size, res, i, j, k;
    char* ptr;
    char* data;
    struct timeval tv_start, tv_end;
    double theTime;
    double** times;

    char* filename = NULL;
    FILE* fd = NULL;

#if PAPI
    int* events = NULL;
    int num_hwcntrs = 0;
    long long int* values = NULL;
    int error = 0;
#endif

    int nb = NBXP;
    int buffsize = DEFAULTSIZE;
    unsigned int it;

    int myBuddyLeft, myBuddyRight, myBuddy;

    int getOnly, putOnly;

    getOnly = 0 ; putOnly = 0;
    if( argc >= 4 ) {
        if( 0 == strncmp( argv[3], "g", 1 ) ) {
            getOnly = 1;
        }
        if( 0 == strncmp( argv[3], "p", 1 ) ) {
            putOnly = 1;
        }
        if( 0 == strncmp( argv[3], "o", 1 ) ) {
            asprintf( &filename, "%s.%d", "putget", getpid() );
        }
    }
    if( argc >= 5 ) {
        if( 0 == strncmp( argv[4], "o", 1 ) ) {
            asprintf( &filename, "%s.%d", "putget", getpid() );
        }
    }

    if( argc >= 3 )
        buffsize = atoi( argv[2] ) ;
    if( argc >= 2 )
        nb = atoi( argv[1] ) ;

#if PAPI
    /* Initialize the PAPI library and get the number of counters available */
    if( (num_hwcntrs = PAPI_num_counters()) <= PAPI_OK ){
        printf( "Error while retrieving counters info\n" );
        return EXIT_FAILURE;
    }

    printf( "This system has %d available counters.\n", num_hwcntrs );

    if( num_hwcntrs < NBCOUNTERS ) {
        printf( "Not enough counters available (%d available, I need %d)\n", num_hwcntrs, NBCOUNTERS );
        error = 1;
        goto finish;
    }

    num_hwcntrs = NBCOUNTERS;

    values = (long long int*) malloc( num_hwcntrs * sizeof( long long int ) );
    events = (int*) malloc( num_hwcntrs * sizeof( int ) );

    events[0] = PAPI_TOT_CYC;
    events[1] = PAPI_TOT_INS;

    /* Start counting events */
    int rc;
    rc = PAPI_start_counters( events, num_hwcntrs );
    if ( rc != PAPI_OK){
        printf( "error starting counting events\n" );
        switch( rc ){
        case PAPI_EINVAL:
            printf( "One or more of the arguments is invalid.\n" );
            break;
        case PAPI_EISRUN:
            printf( "Counters have already been started, you must call PAPI_stop_counters() before you call this function again.\n" );
            break;
        case PAPI_ESYS:
            printf( "A system or C library call failed inside PAPI, see the errno variable.\n" );
            break;
        case PAPI_ENOMEM:
            printf( "Insufficient memory to complete the operation.\n" );
            break;
        case PAPI_ECNFLCT:
            printf( "The underlying counter hardware cannot count this event and other events in the EventSet simultaneously.\n" );
            break;
        case PAPI_ENOEVNT:
            printf( "The PAPI preset is not available on the underlying hardware.\n" );
            break;
        }

        error = 1;
        goto finish;
    }
  
#endif

    start_pes( 0 );
    
    rank = shmem_my_pe();
    size = shmem_n_pes();

    if( size < 2 ) {
        fprintf( stderr, "We need at least 2 processes. Exiting.\n" );
        return EXIT_SUCCESS;
    }

    if( rank == 0 || rank == 1  ) {
        if( NULL != filename ) {
            fd = fopen( filename, "w+" );
            if( NULL == fd ) {
                fprintf( stderr, "pb ouverture fichier sortie\n" );
                return EXIT_FAILURE;
            }
        }
        if( rank == 0 ) {
                printf( "buff size is %d, %d experiments\n", buffsize, nb );
            }
    }
    //printf( "Hello, I am process %d in %d \n", rank, size );

    //    shmem_barrier_all();
    sleep( 2 );
    
    /* check */

    myBuddyLeft = ( rank + 1 ) % size;
    myBuddyRight = ( rank + size - 1 ) % size;

    /*    res = shmem_pe_accessible( myBuddyLeft );
    if( 1 == res ) {
        printf( "[%d/%d] My buddy %d is reachable\n", rank, size, myBuddyLeft );
    } else {
        printf( "[%d/%d] My buddy %d is NOT reachable\n", rank, size, myBuddyLeft );
    }
    res = shmem_pe_accessible( myBuddyRight );
    if( 1 == res ) {
        printf( "[%d/%d] My buddy %d is reachable\n", rank, size, myBuddyRight );
    } else {
        printf( "[%d/%d] My buddy %d is NOT reachable\n", rank, size, myBuddyRight );
        }*/

    /* Allocate some memory */

    ptr = (char*)shmalloc( buffsize );
    data = (char*) malloc( buffsize );

    /* Put stuff there */

    memset( data, rank, buffsize );

    /* Remote read in my buddy's memory */

    if( rank % 2 ) {
        myBuddy = myBuddyRight;
    } else {
        myBuddy = myBuddyLeft;
    }

    shmem_barrier_all();

    if( getOnly != 1 ) {
        
        if( rank == 0 ) {
            
            if( NULL == fd ) {
                printf( "\n\n - * - * - * - * - PUT - * - * - * - * -\n" );
                printf( "[SIZE (B)]\t[BANDWIDTH (Gb/s)]\t[AVERAGE TIME (nsec)]\t[STD DEV]\n" );
            } else {
                fprintf( fd, "\n\n - * - * - * - * - PUT - * - * - * - * -\n" );
                fprintf( fd, "[SIZE (B)]\t[BANDWIDTH (Gb/s)]\t[AVERAGE TIME (nsec)]\t[STD DEV]\n" );
            }
        
            k = 1;
            it = 0;
            while( k <= buffsize ) {
                experimentPut( k, nb, myBuddy, data, ptr, fd );
                k = ( 0x01 << it ) + 3;
                if( k <= buffsize ) {
                    experimentPut( k, nb, myBuddy, data, ptr, fd );
                }
                k = ( 0x01 << it ) - 3;
                if( k > 0 ) {
                    experimentPut( k, nb, myBuddy, data, ptr, fd );
                }
                it++;
                k = 0x01 << it;
            }
            
        }
        
        shmem_barrier_all();
    }

    if( putOnly != 1 ) {
        if( rank == 1 ) {
            
            if( NULL == fd ) {
                printf( "\n\n - * - * - * - * - GET - * - * - * - * -\n" );
                printf( "[SIZE (B)]\t[BANDWIDTH (Gb/s)]\t[AVERAGE TIME (nsec)]\t[STD DEV]\n" );
            } else {
                fprintf( fd, "\n\n - * - * - * - * - GET - * - * - * - * -\n" );
                fprintf( fd, "[SIZE (B)]\t[BANDWIDTH (Gb/s)]\t[AVERAGE TIME (nsec)]\t[STD DEV]\n" );
            }

            k = 1;
            it = 0;
            while( k <= buffsize ) {
                experimentGet( k, nb, myBuddy, data, ptr, fd );
                k = ( 0x01 << it ) + 3;
                if( k <= buffsize ) {
                    experimentGet( k, nb, myBuddy, data, ptr, fd );
                }
                k = ( 0x01 << it ) - 3;
                if( k > 0 ) {
                    experimentGet( k, nb, myBuddy, data, ptr , fd);
                }
                it++;
                k = 0x01 << it;
            }
            
        }
        
        shmem_barrier_all();
    }


 finish:
    if( NULL != filename ) {
        free( filename );
    }
    if( NULL != fd ){
        fclose( fd );
    }
#if PAPI
    if( NULL != events ) {
        free( events );
    }
  if( NULL != values ) {
      free( values );
  }
#endif

if( rank == 0 ) {
    printf( "The end %d\n", EXIT_SUCCESS );
}

  // return EXIT_SUCCESS;
  exit( 0 ) ;
}
示例#22
0
int measure(int (*f)(double *A, unsigned int n), double *A, unsigned int n, int event) {

    /* force program to run on a single CPU */
    cpu_set_t my_set;        /* Define your cpu_set bit mask. */
    CPU_ZERO(&my_set);       /* Initialize it all to 0, i.e. no CPUs selected. */
    CPU_SET(0, &my_set);
    if (sched_setaffinity(0, sizeof(cpu_set_t), &my_set) != 0)
        perror("sched_setaffinity error");

    /* init lib */
    int events[4] = {
            PAPI_STL_ICY, //Cycles with no instruction issue
            PAPI_L2_DCM, //L2 data cache misses
            PAPI_L2_DCH, //L2 data cache hits
            PAPI_FP_OPS //Floating point operations
    };
    char* event_names[4] = {
            "Cycles with no instruction issue: ",
            "L2 data cache misses:             ",
            "L2 data cache hits:               ",
            "Floating point operations:        "
    };

    long long values[1] = {0};
    int eventSet = PAPI_NULL;
    int papi_err;
    bool papi_supported = true;

    if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
        fprintf(stderr, "PAPI is unsupported.\n");
        papi_supported = false;
    }

    if (PAPI_num_counters() < 4) {
        fprintf(stderr, "PAPI is unsupported.\n");
        papi_supported = false;
    }

    if ((papi_err = PAPI_create_eventset(&eventSet)) != PAPI_OK) {
        fprintf(stderr, "Could not create event set: %s\n", PAPI_strerror(papi_err));
    }

    if ((papi_err = PAPI_add_event(eventSet, events[event])) != PAPI_OK ) {
        fprintf(stderr, "Could not add event %d: %s\n", event, PAPI_strerror(papi_err));
    }

    /* start counters */
    if (papi_supported) {
        if ((papi_err = PAPI_start(eventSet)) != PAPI_OK) {
            fprintf(stderr, "Could not start counters: %s\n", PAPI_strerror(papi_err));
        }
    }

    f(A, n);

    /* stop counters */
    if (papi_supported) {
        if ((papi_err = PAPI_stop(eventSet, values)) != PAPI_OK) {
            fprintf(stderr, "Could not get values: %s\n", PAPI_strerror(papi_err));
        }

        int save_to_file = 0;
        if (save_to_file > 0){
            FILE *fp;
            char filename[60];
            sprintf(filename, "results/event%d.txt", event);
            fp = fopen(filename, "w+");
            if (fp == NULL) perror("Error while saving results to file");
            fprintf(fp, "%lld", values[0]);
            fclose(fp);
        }

        printf("%s\t %lld\n", event_names[event], values[0]);
    }
}
示例#23
0
文件: papi.c 项目: DundalkIT/pcp
static int
papi_internal_init(pmdaInterface *dp)
{
    int ec;
    int sts;
    PAPI_event_info_t info;
    char entry[PAPI_HUGE_STR_LEN+12]; // the length papi uses for the symbol name
    unsigned int i = 0;
    pmID pmid;

    sts = sprintf(papi_version, "%d.%d.%d", PAPI_VERSION_MAJOR(PAPI_VERSION),
	    PAPI_VERSION_MINOR(PAPI_VERSION), PAPI_VERSION_REVISION(PAPI_VERSION));
    if (sts < 0) {
	__pmNotifyErr(LOG_ERR, "%s failed to create papi version metric.\n",pmProgname);
	return PM_ERR_GENERIC;
    }

    if ((sts = __pmNewPMNS(&papi_tree)) < 0) {
	__pmNotifyErr(LOG_ERR, "%s failed to create dynamic papi pmns: %s\n",
		      pmProgname, pmErrStr(sts));
	papi_tree = NULL;
	return PM_ERR_GENERIC;
    }

    number_of_counters = PAPI_num_counters();
    if (number_of_counters < 0) {
	__pmNotifyErr(LOG_ERR, "hardware does not support performance counters\n");
	return PM_ERR_APPVERSION;
    }
    else if (number_of_counters == 0) {
	__pmNotifyErr(LOG_WARNING, "no performance counters\n");
    }

    sts = PAPI_library_init(PAPI_VER_CURRENT);
    if (sts != PAPI_VER_CURRENT) {
	__pmNotifyErr(LOG_ERR, "PAPI_library_init error (%d)\n", sts);
	return PM_ERR_GENERIC;
    }

    ec = PAPI_PRESET_MASK;
    PAPI_enum_event(&ec, PAPI_ENUM_FIRST);
    do {
	if (PAPI_get_event_info(ec, &info) == PAPI_OK) {
	    if (info.count && PAPI_PRESET_ENUM_AVAIL) {
		expand_papi_info(i);
		memcpy(&papi_info[i].info, &info, sizeof(PAPI_event_info_t));
		memcpy(&papi_info[i].papi_string_code, info.symbol + 5, strlen(info.symbol)-5);
		snprintf(entry, sizeof(entry),"papi.system.%s", papi_info[i].papi_string_code);
		pmid = pmid_build(dp->domain, CLUSTER_PAPI, i);
		papi_info[i].pmid = pmid;
		__pmAddPMNSNode(papi_tree, pmid, entry);
		memset(&entry[0], 0, sizeof(entry));
		papi_info[i].position = -1;
		papi_info[i].metric_enabled = 0;
		expand_values(i);
		i++;
	    }
	}
    } while(PAPI_enum_event(&ec, 0) == PAPI_OK);

#if defined(HAVE_PAPI_DISABLED_COMP)
    char *tokenized_string;
    int number_of_components;
    int component_id;
    int native;

    number_of_components = PAPI_num_components();
    native = 0 | PAPI_NATIVE_MASK;
    for (component_id = 0; component_id < number_of_components; component_id++) {
	const PAPI_component_info_t *component;
	component = PAPI_get_component_info(component_id);
	if (component->disabled || (strcmp("perf_event", component->name)
				    && strcmp("perf_event_uncore", component->name)))
	    continue;
	sts = PAPI_enum_cmp_event (&native, PAPI_ENUM_FIRST, component_id);
	if (sts == PAPI_OK)
	do {
	    if (PAPI_get_event_info(native, &info) == PAPI_OK) {
		char local_native_metric_name[PAPI_HUGE_STR_LEN] = "";
		int was_tokenized = 0;
		expand_papi_info(i);
		memcpy(&papi_info[i].info, &info, sizeof(PAPI_event_info_t));
		tokenized_string = strtok(info.symbol, "::: -");
		while (tokenized_string != NULL) {
		    size_t remaining = sizeof(local_native_metric_name) -
			strlen(local_native_metric_name) - 1;
		    if (remaining < 1)
			break;
		    strncat(local_native_metric_name, tokenized_string, remaining);
		    was_tokenized = 1;
		    tokenized_string=strtok(NULL, "::: -");
		    if (tokenized_string) {
			remaining = sizeof(local_native_metric_name) -
			    strlen(local_native_metric_name) - 1;
			if (remaining < 1)
			    break;
			strncat(local_native_metric_name, ".", remaining);
		    }
		}
		if (!was_tokenized) {
		    strncpy(papi_info[i].papi_string_code, info.symbol,
			    sizeof(papi_info[i].papi_string_code) - 1);
		}
		else {
		    strncpy(papi_info[i].papi_string_code,
			    local_native_metric_name,
			    sizeof(papi_info[i].papi_string_code) - 1);
		}
		snprintf(entry, sizeof(entry),"papi.system.%s", papi_info[i].papi_string_code);
		pmid = pmid_build(dp->domain, CLUSTER_PAPI, i);
		papi_info[i].pmid = pmid;
		__pmAddPMNSNode(papi_tree, pmid, entry);
		memset(&entry[0], 0, sizeof(entry));
		papi_info[i].position = -1;
		papi_info[i].metric_enabled = 0;
		expand_values(i);
		i++;
	    }
	} while (PAPI_enum_cmp_event(&native, PAPI_ENUM_EVENTS, component_id) == PAPI_OK);
    }
#endif
    pmdaTreeRebuildHash(papi_tree, number_of_events);

    /* Set one-time settings for all future EventSets. */
    if ((sts = PAPI_set_domain(PAPI_DOM_ALL)) != PAPI_OK) {
	handle_papi_error(sts, 0);
	return PM_ERR_GENERIC;
    }
    if ((sts = PAPI_multiplex_init()) != PAPI_OK) {
	handle_papi_error(sts, 0);
	return PM_ERR_GENERIC;
    }

    sts = refresh_metrics(0);
    if (sts != PAPI_OK)
	return PM_ERR_GENERIC;
    return 0;
}