예제 #1
0
/*
 * init - populate a PmData working set from the template library and the
 * test pattern, and allocate the scratch buffers it needs.
 *
 *   pmdata  : structure to fill in; its size, ratio and pointer fields
 *             are all overwritten.
 *   lib     : template library; size[0] is taken as the template count,
 *             size[1] as the profile length, and data is referenced
 *             (not copied).
 *   pattern : test pattern; its data pointer is referenced (not copied).
 *
 * The malloc() results are stored without being checked, so a failed
 * allocation leaves the corresponding field NULL.
 */
void init(PmData *pmdata, PcaCArrayFloat *lib, PcaCArrayFloat *pattern)
{
  int   float_bytes = sizeof(float);
  float scaled;

  /* Dimensions are read from the library array descriptor */
  pmdata->num_templates = lib->size[0];
  pmdata->profile_size  = lib->size[1];

  pmdata->shift_ratio = 3.0f;
  pmdata->elsize      = float_bytes;

  /* The input buffers are shared with the caller */
  pmdata->test_profile_db      = pattern->data;
  pmdata->template_profiles_db = lib->data;

  /*
   * shift_size is profile_size / shift_ratio rounded to the nearest
   * integer: a fractional part below 0.5 rounds down, anything else up.
   */
  scaled = (float)(pmdata->profile_size) / pmdata->shift_ratio;
  if ((scaled - (int)(scaled)) < 0.5) {
    pmdata->shift_size = (int)floor(scaled);
  } else {
    pmdata->shift_size = (int)ceil(scaled);
  }

  /* Scratch buffers sized by profile length or by shift count */
  pmdata->template_exceed =
      (uchar*) malloc(sizeof(char)*pmdata->profile_size);
  pmdata->test_exceed_means =
      (float*) malloc(float_bytes*pmdata->shift_size);

  pmdata->template_copy =
      (float*) malloc(float_bytes*pmdata->profile_size);
  pmdata->test_noise_db_array =
      (float*) malloc(float_bytes*pmdata->profile_size);

  pmdata->MSE_scores =
      (float*) malloc(float_bytes*pmdata->shift_size);
  /* Fixed capacity of 21 floats */
  pmdata->mag_shift_scores =
      (float*) malloc(float_bytes*21);

  pmdata->minimum_MSE_score =
      (float*) malloc(float_bytes*pmdata->num_templates);
  /* Holds the profile plus (shift_size+2) extra elements counted twice */
  pmdata->all_shifted_test_db =
      (float*) malloc(float_bytes*((pmdata->shift_size+2)*2+pmdata->profile_size));

  /* Set the coefficients for the log and pow functions */
  setcoeff();
}
예제 #2
0
//---------------------------------------------------------------------
// Driver for the LU benchmark (OpenMP build).
//
// argv[1], when it is the only argument, is parsed as the number of
// OpenMP threads to use; it must be a positive decimal integer.  With
// no argument, or an unusable one, the OpenMP runtime defaults are kept.
//
// Runs the setup routines, one SSOR iteration to touch the data pages,
// resets the boundary/initial values, runs ssor(itmax), then verifies
// and prints the results.  Per-section timings are printed only when a
// readable "timer.flag" file can be opened.
//
// Always returns 0.
//---------------------------------------------------------------------
int main(int argc, char *argv[])
{
    char Class;
    logical verified;
    double mflops;

    double t, tmax, trecs[t_last+1];
    int i;
    char *t_names[t_last+1];

    //---------------------------------------------------------------------
    // Thread count from the command line.
    // -1 means "nothing usable was given; leave the OpenMP defaults alone".
    //---------------------------------------------------------------------
    int num_threads = -1;
    if (argc == 2) {
        char *end;
        long requested = strtol(argv[1], &end, 10);
        // Accept only a fully-consumed, positive number that survives the
        // narrowing to int; omp_set_num_threads() needs a positive value.
        int usable = end != argv[1] && *end == '\0'
                     && requested >= 1
                     && (long)(int)requested == requested;
        if (usable) {
            num_threads = (int)requested;
            printf("Number of threads received are - %d", num_threads);
        } else {
            fprintf(stderr,
                    " Ignoring invalid thread count '%s'; using OpenMP defaults\n",
                    argv[1]);
        }
    }
    if (num_threads != -1) {
        // NOTE(review): omp_set_dynamic() takes an on/off flag, so any
        // positive count enables dynamic adjustment and the runtime may
        // then use fewer threads than requested -- confirm this is intended.
        omp_set_dynamic(num_threads);			//Nitin Chugh
        printf(" Dynamic environment state - %d\n", omp_get_dynamic());
        omp_set_num_threads(num_threads);
    }
    printf(" Number of processors - %d\n", omp_get_num_procs());

    //---------------------------------------------------------------------
    // Setup info for timers
    // (t_names is filled only when timing is on, and read only then)
    //---------------------------------------------------------------------
    FILE *fp;
    if ((fp = fopen("timer.flag", "r")) != NULL) {
        timeron = true;
        t_names[t_total] = "total";
        t_names[t_rhsx] = "rhsx";
        t_names[t_rhsy] = "rhsy";
        t_names[t_rhsz] = "rhsz";
        t_names[t_rhs] = "rhs";
        t_names[t_jacld] = "jacld";
        t_names[t_blts] = "blts";
        t_names[t_jacu] = "jacu";
        t_names[t_buts] = "buts";
        t_names[t_add] = "add";
        t_names[t_l2norm] = "l2norm";
        fclose(fp);
    } else {
        timeron = false;
    }

    //---------------------------------------------------------------------
    // read input data
    //---------------------------------------------------------------------
    read_input();

    //---------------------------------------------------------------------
    // set up domain sizes
    //---------------------------------------------------------------------
    domain();

    //---------------------------------------------------------------------
    // set up coefficients
    //---------------------------------------------------------------------
    setcoeff();

    //---------------------------------------------------------------------
    // set the boundary values for dependent variables
    //---------------------------------------------------------------------
    setbv();

    //---------------------------------------------------------------------
    // set the initial values for dependent variables
    //---------------------------------------------------------------------
    setiv();

    //---------------------------------------------------------------------
    // compute the forcing term based on prescribed exact solution
    //---------------------------------------------------------------------
    erhs();

    //---------------------------------------------------------------------
    // perform one SSOR iteration to touch all data pages
    //---------------------------------------------------------------------
    ssor(1);

    //---------------------------------------------------------------------
    // reset the boundary and initial values
    //---------------------------------------------------------------------
    setbv();
    setiv();

    //---------------------------------------------------------------------
    // perform the SSOR iterations
    //---------------------------------------------------------------------
    ssor(itmax);

    //---------------------------------------------------------------------
    // compute the solution error
    //---------------------------------------------------------------------
    error();

    //---------------------------------------------------------------------
    // compute the surface integral
    //---------------------------------------------------------------------
    pintgr();

    //---------------------------------------------------------------------
    // verification test
    // (mflops is derived from the grid sizes, itmax and maxtime)
    //---------------------------------------------------------------------
    verify ( rsdnm, errnm, frc, &Class, &verified );
    mflops = (double)itmax * (1984.77 * (double)nx0
                              * (double)ny0
                              * (double)nz0
                              - 10923.3 * pow(((double)(nx0+ny0+nz0)/3.0), 2.0)
                              + 27770.9 * (double)(nx0+ny0+nz0)/3.0
                              - 144010.0)
             / (maxtime*1000000.0);

    print_results("LU", Class, nx0,
                  ny0, nz0, itmax,
                  maxtime, mflops, "          floating point", verified,
                  NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6,
                  "(none)");

    //---------------------------------------------------------------------
    // More timers
    //---------------------------------------------------------------------
    if (timeron) {
        for (i = 1; i <= t_last; i++) {
            trecs[i] = timer_read(i);
        }
        // Percentages are relative to maxtime; avoid dividing by zero.
        tmax = maxtime;
        if (tmax == 0.0) tmax = 1.0;

        printf("  SECTION     Time (secs)\n");
        for (i = 1; i <= t_last; i++) {
            printf("  %-8s:%9.3f  (%6.2f%%)\n",
                   t_names[i], trecs[i], trecs[i]*100./tmax);
            if (i == t_rhs) {
                // Split the rhs time into its x/y/z parts and the remainder.
                t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz];
                printf("     --> %8s:%9.3f  (%6.2f%%)\n", "sub-rhs", t, t*100./tmax);
                t = trecs[i] - t;
                printf("     --> %8s:%9.3f  (%6.2f%%)\n", "rest-rhs", t, t*100./tmax);
            }
        }
    }

    return 0;
}
예제 #3
0
파일: lu.c 프로젝트: BillTheBest/RCCE
//c---------------------------------------------------------------------
//c
//c Authors: S. Weeratunga
//c          V. Venkatakrishnan
//c          E. Barszcz
//c          M. Yarrow
//c C-version: Rob Van der Wijngaart, Intel Corporation
//c
//c---------------------------------------------------------------------
// 
// Copyright 2010 Intel Corporation
// 
//    Licensed under the Apache License, Version 2.0 (the "License");
//    you may not use this file except in compliance with the License.
//    You may obtain a copy of the License at
// 
//        http://www.apache.org/licenses/LICENSE-2.0
// 
//    Unless required by applicable law or agreed to in writing, software
//    distributed under the License is distributed on an "AS IS" BASIS,
//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//    See the License for the specific language governing permissions and
//    limitations under the License.
// 
//c---------------------------------------------------------------------
//c   RCCE_APP: driver routine of the RCCE version of the LU benchmark.
//c   Calls the setup routines in order, runs one SSOR iteration, resets
//c   the boundary and initial values, runs ssor(itmax), then error()
//c   and pintgr(). The branch guarded by id == 0 verifies and prints
//c   the results.
//c   argc and argv are passed by address to init_comm().
//c   NOTE(review): the end of this function is not visible in this
//c   excerpt, so its return value is not described here.
//c---------------------------------------------------------------------
int RCCE_APP(int argc, char **argv){

//c---------------------------------------------------------------------
//c
//c   driver for the performance evaluation of the solver for
//c   five coupled parabolic/elliptic partial differential equations.
//c
//c---------------------------------------------------------------------



      char class;
      double mflops;
//c   NOTE(review): ierr, i, j, k and mm are not referenced anywhere in
//c   the visible part of this function.
      int ierr, i, j, k, mm, iverified;

//c---------------------------------------------------------------------
//c   initialize communications
//c---------------------------------------------------------------------
       init_comm(&argc, &argv);
//       RCCE_debug_set(RCCE_DEBUG_SYNCH);
//c---------------------------------------------------------------------
//c   read input data
//c---------------------------------------------------------------------
       read_input();

//c---------------------------------------------------------------------
//c   set up processor grid
//c---------------------------------------------------------------------
       proc_grid();

//c---------------------------------------------------------------------
//c   determine the neighbors
//c---------------------------------------------------------------------
       neighbors();

//c---------------------------------------------------------------------
//c   set up sub-domain sizes
//c---------------------------------------------------------------------
       subdomain();

//c---------------------------------------------------------------------
//c   set up coefficients
//c---------------------------------------------------------------------
       setcoeff();

//c---------------------------------------------------------------------
//c   set the boundary values for dependent variables
//c---------------------------------------------------------------------

       setbv();

//c---------------------------------------------------------------------
//c   set the initial values for dependent variables
//c---------------------------------------------------------------------

       setiv();

//c---------------------------------------------------------------------
//c   compute the forcing term based on prescribed exact solution
//c---------------------------------------------------------------------
       erhs();

////c---------------------------------------------------------------------
////c   perform one SSOR iteration to touch all data and program pages 
////c---------------------------------------------------------------------
       ssor(1);

//
////c---------------------------------------------------------------------
////c   reset the boundary and initial values
////c---------------------------------------------------------------------
       setbv();
       setiv();
//
////c---------------------------------------------------------------------
////c   perform the SSOR iterations
////c---------------------------------------------------------------------
       ssor(itmax);

////c---------------------------------------------------------------------
////c   compute the solution error
////c---------------------------------------------------------------------
        error();

////c---------------------------------------------------------------------
////c   compute the surface integral
////c---------------------------------------------------------------------
      pintgr();
//
////c---------------------------------------------------------------------
////c   verification test
////c   (only executed when id is 0)
////c---------------------------------------------------------------------

      if (id ==0) {
//c   NOTE(review): iverified is never assigned in the visible code and
//c   verify() is not given its address, yet &iverified is passed to
//c   print_results() below -- confirm it is set somewhere, otherwise an
//c   indeterminate value is reported.
        verify( rsdnm, errnm, &frc, &class );
//c   mflops is derived from the grid sizes, itmax and maxtime
         mflops = (double)(itmax)*(1984.77*(double)( nx0 )
              *(double)( ny0 )
              *(double)( nz0 )
              -10923.3*((double)( nx0+ny0+nz0 )/3.)*((double)( nx0+ny0+nz0 )/3.)
              +27770.9* (double)( nx0+ny0+nz0 )/3.
              -144010.)
              / (maxtime*1000000.);

          print_results("LU", &class, &nx0,
           &ny0, &nz0, &itmax, &nnodes_compiled,
           &num, &maxtime, &mflops, "          floating point", &iverified, 
           NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6);

//         FILE *perf_file;
//         char name[50] = "/shared/DEMOS/RCCE/NPB_LU/perf."; 
//         char postfix[50]; 
//         sprintf(postfix, "%d", nnodes_compiled); 
//         strcat(name, postfix); 
//         perf_file = fopen(name,"w"); 
//         fprintf(perf_file, "%d", (int)mflops); 
//         fclose(perf_file); 
      }      
예제 #4
0
파일: lu.c 프로젝트: ashwinma/multicl
int main(int argc, char *argv[])
{
  char Class;
  logical verified;
  double mflops;

  double t, tmax, trecs[t_last+1];
  int i;
  char *t_names[t_last+1];

  if (argc == 1) {
    fprintf(stderr, "Usage: %s <kernel directory>\n", argv[0]);
    exit(-1);
  }

  //---------------------------------------------------------------------
  // Setup info for timers
  //---------------------------------------------------------------------
  FILE *fp;
  if ((fp = fopen("timer.flag", "r")) != NULL) {
    timeron = true;
    t_names[t_total] = "total";
    t_names[t_rhsx] = "rhsx";
    t_names[t_rhsy] = "rhsy";
    t_names[t_rhsz] = "rhsz";
    t_names[t_rhs] = "rhs";
    t_names[t_jacld] = "jacld";
    t_names[t_blts] = "blts";
    t_names[t_jacu] = "jacu";
    t_names[t_buts] = "buts";
    t_names[t_add] = "add";
    t_names[t_l2norm] = "l2norm";

    t_names[t_setbv] = "setbv";
    t_names[t_setiv] = "setiv";
    t_names[t_erhs] = "erhs";
    t_names[t_error] = "error";
    t_names[t_pintgr] = "pintgr";
    t_names[t_blts1] = "blts1";
    t_names[t_buts1] = "buts1";
    fclose(fp);
  } else {
    timeron = false;
  }

  //---------------------------------------------------------------------
  // read input data
  //---------------------------------------------------------------------
  read_input();

  //---------------------------------------------------------------------
  // set up domain sizes
  //---------------------------------------------------------------------
  domain();

  //---------------------------------------------------------------------
  // set up OpenCL environment
  //---------------------------------------------------------------------
  setup_opencl(argc, argv);

  //---------------------------------------------------------------------
  // set up coefficients
  //---------------------------------------------------------------------
  setcoeff();

  //---------------------------------------------------------------------
  // set the boundary values for dependent variables
  //---------------------------------------------------------------------
  setbv();

  //---------------------------------------------------------------------
  // set the initial values for dependent variables
  //---------------------------------------------------------------------
  setiv();

  //---------------------------------------------------------------------
  // compute the forcing term based on prescribed exact solution
  //---------------------------------------------------------------------
  erhs();

  //---------------------------------------------------------------------
  // perform one SSOR iteration to touch all data pages
  //---------------------------------------------------------------------
  ssor(1);

  //---------------------------------------------------------------------
  // reset the boundary and initial values
  //---------------------------------------------------------------------
  setbv();
  setiv();

  //---------------------------------------------------------------------
  // perform the SSOR iterations
  //---------------------------------------------------------------------
  ssor(itmax);

  //---------------------------------------------------------------------
  // compute the solution error
  //---------------------------------------------------------------------
  error();

  //---------------------------------------------------------------------
  // compute the surface integral
  //---------------------------------------------------------------------
  pintgr();

  //---------------------------------------------------------------------
  // verification test
  //---------------------------------------------------------------------
  verify ( rsdnm, errnm, frc, &Class, &verified );
  mflops = (double)itmax * (1984.77 * (double)nx0
      * (double)ny0
      * (double)nz0
      - 10923.3 * pow(((double)(nx0+ny0+nz0)/3.0), 2.0) 
      + 27770.9 * (double)(nx0+ny0+nz0)/3.0
      - 144010.0)
    / (maxtime*1000000.0);

  c_print_results("LU", Class, nx0,
                  ny0, nz0, itmax,
                  maxtime, mflops, "          floating point", verified, 
                  NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, 
                  "(none)",
                  clu_GetDeviceTypeName(device_type),
                  device_name);

  //---------------------------------------------------------------------
  // More timers
  //---------------------------------------------------------------------
  if (timeron) {
    for (i = 1; i <= t_last; i++) {
      trecs[i] = timer_read(i);
    }
    tmax = maxtime;
    if (tmax == 0.0) tmax = 1.0;

    printf("  SECTION     Time (secs)\n");
    for (i = 1; i <= t_last; i++) {
      printf("  %-8s:%9.4f  (%6.2f%%)\n",
          t_names[i], trecs[i], trecs[i]*100./tmax);
      if (i == t_rhs) {
        t = trecs[t_rhsx] + trecs[t_rhsy] + trecs[t_rhsz];
        printf("     --> %8s:%9.3f  (%6.2f%%)\n", "sub-rhs", t, t*100./tmax);
        t = trecs[i] - t;
        printf("     --> %8s:%9.3f  (%6.2f%%)\n", "rest-rhs", t, t*100./tmax);
      }
    }
  }

  release_opencl();

  fflush(stdout);

  return 0;
}