示例#1
0
static inline void perform_local_work(void)
{
# ifdef TIME_WORKLOAD
    qtimer_t work_timer = qtimer_create();
    qtimer_start(work_timer);
# endif // TIME_WORKLOAD
    volatile unsigned long work = workload;
    long rand_per = (long)qtimer_fastrand();
    long rand_var = (long)qtimer_fastrand();

    rand_per = (rand_per<0) ? (-rand_per)%100 : rand_per%100;
    if (rand_per < workload_per) {
        rand_var = (rand_var<0) ? (-rand_var)%100 : rand_var%100;
        work += (workload * (workload_var * 0.01)) * (rand_var * 0.01);
    }

    for (int i = 0; i < work; i++) {
        work = work % 1000000000;
    }
    work++;
# ifdef TIME_WORKLOAD
    qtimer_stop(work_timer);
    fprintf(stdout, "Worked for %f\n", qtimer_secs(work_timer));
    qtimer_destroy(work_timer);
# endif // TIME_WORKLOAD
}
示例#2
0
int main(int argc, char *argv[])
{
    pthread_t rets[NUM_THREADS];
    qtimer_t timer = qtimer_create();
    double cumulative_time = 0.0;
    size_t counter;

    CHECK_VERBOSE();

    for (int iteration = 0; iteration < 10; iteration++) {
        qtimer_start(timer);
        for (int i = 0; i < NUM_THREADS; i++) {
            pthread_create(&(rets[i]), NULL, qincr, &counter);
        }
        for (int i = 0; i < NUM_THREADS; i++) {
            pthread_join(rets[i], NULL);
        }
        qtimer_stop(timer);
        iprintf("\ttest iteration %i: %f secs\n", iteration,
                qtimer_secs(timer));
        cumulative_time += qtimer_secs(timer);
    }
    printf("pthread time: %f\n", cumulative_time / 10.0);

    return 0;
}
示例#3
0
int main(int argc, char *argv[])
{
    aligned_t rets[NUM_THREADS];
    qtimer_t timer = qtimer_create();
    double cumulative_time = 0.0;

    if (qthread_initialize() != QTHREAD_SUCCESS) {
        fprintf(stderr, "qthread library could not be initialized!\n");
        exit(EXIT_FAILURE);
    }
    CHECK_VERBOSE();

    for (int iteration = 0; iteration < 10; iteration++) {
        qtimer_start(timer);
        for (int i = 0; i < NUM_THREADS; i++) {
            qthread_fork(qincr, NULL, &(rets[i]));
        }
        for (int i = 0; i < NUM_THREADS; i++) {
            qthread_readFF(NULL, &(rets[i]));
        }
        qtimer_stop(timer);
        iprintf("\ttest iteration %i: %f secs\n", iteration,
                qtimer_secs(timer));
        cumulative_time += qtimer_secs(timer);
    }
    printf("qthread time: %f\n", cumulative_time / 10.0);

    return 0;
}
void test_print_qthread(size_t i) {
  qtimer_t t = qtimer_create();
  qtimer_start(t);
  do {
    qthread_yield();
    qtimer_stop(t);
  } while(qtimer_secs(t) < 1);
  qtimer_destroy(t);
  //std::cout << i << "\n";
}
示例#5
0
int main(int   argc,
         char *argv[])
{
    uint64_t      count    = 1048576;
    int           par_fork = 0;
    unsigned long threads  = 1;

    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    NUMARG(count, "MT_COUNT");
    NUMARG(par_fork, "MT_PAR_FORK");
    assert(0 != count);

#pragma omp parallel
#pragma omp single
    {
        timer   = qtimer_create();
        threads = omp_get_num_threads();

        if (par_fork) {
            qtimer_start(timer);
#pragma omp parallel for
            for (uint64_t i = 0; i < count; i++) {
#pragma omp task untied
                null_task(NULL);
            }
        } else {
            qtimer_start(timer);
#pragma omp task untied
            for (uint64_t i = 0; i < count; i++) {
#pragma omp task untied
                null_task(NULL);
            }
        }

#pragma omp taskwait

        qtimer_stop(timer);
    }

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

    printf("%lu %lu %f\n",
           threads,
           (unsigned long)count,
           total_time);

    return 0;
}
示例#6
0
文件: qtimer.c 项目: Agobin/chapel
int main(int   argc,
         char *argv[])
{
    qtimer_t t;

    assert(qthread_initialize() == QTHREAD_SUCCESS);

    CHECK_VERBOSE();

    t = qtimer_create();
    assert(t);
    qtimer_start(t);
    qtimer_stop(t);
    if (qtimer_secs(t) == 0) {
        fprintf(stderr, "qtimer_secs(t) reported zero length time.\n");
    } else if (qtimer_secs(t) < 0) {
        fprintf(stderr, "qtimer_secs(t) thinks time went backwards (%g).\n",
                qtimer_secs(t));
    }
    iprintf("time to find self and assert it: %g secs\n", qtimer_secs(t));

    qtimer_start(t);
    qtimer_stop(t);
    assert(qtimer_secs(t) >= 0.0);
    if (qtimer_secs(t) == 0.0) {
        iprintf("inlining reduces calltime to zero (apparently)\n");
    } else {
        iprintf("smallest measurable time: %g secs\n", qtimer_secs(t));
    }

    qtimer_destroy(t);

    // Now to test fastrand
    ks_test();
    runs();
    autocorrelation();

    qthread_finalize();

    return 0;
}
示例#7
0
int main(int argc, char *argv[])
{
    int n = 10;
    int m = 10;
    num_timesteps = 10;
    workload = 0;
    workload_per = 0;
    workload_var = 0;
    int print_final = 0;
    int alltime = 0;

    CHECK_VERBOSE();
    NUMARG(n, "N");
    NUMARG(m, "M");
    NUMARG(num_timesteps, "TIMESTEPS");
    NUMARG(workload, "WORKLOAD");
    NUMARG(workload_per, "WORKLOAD_PER");
    NUMARG(workload_var, "WORKLOAD_VAR");
    NUMARG(print_final, "PRINT_FINAL");
    NUMARG(alltime, "ALL_TIME");

    assert (n > 0 && m > 0);

    // Initialize Qthreads
    assert(qthread_initialize() == 0);

    qtimer_t alloc_timer = qtimer_create();
    qtimer_t init_timer = qtimer_create();
    qtimer_t exec_timer = qtimer_create();

    // Allocate memory for 3-stage stencil (with boundary padding)
    qtimer_start(alloc_timer);
    stencil_t points;
    points.N = n + 2;
    points.M = m + 2;

    for (int s = 0; s < NUM_STAGES; s++) {
        points.stage[s] = malloc(points.N*sizeof(aligned_t *));
        assert(NULL != points.stage[s]);
        for (int i = 0; i < points.N; i++) {
            points.stage[s][i] = calloc(points.M, sizeof(aligned_t));
            assert(NULL != points.stage[s][i]);
        }
    }
    qtimer_stop(alloc_timer);

    // Initialize first stage and set boundary conditions
    qtimer_start(init_timer);
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            qthread_writeF_const(&points.stage[0][i][j], 0);
            for (int s = 1; s < NUM_STAGES; s++)
                qthread_empty(&points.stage[s][i][j]);
        }
    }
    for (int i = 0; i < points.N; i++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][i][0], BOUNDARY);
            qthread_writeF_const(&points.stage[s][i][points.M-1], BOUNDARY);
#else
            points.stage[s][i][0] = BOUNDARY;
            points.stage[s][i][points.M-1] = BOUNDARY;
#endif
        }
    }
    for (int j = 0; j < points.M; j++) {
        for (int s = 0; s < NUM_STAGES; s++) {
#ifdef BOUNDARY_SYNC
            qthread_writeF_const(&points.stage[s][0][j], BOUNDARY);
            qthread_writeF_const(&points.stage[s][points.N-1][j], BOUNDARY);
#else
            points.stage[s][0][j] = BOUNDARY;
            points.stage[s][points.N-1][j] = BOUNDARY;
#endif
        }
    }
    qtimer_stop(init_timer);

    // Create barrier to synchronize on completion of calculations
    qtimer_start(exec_timer);
    points.barrier = qt_feb_barrier_create(n*m+1);

    // Spawn tasks to start calculating updates at each point
    update_args_t args = {&points, -1, -1, 1, 1};
    for (int i = 1; i < points.N-1; i++) {
        for (int j = 1; j < points.M-1; j++) {
            args.i = i;
            args.j = j;
            qthread_fork_syncvar_copyargs(update, &args, sizeof(update_args_t), NULL);
        }
    }

    // Wait for calculations to finish
    qt_feb_barrier_enter(points.barrier);
    qtimer_stop(exec_timer);

    // Print timing info
    if (alltime) {
        fprintf(stderr, "Allocation time: %f\n", qtimer_secs(alloc_timer));
        fprintf(stderr, "Initialization time: %f\n", qtimer_secs(init_timer));
        fprintf(stderr, "Execution time: %f\n", qtimer_secs(exec_timer));
    } else {
        fprintf(stdout, "%f\n", qtimer_secs(exec_timer));
    }

    // Print stencils
    if (print_final) {
        size_t final = (num_timesteps % NUM_STAGES);
        iprintf("Stage %lu:\n", prev_stage(prev_stage(final)));
        print_stage(&points, prev_stage(prev_stage(final)));
        iprintf("\nStage %lu:\n", prev_stage(final));
        print_stage(&points, prev_stage(final));
        iprintf("\nStage %lu:\n", final);
        print_stage(&points, final);
    }

    qt_feb_barrier_destroy(points.barrier);
    qtimer_destroy(alloc_timer);
    qtimer_destroy(init_timer);
    qtimer_destroy(exec_timer);

    // Free allocated memory
    for (int i = 0; i < points.N; i++) {
        free(points.stage[0][i]);
        free(points.stage[1][i]);
        free(points.stage[2][i]);
    }
    free(points.stage[0]);
    free(points.stage[1]);
    free(points.stage[2]);

    return 0;
}
int
  main(int argc, char * argv[])
{
  RT_MODEL * S;
  const char * status;
  int_T count;
  int exit_code = exit_success;
  boolean_T parseError = FALSE;
  real_T final_time = -2;              /* Let model select final time */
  int scheduling_priority;
  struct qsched_param scheduling;
  t_period timeout;
  t_timer_notify notify;
  t_error result;

  /*
   * Make controller threads higher priority than external mode threads:
   *   ext_priority = priority of lowest priority external mode thread
   *   min_priority = minimum allowable priority of lowest priority model task
   *   max_priority = maximum allowable priority of lowest priority model task
   */
  int ext_priority = qsched_get_priority_min(QSCHED_FIFO);
  int min_priority = ext_priority + 2;
  int max_priority = qsched_get_priority_max(QSCHED_FIFO) - 0;
  qsigset_t signal_set;
  qsigaction_t action;
  int_T stack_size = 0;                /* default stack size */
  (void) ssPrintf("Entered main(argc=%d, argv=%p)\n", argc, argv);
  for (count = 0; count < argc; count++) {
    (void) ssPrintf("  argv[%d] = %s\n", count, argv[count]);
  }

  scheduling_priority = 2;             /* default priority */
  if (scheduling_priority < min_priority)
    scheduling_priority = min_priority;
  else if (scheduling_priority > max_priority)
    scheduling_priority = max_priority;

  /*
   * Parse the standard RTW parameters.  Let all unrecognized parameters
   * pass through to external mode for parsing.  NULL out all args handled
   * so that the external mode parsing can ignore them.
   */
  for (count = 1; count < argc; ) {
    const char *option = argv[count++];
    char extraneous_characters[2];
    if ((strcmp(option, "-tf") == 0) && (count != argc)) {/* final time */
      const char * tf_argument = argv[count++];
      double time_value;               /* use a double for the sscanf since real_T may be a float or a double depending on the platform */
      if (strcmp(tf_argument, "inf") == 0) {
        time_value = RUN_FOREVER;
      } else {
        int items = sscanf(tf_argument, "%lf%1s", &time_value,
                           extraneous_characters);
        if ((items != 1) || (time_value < 0.0) ) {
          (void) fprintf(stderr,
                         "final_time must be a positive, real value or inf.\n");
          parseError = true;
          break;
        }
      }

      final_time = (real_T) time_value;
      argv[count-2] = NULL;
      argv[count-1] = NULL;
    } else if ((strcmp(option, "-pri") == 0) && (count != argc)) {/* base priority */
      const char * tf_argument = argv[count++];
      int priority;                    /* use an int for the sscanf since int_T may be the wrong size depending on the platform */
      int items = sscanf(tf_argument, "%d%1s", &priority, extraneous_characters);
      if ((items != 1) || (priority < min_priority) ) {
        (void) fprintf(stderr,
                       "priority must be a greater than or equal to %d.\n",
                       min_priority);
        parseError = true;
        break;
      }

      if (priority > max_priority) {
        (void) fprintf(stderr, "priority must be less than or equal to %d.\n",
                       max_priority);
        parseError = true;
        break;
      }

      scheduling_priority = priority;
      argv[count-2] = NULL;
      argv[count-1] = NULL;
    } else if ((strcmp(option, "-ss") == 0) && (count != argc)) {/* stack size */
      const char * stack_argument = argv[count++];
      int stack;                       /* use an int for the sscanf since int_T may be the wrong size depending on the platform */
      int items = sscanf(stack_argument, "%d%1s", &stack, extraneous_characters);
      if ((items != 1) || (stack < QTHREAD_STACK_MIN) ) {
        (void) fprintf(stderr,
                       "stack size must be a integral value greater than or equal to %d.\n",
                       QTHREAD_STACK_MIN);
        parseError = true;
        break;
      }

      stack_size = (int_T)stack;
      argv[count-2] = NULL;
      argv[count-1] = NULL;
    } else if ((strcmp(option, "-d") == 0) && (count != argc)) {/* current directory */
      const char * path_name = argv[count++];
      _chdir(path_name);
      argv[count-2] = NULL;
      argv[count-1] = NULL;
    }
  }

  rtExtModeQuarcParseArgs(argc, (const char **) argv, "shmem://Crane:1");

  /*
   * Check for unprocessed ("unhandled") args.
   */
  for (count = 1; count < argc; count++) {
    if (argv[count] != NULL) {
      (void) fprintf(stderr, "Unexpected command line argument: \"%s\".\n",
                     argv[count]);
      parseError = TRUE;
    }
  }

  if (parseError) {
    (void) fprintf(stderr,
                   "\nUsage: Crane -option1 val1 -option2 val2 -option3 ...\n\n");
    (void) fprintf(stderr,
                   "\t-tf  20               - sets final time to 20 seconds\n");
    (void) fprintf(stderr,
                   "\t-d   C:\\data          - sets current directory to C:\\data\n");
    (void) fprintf(stderr,
                   "\t-pri 5                - sets the minimum thread priority\n");
    (void) fprintf(stderr,
                   "\t-ss  65536            - sets the stack size for model threads\n");
    (void) fprintf(stderr,
                   "\t-w                    - wait for host to connect before starting\n");
    (void) fprintf(stderr,
                   "\t-uri shmem://mymodel  - set external mode URL to \"shmem://mymodel\"\n");
    (void) fprintf(stderr, "\n");
    return (exit_failure);
  }

  /****************************
   * Initialize global memory *
   ****************************/
  (void)memset(&GBLbuf, 0, sizeof(GBLbuf));

  /************************
   * Initialize the model *
   ************************/
  rt_InitInfAndNaN(sizeof(real_T));
  S = Crane();
  if (rtmGetErrorStatus(S) != NULL) {
    (void) fprintf(stderr, "Error during model registration: %s\n",
                   rtmGetErrorStatus(S));
    return (exit_failure);
  }

  if (final_time >= 0.0 || final_time == RUN_FOREVER) {
    rtmSetTFinal(S, final_time);
  } else {
    rtmSetTFinal(S, rtInf);
  }

  action.sa_handler = control_c_handler;
  action.sa_flags = 0;
  qsigemptyset(&action.sa_mask);
  qsigaction(SIGINT, &action, NULL);
  qsigaction(SIGBREAK, &action, NULL);
  qsigemptyset(&signal_set);
  qsigaddset(&signal_set, SIGINT);
  qsigaddset(&signal_set, SIGBREAK);
  qthread_sigmask(QSIG_UNBLOCK, &signal_set, NULL);
  initialize_sizes(S);
  initialize_sample_times(S);
  status = rt_SimInitTimingEngine(rtmGetNumSampleTimes(S),
    rtmGetStepSize(S),
    rtmGetSampleTimePtr(S),
    rtmGetOffsetTimePtr(S),
    rtmGetSampleHitPtr(S),
    rtmGetSampleTimeTaskIDPtr(S),
    rtmGetTStart(S),
    &rtmGetSimTimeStep(S),
    &rtmGetTimingData(S));
  if (status != NULL) {
    (void) fprintf(stderr, "Failed to initialize sample time engine: %s\n",
                   status);
    return (exit_failure);
  }

  rt_CreateIntegrationData(S);
  fflush(stdout);
  if (rtExtModeQuarcStartup(rtmGetRTWExtModeInfo(S),
       rtmGetNumSampleTimes(S),
       &rtmGetStopRequested(S),
       ext_priority,                   /* external mode thread priority */
       stack_size,
       SS_HAVESTDIO)) {
    (void) ssPrintf("\n** starting the model **\n");
    start(S);
    if (rtmGetErrorStatus(S) == NULL) {
      /*************************************************************************
       * Execute the model.
       *************************************************************************/
      if (rtmGetTFinal(S) == RUN_FOREVER) {
        (void) ssPrintf("\n**May run forever. Model stop time set to infinity.**\n");
      }

      timeout.seconds = (t_long) (rtmGetStepSize(S));
      timeout.nanoseconds = (t_int) ((rtmGetStepSize(S) - timeout.seconds) *
        1000000000L);
      result = qtimer_event_create(&notify.notify_value.event);
      if (result == 0) {
        t_timer timer;
        scheduling.sched_priority = scheduling_priority;
        qthread_setschedparam(qthread_self(), QSCHED_FIFO, &scheduling);
        notify.notify_type = TIMER_NOTIFY_EVENT;
        result = qtimer_create(&notify, &timer);
        if (result == 0) {
          result = qtimer_begin_resolution(timer, &timeout);
          if (result == 0) {
            t_period actual_timeout;
            (void) ssPrintf("Creating main thread with priority %d and period %g...\n",
                            scheduling_priority, rtmGetStepSize(S));
            result = qtimer_get_actual_period(timer, &timeout, &actual_timeout);
            if (result == 0 && (timeout.nanoseconds !=
                                actual_timeout.nanoseconds || timeout.seconds !=
                                actual_timeout.seconds))
              (void) ssPrintf("*** Actual period will be %g ***\n",
                              actual_timeout.seconds + 1e-9 *
                              actual_timeout.nanoseconds);
            fflush(stdout);
            result = qtimer_set_time(timer, &timeout, true);
            if (result == 0) {
              /* Enter the periodic loop */
              while (result == 0) {
                if (GBLbuf.stopExecutionFlag || rtmGetStopRequested(S)) {
                  break;
                }

                if (rtmGetTFinal(S) != RUN_FOREVER && rtmGetTFinal(S) - rtmGetT
                    (S) <= rtmGetT(S)*DBL_EPSILON) {
                  break;
                }

                if (qtimer_get_overrun(timer) > 0) {
                  (void) fprintf(stderr,
                                 "Sampling rate is too fast for base rate\n");
                  fflush(stderr);
                }

                rt_OneStep(S);
                result = qtimer_event_wait(notify.notify_value.event);
              }

              /* disarm the timer */
              qtimer_cancel(timer);
              if (rtmGetStopRequested(S) == false && rtmGetErrorStatus(S) ==
                  NULL) {
                /* Execute model last time step if final time expired */
                rt_OneStep(S);
              }

              (void) ssPrintf("Main thread exited\n");
            } else {
              msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof
                (GBLbuf.submessage));
              string_format(GBLbuf.message, sizeof(GBLbuf.message),
                            "Unable to set base rate. %s", GBLbuf.submessage);
              rtmSetErrorStatus(S, GBLbuf.message);
            }

            qtimer_end_resolution(timer);
          } else {
            msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof
              (GBLbuf.submessage));
            string_format(GBLbuf.message, sizeof(GBLbuf.message),
                          "Sampling period of %lg is too fast for the system clock. %s",
                          rtmGetStepSize(S), GBLbuf.submessage);
            rtmSetErrorStatus(S, GBLbuf.message);
          }

          qtimer_delete(timer);
        } else {
          msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof
            (GBLbuf.submessage));
          string_format(GBLbuf.message, sizeof(GBLbuf.message),
                        "Unable to create timer for base rate. %s",
                        GBLbuf.submessage);
          rtmSetErrorStatus(S, GBLbuf.message);
        }
      } else {
        msg_get_error_messageA(NULL, result, GBLbuf.submessage, sizeof
          (GBLbuf.submessage));
        string_format(GBLbuf.message, sizeof(GBLbuf.message),
                      "Unable to create timer event for base rate. %s",
                      GBLbuf.submessage);
        rtmSetErrorStatus(S, GBLbuf.message);
      }

      GBLbuf.stopExecutionFlag = 1;
    }
  } else {
    rtmSetErrorStatus(S, "Unable to initialize external mode.");
  }

  rtExtSetReturnStatus(rtmGetErrorStatus(S));
  rtExtModeQuarcCleanup(rtmGetNumSampleTimes(S));

  /********************
   * Cleanup and exit *
   ********************/
  if (rtmGetErrorStatus(S) != NULL) {
    (void) fprintf(stderr, "%s\n", rtmGetErrorStatus(S));
    exit_code = exit_failure;
  }

  (void) ssPrintf("Invoking model termination function...\n");
  terminate(S);
  (void) ssPrintf("Exiting real-time code\n");
  return (exit_code);
}
示例#9
0
int main(int argc, char *argv[])
{
    aligned_t *ui_array, *ui_array2;
    double *d_array, *d_array2;
    size_t len = 1000000;
    qtimer_t timer = qtimer_create();
    double cumulative_time_qutil = 0.0;
    double cumulative_time_libc = 0.0;
    int using_doubles = 0;
    unsigned long iterations = 10;

    qthread_initialize();

    CHECK_VERBOSE();
    printf("%i threads\n", (int)qthread_num_workers());
    NUMARG(len, "TEST_LEN");
    NUMARG(iterations, "TEST_ITERATIONS");
    NUMARG(using_doubles, "TEST_USING_DOUBLES");
    printf("using %s\n", using_doubles ? "doubles" : "aligned_ts");

    if (using_doubles) {
        d_array = calloc(len, sizeof(double));
	printf("array is %s\n", human_readable(len * sizeof(double)));
        assert(d_array);
        // madvise(d_array,len*sizeof(double), MADV_SEQUENTIAL);
        for (unsigned int i = 0; i < len; i++) {
            d_array[i] = ((double)random()) / ((double)RAND_MAX) + random();
        }
        d_array2 = calloc(len, sizeof(double));
        assert(d_array2);
        // madvise(d_array2,len*sizeof(double), MADV_RANDOM);
        iprintf("double array generated...\n");
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qutil_qsort(d_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with qutil took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
        cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu doubles with qutil took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_qutil);
        for (unsigned int i = 0; i < iterations; i++) {
            memcpy(d_array2, d_array, len * sizeof(double));
            qtimer_start(timer);
            qsort(d_array2, len, sizeof(double), dcmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
            iprintf("\t%u: sorting %lu doubles with libc took: %f seconds\n",
                    i, (unsigned long)len, qtimer_secs(timer));
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu doubles with libc took: %f seconds\n",
               (unsigned long)len, cumulative_time_libc);
        free(d_array);
        free(d_array2);
    } else {
        ui_array = calloc(len, sizeof(aligned_t));
	printf("array is %s\n", human_readable(len * sizeof(aligned_t)));
        for (unsigned int i = 0; i < len; i++) {
            ui_array[i] = random();
        }
        ui_array2 = calloc(len, sizeof(aligned_t));
        iprintf("ui_array generated...\n");
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qutil_aligned_qsort(ui_array2, len);
            qtimer_stop(timer);
            cumulative_time_qutil += qtimer_secs(timer);
        }
	cumulative_time_qutil /= (double)iterations;
        printf("sorting %lu aligned_ts with qutil took: %f seconds\n",
               (unsigned long)len, cumulative_time_qutil);
        for (int i = 0; i < iterations; i++) {
            memcpy(ui_array2, ui_array, len * sizeof(aligned_t));
            qtimer_start(timer);
            qsort(ui_array2, len, sizeof(double), acmp);
            qtimer_stop(timer);
            cumulative_time_libc += qtimer_secs(timer);
        }
	cumulative_time_libc /= (double)iterations;
        printf("sorting %lu aligned_ts with libc took: %f seconds (avg)\n",
               (unsigned long)len, cumulative_time_libc);
        free(ui_array);
        free(ui_array2);
    }
    if (cumulative_time_qutil < cumulative_time_libc) {
	printf("qutil with %lu threads provides a %0.2fx speedup.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    } else {
	printf("qutil with %lu threads provides a %0.2fx slowdown.\n", (unsigned long)qthread_num_shepherds(), cumulative_time_libc/cumulative_time_qutil);
    }

    qtimer_destroy(timer);

    return 0;
}
示例#10
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

#pragma omp parallel
#pragma omp single
#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);

    nodecount = 1;
    long retval;
#pragma omp parallel
#pragma omp single nowait
#pragma omp task untied
    retval = visit(&root, root.num_children);

    total_num_nodes = retval;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / omp_get_num_threads());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / omp_get_num_threads());
#endif /* ifdef PRINT_STATS */

    return 0;
}
示例#11
0
int main(int   argc,
         char *argv[])
{
    size_t       threads = 1000, i;
    aligned_t   *rets;
    qtimer_t     t;
    unsigned int iter, iterations = 10;

    assert(qthread_initialize() == 0);
    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(threads, "THREADS");
    NUMARG(iterations, "ITERATIONS");

    initme = (aligned_t *)calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = (aligned_t *)malloc(iterations * threads * sizeof(aligned_t));
    assert(rets);

    iprintf("creating the barrier for %zu threads\n", threads + 1);
    wait_on_me = qt_feb_barrier_create(threads + 1);    // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 0; i < threads; i++) {
            qthread_fork(barrier_thread, wait_on_me, rets + (iter * threads) + i);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_feb_barrier_enter(wait_on_me);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));

        initme_idx = 0;

        for (i = 0; i < threads; i++) {
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Destroying barrier...\n");
    qt_feb_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    /* this loop shouldn't be necessary... but seems to avoid crashes in rare
     * cases (in other words there must a race condition in qthread_finalize()
     * if there are outstanding threads out there) */
    for (i = 0; i < threads * 2; i++) {
        aligned_t tmp = 1;
        qthread_readFF(&tmp, rets + i);
        assert(tmp == 0);
    }
    return 0;
}
示例#12
0
int main(int   argc,
         char *argv[])
{
    size_t     threads, i;
    aligned_t *rets;
    qtimer_t   t;
    unsigned int iter, iterations = 10;
    double tot = 0.0;

    assert(qthread_initialize() == 0);
    t = qtimer_create();

    CHECK_VERBOSE();
    NUMARG(iterations, "ITERATIONS");

    threads = qthread_num_workers();
    iprintf("%i shepherds...\n", qthread_num_shepherds());
    iprintf("%i threads...\n", (int)threads);

    initme = calloc(threads, sizeof(aligned_t));
    assert(initme);

    rets = malloc(threads * sizeof(aligned_t));
    assert(rets);

    iprintf("Creating a barrier to block %i threads\n", threads);
    wait_on_me = qt_barrier_create(threads, REGION_BARRIER, 0);     // all my spawnees plus me
    assert(wait_on_me);

    for (iter = 0; iter < iterations; iter++) {
        iprintf("%i: forking the threads\n", iter);
        for (i = 1; i < threads; i++) {
            void *arg[2] = {wait_on_me, (void*)(intptr_t)i};
            qthread_spawn(barrier_thread, arg, sizeof(void*)*2, rets + i, 0, NULL, i, 0);
        }
        iprintf("%i: done forking the threads, entering the barrier\n", iter);
        qtimer_start(t);
        qt_barrier_enter(wait_on_me, 0);
        qtimer_stop(t);
        iprintf("%i: main thread exited barrier in %f seconds\n", iter, qtimer_secs(t));
        tot += qtimer_secs(t);

        // reset
        initme_idx = 1;

        // check retvals
        for (i = 1; i < threads; i++) {
            qthread_readFF(NULL, rets + i);
            if (initme[i] != iter + 1) {
                iprintf("initme[%i] = %i (should be %i)\n", (int)i,
                        (int)initme[i], iter + 1);
            }
            assert(initme[i] == iter + 1);
        }
    }

    iprintf("Average barrier time = %f\n", tot / iterations);

    iprintf("Destroying the barrier...\n");
    qt_barrier_destroy(wait_on_me);

    iprintf("Success!\n");

    return 0;
}
示例#13
0
文件: uts_cilk.c 项目: Agobin/chapel
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned long tmp = 0;
        NUMARG(tmp, "UTS_TREE_TYPE");
        tree_type = (tree_t)tmp;
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    {
        unsigned long tmp = 0;
        NUMARG(tmp, "UTS_SHAPE_FN");
        shape_fn = (shape_t)tmp;
    }
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);

    nodecount = 1;
    long retval;
    {
        retval = _Cilk_spawn visit(root);

        _Cilk_sync;
    }

    total_num_nodes = retval;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    LOG_UTS_RESULTS_YAML(total_num_nodes, total_time)
    LOG_ENV_CILK_YAML()
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / __cilkrts_get_nworkers());
#endif /* ifdef PRINT_STATS */

    return 0;
}
示例#14
0
int main(int   argc,
         char *argv[])
{
    uint64_t total_num_nodes = 0;
    qtimer_t timer;
    double   total_time = 0.0;

    CHECK_VERBOSE();

    {
        unsigned int tmp = (unsigned int)tree_type;
        NUMARG(tmp, "UTS_TREE_TYPE");
        if (tmp <= BALANCED) {
            tree_type = (tree_t)tmp;
        } else {
            fprintf(stderr, "invalid tree type\n");
            return EXIT_FAILURE;
        }
        tmp = (unsigned int)shape_fn;
        NUMARG(tmp, "UTS_SHAPE_FN");
        if (tmp <= FIXED) {
            shape_fn = (shape_t)tmp;
        } else {
            fprintf(stderr, "invalid shape function\n");
            return EXIT_FAILURE;
        }
    }
    DBLARG(bf_0, "UTS_BF_0");
    NUMARG(root_seed, "UTS_ROOT_SEED");
    NUMARG(tree_depth, "UTS_TREE_DEPTH");
    DBLARG(non_leaf_prob, "UTS_NON_LEAF_PROB");
    NUMARG(non_leaf_bf, "UTS_NON_LEAF_NUM");
    NUMARG(shift_depth, "UTS_SHIFT_DEPTH");
    NUMARG(num_samples, "UTS_NUM_SAMPLES");

    // If the operator did not attempt to set a stack size, force
    // a reasonable lower bound
    if (!getenv("QT_STACK_SIZE") && !getenv("QTHREAD_STACK_SIZE"))
        setenv("QT_STACK_SIZE", "32768", 0);

    assert(qthread_initialize() == 0);

#ifdef PRINT_STATS
    print_stats();
#else
    print_banner();
#endif

    timer = qtimer_create();
    qtimer_start(timer);

    node_t root;
    root.height = 0;
    rng_init(root.state.state, root_seed);
    root.num_children = calc_num_children(&root);
    aligned_t donecount = 0;
    root.dc = &donecount;
    qthread_empty(&donecount);
    aligned_t tot = 0;
    root.acc = &tot;
    root.expect = 1;

    qthread_fork_syncvar(visit, &root, NULL);
    qthread_readFF(NULL, root.dc);
    total_num_nodes = tot;

    qtimer_stop(timer);

    total_time = qtimer_secs(timer);

    qtimer_destroy(timer);

#ifdef PRINT_STATS
    printf("tree-size %lu\ntree-depth %d\nnum-leaves %llu\nperc-leaves %.2f\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("exec-time %.3f\ntotal-perf %.0f\npu-perf %.0f\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#else
    printf("Tree size = %lu, tree depth = %d, num leaves = %llu (%.2f%%)\n",
           (unsigned long)total_num_nodes,
           (int)tree_height,
           (unsigned long long)num_leaves,
           num_leaves / (float)total_num_nodes * 100.0);
    printf("Wallclock time = %.3f sec, performance = %.0f "
           "nodes/sec (%.0f nodes/sec per PE)\n\n",
           total_time,
           total_num_nodes / total_time,
           total_num_nodes / total_time / qthread_num_workers());
#endif /* ifdef PRINT_STATS */

    return 0;
}