Beispiel #1
0
/*---------------------------------------------------------------------------*/
/**
 * Try to make progress on a context for at most \a timeout milliseconds.
 *
 * Calls to the plugin progress callback are serialized per context: the
 * `progressing` flag (protected by `progress_mutex`) marks the context as
 * busy, and any other caller blocks on `progress_cond` until the flag is
 * cleared or its own time budget is exhausted. Time spent waiting is
 * deducted from the budget that is handed to the plugin callback.
 *
 * \param na_class [IN]  NA class providing the progress callback
 * \param context  [IN]  context to progress
 * \param timeout  [IN]  time budget in milliseconds
 *
 * \return NA_INVALID_PARAM when na_class or context is NULL,
 *         NA_PROTOCOL_ERROR when the class has no progress callback,
 *         NA_TIMEOUT when the budget ran out while waiting for another
 *         caller to finish, otherwise whatever the progress callback returns
 */
na_return_t
NA_Progress(na_class_t *na_class, na_context_t *context, unsigned int timeout)
{
    struct na_private_context *priv = (struct na_private_context *) context;
    double budget = timeout / 1000.0; /* seconds left; timeout is in ms */
    na_return_t rc;

    if (!na_class) {
        NA_LOG_ERROR("NULL NA class");
        return NA_INVALID_PARAM;
    }
    if (!context) {
        NA_LOG_ERROR("NULL context");
        return NA_INVALID_PARAM;
    }
    if (!na_class->progress) {
        NA_LOG_ERROR("progress plugin callback is not defined");
        return NA_PROTOCOL_ERROR;
    }

    /* TODO option for concurrent progress */

    /* Only one thread at a time may run progress on a given context */
    hg_thread_mutex_lock(&priv->progress_mutex);

    for (;;) {
        hg_time_t before, after;

        if (!priv->progressing)
            break;

        /* Someone else is progressing and there is no time left to wait */
        if (budget <= 0)
            goto timed_out;

        hg_time_get_current(&before);

        /* The loop re-tests the flag after every wake-up */
        if (hg_thread_cond_timedwait(&priv->progress_cond,
                &priv->progress_mutex,
                (unsigned int) (budget * 1000)) != HG_UTIL_SUCCESS)
            goto timed_out;

        hg_time_get_current(&after);

        /* Charge the time spent waiting; clamp at 0 so that progress can
         * still be attempted with a zero timeout if the flag was cleared */
        budget -= hg_time_to_double(hg_time_subtract(after, before));
        if (budget < 0)
            budget = 0;
    }

    /* Claim the context, then release the mutex while the plugin runs */
    priv->progressing = NA_TRUE;
    hg_thread_mutex_unlock(&priv->progress_mutex);

    rc = na_class->progress(na_class, context,
            (unsigned int) (budget * 1000));

    /* Release the context and wake up a waiting thread */
    hg_thread_mutex_lock(&priv->progress_mutex);
    priv->progressing = NA_FALSE;
    hg_thread_cond_signal(&priv->progress_cond);
    hg_thread_mutex_unlock(&priv->progress_mutex);

    return rc;

timed_out:
    hg_thread_mutex_unlock(&priv->progress_mutex);
    return NA_TIMEOUT;
}
Beispiel #2
0
/*---------------------------------------------------------------------------*/
/**
 * Run the message latency benchmark for one message size and print the
 * result on rank 0.
 *
 * Every iteration posts an expected receive from the target address, sends
 * an unexpected message of the same size to that address and waits on the
 * receive request. SMALL_SKIP untimed warm-up iterations (tag 0) are followed
 * by `loop * 100` timed iterations (tag 1), each of which also includes a
 * barrier in the timed region. The printed figure is
 * accumulated_time * 1e6 / (2 * iterations * mpi_comm_size).
 *
 * \param na_test_lat_info [IN]  benchmark state: NA class, context, request
 *                               class, target address and test info
 * \param size             [IN]  requested message size in bytes; raised to
 *                               the unexpected header size plus one when it
 *                               does not exceed the header size
 *
 * \return NA_SUCCESS, or the error returned by the post call that failed
 */
static na_return_t
na_test_measure_latency(struct na_test_lat_info *na_test_lat_info,
    na_size_t size)
{
    na_size_t hdr_size =
        NA_Msg_get_unexpected_header_size(na_test_lat_info->na_class);
    na_size_t msg_size;
    size_t n_iter = (size_t) na_test_lat_info->na_test_info.loop * 100;
    size_t n_warmup = SMALL_SKIP;
    char *tx_buf = NULL, *rx_buf = NULL;
    void *tx_plugin_data, *rx_plugin_data;
    na_op_id_t tx_op;
    na_op_id_t rx_op;
    hg_request_t *rx_done = NULL;
    double elapsed = 0, latency;
    na_return_t ret = NA_SUCCESS;
    size_t iter;
    size_t k;

    /* The message always carries at least one byte past the header */
    msg_size = (size > hdr_size) ? size : hdr_size + 1;

    /* Send buffer: header initialized by NA, payload filled with (char) k */
    tx_buf = NA_Msg_buf_alloc(na_test_lat_info->na_class, msg_size,
        &tx_plugin_data);
    NA_Msg_init_unexpected(na_test_lat_info->na_class, tx_buf, msg_size);
    k = hdr_size;
    while (k < msg_size) {
        tx_buf[k] = (char) k;
        k++;
    }

    /* Receive buffer starts out zeroed */
    rx_buf = NA_Msg_buf_alloc(na_test_lat_info->na_class, msg_size,
        &rx_plugin_data);
    memset(rx_buf, 0, msg_size);

    /* Operation IDs and the request used to wait on the receive */
    tx_op = NA_Op_create(na_test_lat_info->na_class);
    rx_op = NA_Op_create(na_test_lat_info->na_class);
    rx_done = hg_request_create(na_test_lat_info->request_class);

    /* Untimed warm-up exchanges */
    for (k = n_warmup; k != 0; k--) {
        ret = NA_Msg_recv_expected(na_test_lat_info->na_class,
            na_test_lat_info->context, na_test_recv_expected_cb, rx_done,
            rx_buf, msg_size, rx_plugin_data, na_test_lat_info->target_addr,
            0, 0, &rx_op);
        if (ret != NA_SUCCESS) {
            NA_LOG_ERROR("NA_Msg_recv_expected() failed");
            goto done;
        }

        ret = NA_Msg_send_unexpected(na_test_lat_info->na_class,
            na_test_lat_info->context, NULL, NULL, tx_buf, msg_size,
            tx_plugin_data, na_test_lat_info->target_addr, 0, 0, &tx_op);
        if (ret != NA_SUCCESS) {
            NA_LOG_ERROR("NA_Msg_send_unexpected() failed");
            goto done;
        }

        hg_request_wait(rx_done, NA_MAX_IDLE_TIME, NULL);
        hg_request_reset(rx_done);
    }

    NA_Test_barrier(&na_test_lat_info->na_test_info);

    /* Timed exchanges */
    for (iter = 0; iter < n_iter; iter++) {
        hg_time_t start, stop;

        hg_time_get_current(&start);

        ret = NA_Msg_recv_expected(na_test_lat_info->na_class,
            na_test_lat_info->context, na_test_recv_expected_cb, rx_done,
            rx_buf, msg_size, rx_plugin_data, na_test_lat_info->target_addr,
            0, 1, &rx_op);
        if (ret != NA_SUCCESS) {
            NA_LOG_ERROR("NA_Msg_recv_expected() failed");
            goto done;
        }

        ret = NA_Msg_send_unexpected(na_test_lat_info->na_class,
            na_test_lat_info->context, NULL, NULL, tx_buf, msg_size,
            tx_plugin_data, na_test_lat_info->target_addr, 0, 1, &tx_op);
        if (ret != NA_SUCCESS) {
            NA_LOG_ERROR("NA_Msg_send_unexpected() failed");
            goto done;
        }

        hg_request_wait(rx_done, NA_MAX_IDLE_TIME, NULL);
        NA_Test_barrier(&na_test_lat_info->na_test_info);
        hg_time_get_current(&stop);
        elapsed += hg_time_to_double(hg_time_subtract(stop, start));

        hg_request_reset(rx_done);

#ifdef MERCURY_TESTING_HAS_VERIFY_DATA
        {
            /* Payload byte k is expected to equal (char) k; report the
             * first mismatch only */
            const char *rx_bytes = (const char *) rx_buf;

            k = NA_Msg_get_unexpected_header_size(na_test_lat_info->na_class);
            while (k < msg_size && rx_bytes[k] == (char) k)
                k++;
            if (k < msg_size)
                fprintf(stderr, "Error detected in bulk transfer, buf[%d] = %d, "
                    "was expecting %d!\n", (int) k, (char) rx_bytes[k],
                    (char) k);
        }
#endif

#ifdef MERCURY_TESTING_PRINT_PARTIAL
        /* Running figure over the iterations completed so far */
        latency = elapsed * 1.0e6
            / (double) ((iter + 1) * 2 *
                (unsigned int) na_test_lat_info->na_test_info.mpi_comm_size);
        if (na_test_lat_info->na_test_info.mpi_comm_rank == 0)
            fprintf(stdout, "%-*d%*.*f\r", 10, (int) size, NWIDTH,
                NDIGITS, latency);
#endif
    }

#ifndef MERCURY_TESTING_PRINT_PARTIAL
    /* Final figure over all timed iterations */
    latency = elapsed * 1.0e6
        / (double) (n_iter * 2 *
            (unsigned int) na_test_lat_info->na_test_info.mpi_comm_size);
    if (na_test_lat_info->na_test_info.mpi_comm_rank == 0)
        fprintf(stdout, "%-*d%*.*f", 10, (int) size, NWIDTH, NDIGITS,
            latency);
#endif
    if (na_test_lat_info->na_test_info.mpi_comm_rank == 0)
        fprintf(stdout, "\n");

done:
    /* Shared exit: release the request, both op IDs and both buffers */
    hg_request_destroy(rx_done);
    NA_Op_destroy(na_test_lat_info->na_class, tx_op);
    NA_Op_destroy(na_test_lat_info->na_class, rx_op);
    NA_Msg_buf_free(na_test_lat_info->na_class, tx_buf, tx_plugin_data);
    NA_Msg_buf_free(na_test_lat_info->na_class, rx_buf, rx_plugin_data);
    return ret;
}
Beispiel #3
0
/**
 * Benchmark the perf RPC (hg_test_perf_rpc_id_g) against \a addr.
 *
 * RPC_SKIP untimed warm-up calls are followed by MERCURY_TESTING_MAX_LOOP
 * timed calls. Each call creates a request and a handle, forwards the RPC,
 * waits on the request and destroys both again. The timed interval of an
 * iteration runs from just before HG_Forward() until after the barrier that
 * follows the wait. After every timed iteration rank 0 prints the running
 * average, minimum and maximum time together with the matching
 * calls-per-second figures.
 *
 * The request and the handle of the current iteration are released on every
 * exit path, including the error paths.
 *
 * \param hg_class      [IN]  HG class passed to HG_Create()
 * \param context       [IN]  HG context passed to HG_Create()
 * \param addr          [IN]  target address
 * \param request_class [IN]  request class used to create the wait requests
 *
 * \return HG_SUCCESS, or the error returned by the HG call that failed
 */
static hg_return_t
measure_rpc(hg_class_t *hg_class, hg_context_t *context, na_addr_t addr,
        hg_request_class_t *request_class)
{
    int avg_iter;
    double time_read = 0, min_time_read = 0, max_time_read = 0;
    hg_return_t ret = HG_SUCCESS;
    size_t i;

    if (na_test_comm_rank_g == 0) {
        printf("# Executing RPC with %d client(s) -- loop %d time(s)\n",
                na_test_comm_size_g, MERCURY_TESTING_MAX_LOOP);
        printf("# Warming up...\n");
    }

    /* Warm up for RPC */
    for (i = 0; i < RPC_SKIP; i++) {
        hg_request_t *request;
        hg_handle_t handle;

        request = hg_request_create(request_class);

        ret = HG_Create(hg_class, context, addr, hg_test_perf_rpc_id_g, &handle);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not start call\n");
            hg_request_destroy(request);
            goto done;
        }

        ret = HG_Forward(handle, hg_test_perf_forward_cb, request, NULL);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not forward call\n");
            /* Nothing was posted, so both objects can be released here;
             * keep the forward error as the return value */
            (void) HG_Destroy(handle);
            hg_request_destroy(request);
            goto done;
        }

        hg_request_wait(request, HG_MAX_IDLE_TIME, NULL);

        /* Complete; the request is released whether or not this succeeds */
        ret = HG_Destroy(handle);
        hg_request_destroy(request);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not complete\n");
            goto done;
        }
    }

    NA_Test_barrier();

    if (na_test_comm_rank_g == 0) {
        printf("%*s%*s%*s%*s%*s%*s",
                10, "# Time (s)", 10, "Min (s)", 10, "Max (s)",
                12, "Calls (c/s)", 12, "Min (c/s)", 12, "Max (c/s)");
        printf("\n");
    }

    /* RPC benchmark */
    for (avg_iter = 0; avg_iter < MERCURY_TESTING_MAX_LOOP; avg_iter++) {
        hg_request_t *request;
        hg_handle_t handle;
        hg_time_t t1, t2;
        double td, part_time_read;
        double calls_per_sec, min_calls_per_sec, max_calls_per_sec;

        request = hg_request_create(request_class);

        ret = HG_Create(hg_class, context, addr, hg_test_perf_rpc_id_g, &handle);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not start call\n");
            hg_request_destroy(request);
            goto done;
        }

        hg_time_get_current(&t1);

        ret = HG_Forward(handle, hg_test_perf_forward_cb, request, NULL);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not forward call\n");
            (void) HG_Destroy(handle);
            hg_request_destroy(request);
            goto done;
        }

        hg_request_wait(request, HG_MAX_IDLE_TIME, NULL);

        NA_Test_barrier();

        hg_time_get_current(&t2);
        td = hg_time_to_double(hg_time_subtract(t2, t1));

        time_read += td;
        /* A minimum of 0 means "not set yet": seed it with this sample */
        if (!min_time_read) min_time_read = td;
        min_time_read = (td < min_time_read) ? td : min_time_read;
        max_time_read = (td > max_time_read) ? td : max_time_read;

        /* Complete; the request is released whether or not this succeeds */
        ret = HG_Destroy(handle);
        hg_request_destroy(request);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not complete\n");
            goto done;
        }

        part_time_read = time_read / (avg_iter + 1);
        calls_per_sec = na_test_comm_size_g / part_time_read;
        /* The slowest sample gives the lowest rate and vice versa */
        min_calls_per_sec = na_test_comm_size_g / max_time_read;
        max_calls_per_sec = na_test_comm_size_g / min_time_read;

        /* Running statistics, overwritten in place through '\r' */
        if (na_test_comm_rank_g == 0) {
            printf("%*f%*f%*f%*.*f%*.*f%*.*f\r",
                10, part_time_read, 10, min_time_read, 10, max_time_read,
                12, 2, calls_per_sec, 12, 2, min_calls_per_sec, 12, 2,
                max_calls_per_sec);
        }
    }
    if (na_test_comm_rank_g == 0) printf("\n");

done:
    return ret;
}
Beispiel #4
0
/**
 * Benchmark bulk transfers of \a total_size bytes using \a nhandles
 * concurrent RPC handles (hg_test_perf_bulk_read_id_g).
 *
 * A buffer of total_size bytes is allocated, filled with 1s and registered
 * as a single-segment bulk handle. Every iteration forwards all handles with
 * that bulk handle as input and waits on one shared request. Warm-up
 * iterations (LARGE_SKIP or SMALL_SKIP depending on the size) are untimed;
 * the timed iterations include a barrier. The printed figure is
 * nmbytes * nhandles * iterations * mpi_comm_size / accumulated_time.
 *
 * Everything acquired here (buffer, handle array, created handles, request,
 * bulk handle) is released on every exit path. The first error encountered
 * is the one returned; failures during cleanup are reported on stderr and
 * only become the return value if nothing failed earlier.
 *
 * NOTE(review): the two malloc() results are not checked because no
 * out-of-memory return code is visible from this function; add a check with
 * the project's error code.
 * NOTE(review): if HG_Forward() fails after earlier handles of the same batch
 * were already forwarded, those operations may still reference `args` and
 * `request` after this function returns; confirm whether they have to be
 * cancelled or drained first.
 *
 * \param hg_test_info [IN]  test state: HG class, context, request class,
 *                           target address and NA test info
 * \param total_size   [IN]  size of the bulk buffer in bytes
 * \param nhandles     [IN]  number of handles forwarded per iteration
 *
 * \return HG_SUCCESS, or the error returned by the HG call that failed
 */
static hg_return_t
measure_bulk_transfer(struct hg_test_info *hg_test_info, size_t total_size,
    unsigned int nhandles)
{
    bulk_write_in_t in_struct;
    char *bulk_buf = NULL;
    void **buf_ptrs;
    size_t *buf_sizes;
    hg_bulk_t bulk_handle = HG_BULK_NULL;
    size_t nbytes = total_size;
    double nmbytes = (double) total_size / (1024 * 1024);
    size_t loop = (total_size > LARGE_SIZE) ? MERCURY_TESTING_MAX_LOOP :
        MERCURY_TESTING_MAX_LOOP * 10;
    size_t skip = (total_size > LARGE_SIZE) ? LARGE_SKIP : SMALL_SKIP;
    hg_handle_t *handles = NULL;
    size_t nhandles_created = 0; /* handles that must be destroyed */
    hg_request_t *request = NULL;
    struct hg_test_perf_args args;
    size_t avg_iter;
    double time_read = 0, read_bandwidth;
    hg_return_t ret = HG_SUCCESS;
    size_t i;

    /* Prepare bulk_buf */
    bulk_buf = malloc(nbytes);
    for (i = 0; i < nbytes; i++)
        bulk_buf[i] = 1;
    buf_ptrs = (void **) &bulk_buf;
    buf_sizes = &nbytes;

    /* Create handles */
    handles = malloc(nhandles * sizeof(hg_handle_t));
    for (i = 0; i < nhandles; i++) {
        ret = HG_Create(hg_test_info->context, hg_test_info->target_addr,
            hg_test_perf_bulk_read_id_g, &handles[i]);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not start call\n");
            goto done;
        }
        nhandles_created++;
    }

    /* One request is shared by all handles of an iteration */
    request = hg_request_create(hg_test_info->request_class);
    hg_atomic_init32(&args.op_completed_count, 0);
    args.op_count = nhandles;
    args.request = request;

    /* Register memory */
    ret = HG_Bulk_create(hg_test_info->hg_class, 1, buf_ptrs,
        (hg_size_t *) buf_sizes, HG_BULK_READWRITE, &bulk_handle);
    if (ret != HG_SUCCESS) {
        fprintf(stderr, "Could not create bulk data handle\n");
        goto done;
    }

    /* Fill input structure */
    in_struct.fildes = 0;
    in_struct.bulk_handle = bulk_handle;

    /* Warm up for bulk data */
    for (i = 0; i < skip; i++) {
        unsigned int j;

        for (j = 0; j < nhandles; j++) {
            ret = HG_Forward(handles[j], hg_test_perf_forward_cb, &args, &in_struct);
            if (ret != HG_SUCCESS) {
                fprintf(stderr, "Could not forward call\n");
                goto done;
            }
        }

        hg_request_wait(request, HG_MAX_IDLE_TIME, NULL);
        hg_request_reset(request);
        hg_atomic_set32(&args.op_completed_count, 0);
    }

    NA_Test_barrier(&hg_test_info->na_test_info);

    /* Bulk data benchmark */
    for (avg_iter = 0; avg_iter < loop; avg_iter++) {
        hg_time_t t1, t2;
        unsigned int j;

        hg_time_get_current(&t1);

        for (j = 0; j < nhandles; j++) {
            ret = HG_Forward(handles[j], hg_test_perf_forward_cb, &args, &in_struct);
            if (ret != HG_SUCCESS) {
                fprintf(stderr, "Could not forward call\n");
                goto done;
            }
        }

        hg_request_wait(request, HG_MAX_IDLE_TIME, NULL);
        NA_Test_barrier(&hg_test_info->na_test_info);
        hg_time_get_current(&t2);
        time_read += hg_time_to_double(hg_time_subtract(t2, t1));

        /* Re-arm the shared request and completion counter */
        hg_request_reset(request);
        hg_atomic_set32(&args.op_completed_count, 0);

#ifdef MERCURY_TESTING_PRINT_PARTIAL
        /* Running figure over the iterations completed so far */
        read_bandwidth = nmbytes
            * (double) (nhandles * (avg_iter + 1) *
                (unsigned int) hg_test_info->na_test_info.mpi_comm_size)
            / time_read;

        if (hg_test_info->na_test_info.mpi_comm_rank == 0)
            fprintf(stdout, "%-*d%*.*f\r", 10, (int) nbytes, NWIDTH,
                NDIGITS, read_bandwidth);
#endif
#ifdef MERCURY_TESTING_HAS_VERIFY_DATA
        /* Byte i is expected to equal (char) i; report the first mismatch */
        for (i = 0; i < nbytes; i++) {
            if (bulk_buf[i] != (char) i) {
                printf("Error detected in bulk transfer, buf[%d] = %d, "
                    "was expecting %d!\n", (int) i, (char) bulk_buf[i],
                    (char) i);
                break;
            }
        }
#endif
    }
#ifndef MERCURY_TESTING_PRINT_PARTIAL
    /* Final figure over all timed iterations */
    read_bandwidth = nmbytes
        * (double) (nhandles * loop *
            (unsigned int) hg_test_info->na_test_info.mpi_comm_size)
        / time_read;

    if (hg_test_info->na_test_info.mpi_comm_rank == 0)
        fprintf(stdout, "%-*d%*.*f", 10, (int) nbytes, NWIDTH, NDIGITS,
            read_bandwidth);
#endif
    if (hg_test_info->na_test_info.mpi_comm_rank == 0) fprintf(stdout, "\n");

done:
    /* Free memory handle */
    if (bulk_handle != HG_BULK_NULL) {
        hg_return_t free_ret = HG_Bulk_free(bulk_handle);

        if (free_ret != HG_SUCCESS) {
            fprintf(stderr, "Could not free bulk data handle\n");
            if (ret == HG_SUCCESS)
                ret = free_ret;
        }
    }

    /* Complete */
    if (request)
        hg_request_destroy(request);
    for (i = 0; i < nhandles_created; i++) {
        hg_return_t destroy_ret = HG_Destroy(handles[i]);

        if (destroy_ret != HG_SUCCESS) {
            fprintf(stderr, "Could not complete\n");
            if (ret == HG_SUCCESS)
                ret = destroy_ret;
        }
    }

    free(bulk_buf);
    free(handles);
    return ret;
}
Beispiel #5
0
/**
 * Benchmark bulk transfers of a MERCURY_TESTING_BUFFER_SIZE MiB buffer with
 * the perf bulk RPC (hg_test_perf_bulk_id_g) against \a addr.
 *
 * The buffer is filled with the int sequence 0, 1, 2, ... and registered as
 * a single-segment HG_BULK_READ_ONLY bulk handle. One handle is created and
 * reused: BULK_SKIP untimed warm-up forwards are followed by
 * MERCURY_TESTING_MAX_LOOP timed forwards, each waiting on its own request.
 * The timed interval of an iteration runs from just before HG_Forward() until
 * after the barrier that follows the wait. After every timed iteration rank 0
 * prints the running average, minimum and maximum time together with the
 * matching bandwidth figures.
 *
 * The buffer, the bulk handle, the RPC handle and the request of the current
 * iteration are released on every exit path. The first error encountered is
 * the one returned; failures during cleanup are reported on stderr and only
 * become the return value if nothing failed earlier.
 *
 * NOTE(review): the results of malloc() and HG_Get_info() are not checked
 * because no suitable error code is visible from this function; add checks
 * with the project's error codes.
 *
 * \param hg_class      [IN]  HG class passed to HG_Create()
 * \param context       [IN]  HG context passed to HG_Create()
 * \param addr          [IN]  target address
 * \param request_class [IN]  request class used to create the wait requests
 *
 * \return HG_SUCCESS, or the error returned by the HG call that failed
 */
static hg_return_t
measure_bulk_transfer(hg_class_t *hg_class, hg_context_t *context,
        na_addr_t addr, hg_request_class_t *request_class)
{
    bulk_write_in_t in_struct;

    int *bulk_buf = NULL;
    void *buf_ptr[1];
    size_t count = (1024 * 1024 * MERCURY_TESTING_BUFFER_SIZE) / sizeof(int);
    hg_bulk_t bulk_handle = HG_BULK_NULL;
    size_t nbytes;
    double nmbytes;
    hg_handle_t handle;
    int handle_created = 0; /* set once handle must be destroyed */

    int avg_iter;
    double time_read = 0, min_time_read = 0, max_time_read = 0;

    struct hg_info *hg_info = NULL;

    hg_return_t ret = HG_SUCCESS;
    size_t i;

    /* Prepare bulk_buf */
    nbytes = count * sizeof(int);
    nmbytes = (double) nbytes / (1024 * 1024);
    if (na_test_comm_rank_g == 0) {
        printf("# Reading Bulk Data (%f MB) with %d client(s) -- loop %d time(s)\n",
                nmbytes, na_test_comm_size_g, MERCURY_TESTING_MAX_LOOP);
    }

    bulk_buf = malloc(nbytes);
    for (i = 0; i < count; i++) {
        bulk_buf[i] = (int) i;
    }
    *buf_ptr = bulk_buf;

    ret = HG_Create(hg_class, context, addr, hg_test_perf_bulk_id_g,
            &handle);
    if (ret != HG_SUCCESS) {
        fprintf(stderr, "Could not start call\n");
        goto done;
    }
    handle_created = 1;

    /* Must get info to retrieve bulk class if not provided by user */
    hg_info = HG_Get_info(handle);

    /* Register memory */
    ret = HG_Bulk_create(hg_info->hg_bulk_class, 1, buf_ptr, &nbytes,
            HG_BULK_READ_ONLY, &bulk_handle);
    if (ret != HG_SUCCESS) {
        fprintf(stderr, "Could not create bulk data handle\n");
        goto done;
    }

    /* Fill input structure */
    in_struct.fildes = 0;
    in_struct.bulk_handle = bulk_handle;

    if (na_test_comm_rank_g == 0) printf("# Warming up...\n");

    /* Warm up for bulk data */
    for (i = 0; i < BULK_SKIP; i++) {
        hg_request_t *request;

        request = hg_request_create(request_class);

        ret = HG_Forward(handle, hg_test_perf_forward_cb, request, &in_struct);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not forward call\n");
            /* Nothing was posted, so the request can be released here */
            hg_request_destroy(request);
            goto done;
        }

        hg_request_wait(request, HG_MAX_IDLE_TIME, NULL);

        hg_request_destroy(request);
    }

    NA_Test_barrier();

    if (na_test_comm_rank_g == 0) {
        printf("%*s%*s%*s%*s%*s%*s",
                10, "# Time (s)", 10, "Min (s)", 10, "Max (s)",
                12, "BW (MB/s)", 12, "Min (MB/s)", 12, "Max (MB/s)");
        printf("\n");
    }

    /* Bulk data benchmark */
    for (avg_iter = 0; avg_iter < MERCURY_TESTING_MAX_LOOP; avg_iter++) {
        hg_request_t *request;
        hg_time_t t1, t2;
        double td, part_time_read;
        double read_bandwidth, min_read_bandwidth, max_read_bandwidth;

        request = hg_request_create(request_class);

        hg_time_get_current(&t1);

        ret = HG_Forward(handle, hg_test_perf_forward_cb, request, &in_struct);
        if (ret != HG_SUCCESS) {
            fprintf(stderr, "Could not forward call\n");
            hg_request_destroy(request);
            goto done;
        }

        hg_request_wait(request, HG_MAX_IDLE_TIME, NULL);

        NA_Test_barrier();

        hg_time_get_current(&t2);
        td = hg_time_to_double(hg_time_subtract(t2, t1));

        time_read += td;
        /* A minimum of 0 means "not set yet": seed it with this sample */
        if (!min_time_read) min_time_read = td;
        min_time_read = (td < min_time_read) ? td : min_time_read;
        max_time_read = (td > max_time_read) ? td : max_time_read;

        hg_request_destroy(request);

        part_time_read = time_read / (avg_iter + 1);
        read_bandwidth = nmbytes * na_test_comm_size_g / part_time_read;
        /* The slowest sample gives the lowest bandwidth and vice versa */
        min_read_bandwidth = nmbytes * na_test_comm_size_g / max_time_read;
        max_read_bandwidth = nmbytes * na_test_comm_size_g / min_time_read;

        /* Running statistics, overwritten in place through '\r' */
        if (na_test_comm_rank_g == 0) {
            printf("%*f%*f%*f%*.*f%*.*f%*.*f\r",
                10, part_time_read, 10, min_time_read, 10, max_time_read,
                12, 2, read_bandwidth, 12, 2, min_read_bandwidth, 12, 2,
                max_read_bandwidth);
        }
    }
    if (na_test_comm_rank_g == 0) printf("\n");

done:
    /* Free memory handle */
    if (bulk_handle != HG_BULK_NULL) {
        hg_return_t free_ret = HG_Bulk_free(bulk_handle);

        if (free_ret != HG_SUCCESS) {
            fprintf(stderr, "Could not free bulk data handle\n");
            if (ret == HG_SUCCESS)
                ret = free_ret;
        }
    }

    /* Free bulk data */
    free(bulk_buf);

    /* Complete */
    if (handle_created) {
        hg_return_t destroy_ret = HG_Destroy(handle);

        if (destroy_ret != HG_SUCCESS) {
            fprintf(stderr, "Could not complete\n");
            if (ret == HG_SUCCESS)
                ret = destroy_ret;
        }
    }

    return ret;
}