/**
 * Conversion function. They deal with data-types in 3 ways, always making local copies.
 * In order to allow performance testings, there are 3 functions:
 *  - one copying directly from one memory location to another one using the
 *    data-type copy function.
 *  - one which use a 2 convertors created with the same data-type
 *  - and one using 2 convertors created from different data-types.
 *
 */
static int local_copy_ddt_count( const opal_datatype_t const* pdt, int count )
{
    OPAL_PTRDIFF_TYPE extent;
    void *pdst, *psrc;
    TIMER_DATA_TYPE start, end;
    long total_time;

    opal_datatype_type_extent( pdt, &extent );

    pdst = malloc( extent * count );
    psrc = malloc( extent * count );

    {
        int i;
        for( i = 0; i < (count * extent); i++ )
            ((char*)psrc)[i] = i % 128 + 32;
    }
    memset( pdst, 0, count * extent );

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    if( OPAL_SUCCESS != opal_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) {
        printf( "Unable to copy the datatype in the function local_copy_ddt_count."
                " Is the datatype committed ?\n" );
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "direct local copy in %ld microsec\n", total_time );
    free( pdst );
    free( psrc );

    return OPAL_SUCCESS;
}
예제 #2
0
void run_test(int num_threads, int num_iters, int cache_size, int lock_type) {
	int rc;
	long t;
	struct timespec start, end;
	pthread_t* threads = malloc(sizeof(pthread_t) * num_threads);

	shared.num_iters = num_iters;
	shared.cache = malloc(sizeof(Cache));
	cache_init(shared.cache, cache_size, lock_type);
	pthread_barrier_init(&shared.barrier, NULL, num_threads);

	tracepoint(tl, start_test, num_threads, num_iters, (!lock_type ? "mutex" : "rwlock"));
	clock_gettime(CLOCK_MONOTONIC, &start);
	for(t = 0; t < num_threads; t++) {
		rc = pthread_create(&threads[t], NULL, run_thread, (void *)t);
		if (rc != 0) {
			fprintf(stderr, "Error at pthread_create() with id %d\n", rc);
			exit(-1);
		}
	}
	for(t = 0; t < num_threads; t++) {
		pthread_join(threads[t], NULL);
	}
	clock_gettime(CLOCK_MONOTONIC, &end);
	tracepoint(tl, end_test, ELAPSED_TIME(start, end));

	cache_destroy(shared.cache);
	free(shared.cache);
	pthread_barrier_destroy(&shared.barrier);
	free(threads);
}
예제 #3
0
/**
 *  Conversion function. They deal with data-types in 3 ways, always making local copies.
 * In order to allow performance testings, there are 3 functions:
 *  - one copying directly from one memory location to another one using the
 *    data-type copy function.
 *  - one which use a 2 convertors created with the same data-type
 *  - and one using 2 convertors created from different data-types.
 *
 */
static int local_copy_ddt_count( ompi_datatype_t* pdt, int count )
{
    void *pdst, *psrc;
    TIMER_DATA_TYPE start, end;
    long total_time;
    size_t length;

    length = compute_buffer_length(pdt, count);

    pdst = malloc(length);
    psrc = malloc(length);

    for( int i = 0; i < length; i++ )
	((char*)psrc)[i] = i % 128 + 32;
    memset(pdst, 0, length);

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    if( OMPI_SUCCESS != ompi_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) {
        printf( "Unable to copy the datatype in the function local_copy_ddt_count."
                " Is the datatype committed ?\n" );
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "direct local copy in %ld microsec\n", total_time );
    free(pdst);
    free(psrc);

    return OMPI_SUCCESS;
}
예제 #4
0
/**
 * Conversion function. They deal with data-types in 3 ways, always making local copies.
 * In order to allow performance testings, there are 3 functions:
 *  - one copying directly from one memory location to another one using the
 *    data-type copy function.
 *  - one which use a 2 convertors created with the same data-type
 *  - and one using 2 convertors created from different data-types.
 *
 */
static int local_copy_ddt_count( opal_datatype_t const * const pdt, int count )
{
    OPAL_PTRDIFF_TYPE lb, extent;
    size_t malloced_size;
    char *odst, *osrc;
    void *pdst, *psrc;
    TIMER_DATA_TYPE start, end;
    long total_time;
    int errors = 0;

    malloced_size = compute_memory_size(pdt, count);
    opal_datatype_get_extent( pdt, &lb, &extent );

    odst = (char*)malloc( malloced_size );
    osrc = (char*)malloc( malloced_size );

    {
        for( size_t i = 0; i < malloced_size; i++ )
            osrc[i] = i % 128 + 32;
        memcpy(odst, osrc, malloced_size);
    }
    pdst = odst - lb;
    psrc = osrc - lb;

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    if( OPAL_SUCCESS != opal_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) {
        printf( "Unable to copy the datatype in the function local_copy_ddt_count."
                " Is the datatype committed ?\n" );
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "direct local copy in %ld microsec\n", total_time );
    if(outputFlags & VALIDATE_DATA) {
        for( size_t i = 0; i < malloced_size; i++ ) {
            if( odst[i] != osrc[i] ) {
                printf("error at position %lu (%d != %d)\n",
                       (unsigned long)i, (int)(odst[i]), (int)(osrc[i]));
                errors++;
                if(outputFlags & QUIT_ON_FIRST_ERROR) {
                    opal_datatype_dump(pdt);
                    assert(0); exit(-1);
                }
            }
        }
        if( 0 == errors ) {
            printf("Validation check succesfully passed\n");
        } else {
            printf("Found %d errors. Giving up!\n", errors);
            exit(-1);
        }
    }
    free( odst );
    free( osrc );

    return (0 == errors ? OPAL_SUCCESS : errors);
}
예제 #5
0
static int test_upper( unsigned int length )
{
    ompi_datatype_t *pdt;
    opal_convertor_t * pConv;
    int rc = OMPI_SUCCESS;
    unsigned int i, iov_count, split_chunk, total_length;
    size_t max_data;
    struct iovec iov[5];
    TIMER_DATA_TYPE start, end;
    long total_time;

    printf( "test upper matrix\n" );
    pdt = upper_matrix( length );
    /*dt_dump( pdt );*/

    total_length = length * (length + 1) * ( sizeof(double) / 2);

    pConv = opal_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != opal_convertor_prepare_for_send( pConv, &(pdt->super), 1, NULL ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        return OMPI_ERROR;
    }

    GET_TIME( start );
    split_chunk = (length + 1) * sizeof(double);
    /*    split_chunk = (total_length + 1) * sizeof(double); */
    for( i = total_length; i > 0; ) {
        iov_count = 5;
        max_data = 0;
        opal_convertor_raw( pConv, iov, &iov_count, &max_data );
        i -= max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "complete raw in %ld microsec\n", total_time );

    /* test the automatic destruction pf the data */
    ompi_datatype_destroy( &pdt );
    assert( pdt == NULL );

    OBJ_RELEASE( pConv );
    return rc;
}
예제 #6
0
/**
 *  Conversion function. They deal with data-types in 3 ways, always making local copies.
 * In order to allow performance testings, there are 3 functions:
 *  - one copying directly from one memory location to another one using the
 *    data-type copy function.
 *  - one which use a 2 convertors created with the same data-type
 *  - and one using 2 convertors created from different data-types.
 *
 */
static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num )
{
    struct iovec* iov;
    opal_convertor_t* convertor;
    TIMER_DATA_TYPE start, end;
    long total_time;
    uint32_t iov_count = iov_num;
    size_t max_data = 0, remaining_length;

    iov = (struct iovec*)malloc(iov_num * sizeof(struct iovec));

    convertor = opal_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != opal_convertor_prepare_for_send( convertor, &(pdt->super), count, NULL ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        return OMPI_ERROR;
    }

    remaining_length = count * pdt->super.size;
    GET_TIME( start );
    while( 0 == opal_convertor_raw(convertor, iov, &iov_count, &max_data) ) {
#if 0
        printf( "New raw extraction (iov_count = %d, max_data = %zu)\n",
                iov_count, max_data );
        for( i = 0; i < iov_count; i++ ) {
            printf( "\t{%p, %d}\n", iov[i].iov_base, iov[i].iov_len );
        }
#endif
        remaining_length -= max_data;
        iov_count = iov_num;
    }
    remaining_length -= max_data;
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "raw extraction in %ld microsec\n", total_time );
    OBJ_RELEASE( convertor );
    if( remaining_length != 0 ) {
        printf( "Not all raw description was been extracted (%lu bytes missing)\n",
                (unsigned long) remaining_length );
    }
    free(iov);
    return OMPI_SUCCESS;
}
예제 #7
0
void *run_thread(void *tid) {
	int i, id = (int)(long) tid;
	struct timespec start, end;
	int iters = shared.num_iters;

	pthread_barrier_wait(&shared.barrier);

	tracepoint(tl, start_test_thread, id);
	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < iters; i++) {
		int r = rand() % DATA_SIZE_FULL;
		if ((rand() % 100) < 80) {
			cache_get(shared.cache, GET_KEY(r));
		} else {
			cache_put(shared.cache, GET_KEY(r), GET_VAL(r));
		}
	}
	clock_gettime(CLOCK_MONOTONIC, &end);
	tracepoint(tl, end_test_thread, id, ELAPSED_TIME(start, end));

	//~ printf("%d\n", id);
	pthread_exit(NULL);
}
static int test_upper( unsigned int length )
{
    double *mat1, *mat2, *inbuf;
    opal_datatype_t *pdt;
    opal_convertor_t * pConv;
    char *ptr;
    int rc;
    unsigned int i, j, iov_count, split_chunk, total_length;
    size_t max_data;
    struct iovec a;
    TIMER_DATA_TYPE start, end;
    long total_time;

    printf( "test upper matrix\n" );
    pdt = upper_matrix( length );
    opal_datatype_dump( pdt );

    mat1 = malloc( length * length * sizeof(double) );
    init_random_upper_matrix( length, mat1 );
    mat2 = calloc( length * length, sizeof(double) );

    total_length = length * (length + 1) * ( sizeof(double) / 2);
    inbuf = (double*)malloc( total_length );
    ptr = (char*)inbuf;
    /* copy upper matrix in the array simulating the input buffer */
    for( i = 0; i < length; i++ ) {
        uint32_t pos = i * length + i;
        for( j = i; j < length; j++, pos++ ) {
            *inbuf = mat1[pos];
            inbuf++;
        }
    }
    inbuf = (double*)ptr;
    pConv = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, pdt, 1, mat2 ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        return OPAL_ERROR;
    }

    GET_TIME( start );
    split_chunk = (length + 1) * sizeof(double);
    /*    split_chunk = (total_length + 1) * sizeof(double); */
    for( i = total_length; i > 0; ) {
        if( i <= split_chunk ) {  /* equal test just to be able to set a breakpoint */
            split_chunk = i;
        }
        a.iov_base = ptr;
        a.iov_len = split_chunk;
        iov_count = 1;
        max_data = split_chunk;
        opal_convertor_unpack( pConv, &a, &iov_count, &max_data );
        ptr += max_data;
        i -= max_data;
        if( mat2[0] != inbuf[0] ) assert(0);
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "complete unpacking in %ld microsec\n", total_time );
    free( inbuf );
    rc = check_diag_matrix( length, mat1, mat2 );
    free( mat1 );
    free( mat2 );

    /* test the automatic destruction pf the data */
    opal_datatype_destroy( &pdt );
    assert( pdt == NULL );

    OBJ_RELEASE( pConv );
    return rc;
}
static int local_copy_with_convertor( const opal_datatype_t const* pdt, int count, int chunk )
{
    OPAL_PTRDIFF_TYPE extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t length = 0, done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    opal_datatype_type_extent( pdt, &extent );

    pdst  = malloc( extent * count );
    psrc  = malloc( extent * count );
    ptemp = malloc( chunk );

    {
        int i = 0;
        for( ; i < (count * extent); ((char*)psrc)[i] = i % 128 + 32, i++ );
    }
    memset( pdst, 0, count * extent );

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );
    return OPAL_SUCCESS;
}
예제 #10
0
static int local_copy_with_convertor( opal_datatype_t const * const pdt, int count, int chunk )
{
    OPAL_PTRDIFF_TYPE lb, extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst, *osrc;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, malloced_size;
    int32_t done1 = 0, done2 = 0, errors = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    malloced_size = compute_memory_size(pdt, count);
    opal_datatype_get_extent( pdt, &lb, &extent );

    odst = (char*)malloc( malloced_size );
    osrc = (char*)malloc( malloced_size );
    ptemp = malloc( chunk );

    {
        for( size_t i = 0; i < malloced_size; osrc[i] = i % 128 + 32, i++ );
        memcpy(odst, osrc, malloced_size);
    }
    pdst  = odst - lb;
    psrc  = osrc - lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
        if( outputFlags & RESET_CONVERTORS ) {
            struct dt_stack_t stack[1+send_convertor->stack_pos];
            int i, stack_pos = send_convertor->stack_pos;
            size_t pos;

            if( 0 == done1 ) {
                memcpy(stack, send_convertor->pStack, (1+send_convertor->stack_pos) * sizeof(struct dt_stack_t));
                pos = 0;
                opal_convertor_set_position(send_convertor, &pos);
                pos = length;
                opal_convertor_set_position(send_convertor, &pos);
                assert(pos == length);
                for(i = 0; i <= stack_pos; i++ ) {
                    if( stack[i].index != send_convertor->pStack[i].index )
                        {errors = 1; printf("send stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n",
                                            i, stack[i].index, send_convertor->pStack[i].index,
                                            length, pdt->size * count);}
                    if( stack[i].count != send_convertor->pStack[i].count ) {
                        if( stack[i].type == send_convertor->pStack[i].type ) {
                            {errors = 1; printf("send stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n",
                                                    i, stack[i].count, send_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        } else {
                            if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= send_convertor->pStack[i].type) )
                                {errors = 1; printf("send stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, (int)send_convertor->pStack[i].type,
                                                    length, pdt->size * count);}
                            else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) !=
                                     (send_convertor->pStack[i].count * opal_datatype_basicDatatypes[send_convertor->pStack[i].type]->size) )
                                {errors = 1; printf("send stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, stack[i].count,
                                                    (int)send_convertor->pStack[i].type, send_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        }
                    }
                    if( stack[i].disp != send_convertor->pStack[i].disp )
                        {errors = 1; printf("send stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n",
                                            i, (void*)stack[i].disp, (void*)send_convertor->pStack[i].disp,
                                            length, pdt->size * count);}
                    if(0 != errors) {assert(0); exit(-1);}
                }
            }
            if( 0 == done2 ) {
                memcpy(stack, recv_convertor->pStack, (1+recv_convertor->stack_pos) * sizeof(struct dt_stack_t));
                pos = 0;
                opal_convertor_set_position(recv_convertor, &pos);
                pos = length;
                opal_convertor_set_position(recv_convertor, &pos);
                assert(pos == length);
                for(i = 0; i <= stack_pos; i++ ) {
                    if( stack[i].index != recv_convertor->pStack[i].index )
                        {errors = 1; printf("recv stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n",
                                            i, stack[i].index, recv_convertor->pStack[i].index,
                                            length, pdt->size * count);}
                    if( stack[i].count != recv_convertor->pStack[i].count ) {
                        if( stack[i].type == recv_convertor->pStack[i].type ) {
                            {errors = 1; printf("recv stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n",
                                                    i, stack[i].count, recv_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        } else {
                            if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= recv_convertor->pStack[i].type) )
                                {errors = 1; printf("recv stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, (int)recv_convertor->pStack[i].type,
                                                    length, pdt->size * count);}
                            else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) !=
                                     (recv_convertor->pStack[i].count * opal_datatype_basicDatatypes[recv_convertor->pStack[i].type]->size) )
                                {errors = 1; printf("recv stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n",
                                                    i, (int)stack[i].type, stack[i].count,
                                                    (int)recv_convertor->pStack[i].type, recv_convertor->pStack[i].count,
                                                    length, pdt->size * count);}
                        }
                    }
                    if( stack[i].disp != recv_convertor->pStack[i].disp )
                        {errors = 1; printf("recv stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n",
                                            i, (void*)stack[i].disp, (void*)recv_convertor->pStack[i].disp,
                                            length, pdt->size * count);}
                    if(0 != errors) {assert(0); exit(-1);}
                }
            }
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

    if(outputFlags & VALIDATE_DATA) {
        for( size_t i = errors = 0; i < malloced_size; i++ ) {
            if( odst[i] != osrc[i] ) {
                printf("error at position %lu (%d != %d)\n",
                       (unsigned long)i, (int)(odst[i]), (int)(osrc[i]));
                errors++;
                if(outputFlags & QUIT_ON_FIRST_ERROR) {
                    opal_datatype_dump(pdt);
                    assert(0); exit(-1);
                }
            }
        }
        if( 0 == errors ) {
            printf("Validation check succesfully passed\n");
        } else {
            printf("Found %d errors. Giving up!\n", errors);
            exit(-1);
        }
    }
 clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    if( NULL != odst ) free( odst );
    if( NULL != osrc ) free( osrc );
    if( NULL != ptemp ) free( ptemp );
    return (0 == errors ? OPAL_SUCCESS : errors);
}
예제 #11
0
static int
local_copy_with_convertor_2datatypes( opal_datatype_t const * const send_type, int send_count,
                                      opal_datatype_t const * const recv_type, int recv_count,
                                      int chunk )
{
    OPAL_PTRDIFF_TYPE send_lb, send_extent, recv_lb, recv_extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst, *osrc;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, send_malloced_size, recv_malloced_size;;
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    send_malloced_size = compute_memory_size(send_type, send_count);
    recv_malloced_size = compute_memory_size(recv_type, recv_count);

    opal_datatype_get_extent( send_type, &send_lb, &send_extent );
    opal_datatype_get_extent( recv_type, &recv_lb, &recv_extent );

    odst = (char*)malloc( recv_malloced_size );
    osrc = (char*)malloc( send_malloced_size );
    ptemp = malloc( chunk );

    /* fill up the receiver with ZEROS */
    {
        for( size_t i = 0; i < send_malloced_size; i++ )
            osrc[i] = i % 128 + 32;
    }
    memset( odst, 0, recv_malloced_size );
    pdst  = odst - recv_lb;
    psrc  = osrc - send_lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, send_type, send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, recv_type, recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;

        if( outputFlags & RESET_CONVERTORS ) {
            size_t pos = 0;
            opal_convertor_set_position(send_convertor, &pos);
            pos = length;
            opal_convertor_set_position(send_convertor, &pos);
            assert(pos == length);

            pos = 0;
            opal_convertor_set_position(recv_convertor, &pos);
            pos = length;
            opal_convertor_set_position(recv_convertor, &pos);
            assert(pos == length);
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    if( NULL != odst ) free( odst );
    if( NULL != osrc ) free( osrc );
    if( NULL != ptemp ) free( ptemp );
    return OPAL_SUCCESS;
}
예제 #12
0
파일: ddt_test.c 프로젝트: aosm/openmpi
static int
local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count,
                                      ompi_datatype_t* recv_type, int recv_count,
                                      int chunk )
{
    MPI_Aint send_extent, recv_extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    ompi_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t length = 0, done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    ompi_ddt_type_extent( send_type, &send_extent );
    ompi_ddt_type_extent( recv_type, &recv_extent );

    pdst  = malloc( recv_extent * recv_count );
    psrc  = malloc( send_extent * send_count );
    ptemp = malloc( chunk );

    /* fill up the receiver with ZEROS */
    {
        int i;
        for( i = 0; i < (send_count * send_extent); i++ )
            ((char*)psrc)[i] = i % 128 + 32;
    }
    memset( pdst, 0, recv_count * recv_extent );

    send_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_send( send_convertor, send_type, send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_recv( recv_convertor, recv_type, recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = ompi_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = ompi_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );
 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    if( NULL != pdst ) free( pdst );
    if( NULL != psrc ) free( psrc );
    if( NULL != ptemp ) free( ptemp );
    return OMPI_SUCCESS;
}