/**
 * Time a direct copy of a datatype between two local buffers.
 *
 * This is the first of the three local-copy strategies used for performance
 * testing:
 *  - a direct copy from one memory location to another using the data-type
 *    copy function (this function),
 *  - a copy through two convertors created with the same data-type,
 *  - a copy through two convertors created from different data-types.
 *
 * Two buffers of (extent * count) bytes are allocated, the source is filled
 * with a repeating byte pattern, the destination is zeroed, and a single call
 * to opal_datatype_copy_content_same_ddt() is timed and reported on stdout.
 *
 * @param pdt    datatype describing the layout to copy
 * @param count  number of elements of @p pdt to copy
 * @return always OPAL_SUCCESS; an allocation or copy failure is only reported
 *         on stdout (the function has never propagated errors to its caller).
 */
static int local_copy_ddt_count( const opal_datatype_t const* pdt, int count )
{
    OPAL_PTRDIFF_TYPE extent, total, i;
    void *pdst, *psrc;
    TIMER_DATA_TYPE start, end;
    long total_time;

    opal_datatype_type_extent( pdt, &extent );
    /* Compute the byte count once, in the (wide) type of the extent. */
    total = extent * count;

    pdst = malloc( total );
    psrc = malloc( total );
    if( (0 != total) && ((NULL == pdst) || (NULL == psrc)) ) {
        printf( "Unable to allocate the buffers in the function local_copy_ddt_count.\n" );
        free( pdst );
        free( psrc );
        return OPAL_SUCCESS;
    }

    /* The index has the same width as the byte count, so it cannot overflow
     * before reaching the end of a large buffer. */
    for( i = 0; i < total; i++ ) {
        ((char*)psrc)[i] = i % 128 + 32;
    }
    memset( pdst, 0, total );

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    if( OPAL_SUCCESS != opal_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) {
        printf( "Unable to copy the datatype in the function local_copy_ddt_count."
                " Is the datatype committed ?\n" );
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "direct local copy in %ld microsec\n", total_time );

    free( pdst );
    free( psrc );

    return OPAL_SUCCESS;
}
void run_test(int num_threads, int num_iters, int cache_size, int lock_type) { int rc; long t; struct timespec start, end; pthread_t* threads = malloc(sizeof(pthread_t) * num_threads); shared.num_iters = num_iters; shared.cache = malloc(sizeof(Cache)); cache_init(shared.cache, cache_size, lock_type); pthread_barrier_init(&shared.barrier, NULL, num_threads); tracepoint(tl, start_test, num_threads, num_iters, (!lock_type ? "mutex" : "rwlock")); clock_gettime(CLOCK_MONOTONIC, &start); for(t = 0; t < num_threads; t++) { rc = pthread_create(&threads[t], NULL, run_thread, (void *)t); if (rc != 0) { fprintf(stderr, "Error at pthread_create() with id %d\n", rc); exit(-1); } } for(t = 0; t < num_threads; t++) { pthread_join(threads[t], NULL); } clock_gettime(CLOCK_MONOTONIC, &end); tracepoint(tl, end_test, ELAPSED_TIME(start, end)); cache_destroy(shared.cache); free(shared.cache); pthread_barrier_destroy(&shared.barrier); free(threads); }
/**
 * Time a direct copy of a datatype between two local buffers.
 *
 * This is the first of the three local-copy strategies used for performance
 * testing:
 *  - a direct copy from one memory location to another using the data-type
 *    copy function (this function),
 *  - a copy through two convertors created with the same data-type,
 *  - a copy through two convertors created from different data-types.
 *
 * Two buffers of compute_buffer_length(pdt, count) bytes are allocated, the
 * source is filled with a repeating byte pattern, the destination is zeroed,
 * and a single call to ompi_datatype_copy_content_same_ddt() is timed and
 * reported on stdout.
 *
 * @param pdt    datatype describing the layout to copy
 * @param count  number of elements of @p pdt to copy
 * @return always OMPI_SUCCESS; an allocation or copy failure is only reported
 *         on stdout (the function has never propagated errors to its caller).
 */
static int local_copy_ddt_count( ompi_datatype_t* pdt, int count )
{
    void *pdst, *psrc;
    TIMER_DATA_TYPE start, end;
    long total_time;
    size_t length;

    length = compute_buffer_length(pdt, count);

    pdst = malloc(length);
    psrc = malloc(length);
    if( (0 != length) && ((NULL == pdst) || (NULL == psrc)) ) {
        printf( "Unable to allocate the buffers in the function local_copy_ddt_count.\n" );
        free(pdst);
        free(psrc);
        return OMPI_SUCCESS;
    }

    /* size_t index: same type as the bound, no signed/unsigned comparison and
     * no int overflow on large buffers. */
    for( size_t i = 0; i < length; i++ ) {
        ((char*)psrc)[i] = i % 128 + 32;
    }
    memset(pdst, 0, length);

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    if( OMPI_SUCCESS != ompi_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) {
        printf( "Unable to copy the datatype in the function local_copy_ddt_count."
                " Is the datatype committed ?\n" );
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "direct local copy in %ld microsec\n", total_time );

    free(pdst);
    free(psrc);

    return OMPI_SUCCESS;
}
/**
 * Time a direct copy of a datatype between two local buffers and optionally
 * validate the result.
 *
 * This is the first of the three local-copy strategies used for performance
 * testing:
 *  - a direct copy from one memory location to another using the data-type
 *    copy function (this function),
 *  - a copy through two convertors created with the same data-type,
 *  - a copy through two convertors created from different data-types.
 *
 * Both buffers hold compute_memory_size(pdt, count) bytes and start out with
 * identical content, so after the copy they must still be byte-for-byte
 * equal.  That comparison is performed when VALIDATE_DATA is set in
 * outputFlags.
 *
 * @param pdt    datatype describing the layout to copy
 * @param count  number of elements of @p pdt to copy
 * @return OPAL_SUCCESS when no error was recorded, otherwise the number of
 *         errors (1 when the buffers cannot be allocated).  A failed
 *         validation terminates the process with exit(-1).
 */
static int local_copy_ddt_count( opal_datatype_t const * const pdt, int count )
{
    OPAL_PTRDIFF_TYPE lb, extent;
    size_t malloced_size;
    char *odst, *osrc;
    void *pdst, *psrc;
    TIMER_DATA_TYPE start, end;
    long total_time;
    int errors = 0;

    malloced_size = compute_memory_size(pdt, count);
    opal_datatype_get_extent( pdt, &lb, &extent );

    odst = (char*)malloc( malloced_size );
    osrc = (char*)malloc( malloced_size );
    if( (0 != malloced_size) && ((NULL == odst) || (NULL == osrc)) ) {
        printf( "Unable to allocate the buffers in the function local_copy_ddt_count.\n" );
        errors = 1;
        goto clean_and_return;
    }

    for( size_t i = 0; i < malloced_size; i++ ) {
        osrc[i] = i % 128 + 32;
    }
    memcpy(odst, osrc, malloced_size);

    /* Shift the base addresses by the lower bound: displacement lb from the
     * shifted pointer is the first allocated byte. */
    pdst = odst - lb;
    psrc = osrc - lb;

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    if( OPAL_SUCCESS != opal_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) {
        printf( "Unable to copy the datatype in the function local_copy_ddt_count."
                " Is the datatype committed ?\n" );
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "direct local copy in %ld microsec\n", total_time );

    if(outputFlags & VALIDATE_DATA) {
        for( size_t i = 0; i < malloced_size; i++ ) {
            if( odst[i] != osrc[i] ) {
                printf("error at position %lu (%d != %d)\n",
                       (unsigned long)i, (int)(odst[i]), (int)(osrc[i]));
                errors++;
                if(outputFlags & QUIT_ON_FIRST_ERROR) {
                    opal_datatype_dump(pdt);
                    assert(0); exit(-1);
                }
            }
        }
        if( 0 == errors ) {
            printf("Validation check succesfully passed\n");
        } else {
            printf("Found %d errors. Giving up!\n", errors);
            exit(-1);
        }
    }

 clean_and_return:
    free( odst );
    free( osrc );

    return (0 == errors ? OPAL_SUCCESS : errors);
}
static int test_upper( unsigned int length ) { ompi_datatype_t *pdt; opal_convertor_t * pConv; int rc = OMPI_SUCCESS; unsigned int i, iov_count, split_chunk, total_length; size_t max_data; struct iovec iov[5]; TIMER_DATA_TYPE start, end; long total_time; printf( "test upper matrix\n" ); pdt = upper_matrix( length ); /*dt_dump( pdt );*/ total_length = length * (length + 1) * ( sizeof(double) / 2); pConv = opal_convertor_create( remote_arch, 0 ); if( OMPI_SUCCESS != opal_convertor_prepare_for_send( pConv, &(pdt->super), 1, NULL ) ) { printf( "Cannot attach the datatype to a convertor\n" ); return OMPI_ERROR; } GET_TIME( start ); split_chunk = (length + 1) * sizeof(double); /* split_chunk = (total_length + 1) * sizeof(double); */ for( i = total_length; i > 0; ) { iov_count = 5; max_data = 0; opal_convertor_raw( pConv, iov, &iov_count, &max_data ); i -= max_data; } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "complete raw in %ld microsec\n", total_time ); /* test the automatic destruction pf the data */ ompi_datatype_destroy( &pdt ); assert( pdt == NULL ); OBJ_RELEASE( pConv ); return rc; }
/**
 * Time the raw (iovec) extraction of @p count elements of a datatype.
 *
 * A send convertor is prepared with a NULL base address and
 * opal_convertor_raw() is called with @p iov_num iovec entries for as long
 * as it returns 0.  The bytes reported by every call, including the final
 * one that ends the loop, are subtracted from count * pdt->super.size; a
 * non-zero remainder is reported on stdout.
 *
 * @param pdt      datatype to describe
 * @param count    number of elements of @p pdt
 * @param iov_num  number of iovec entries offered to each raw call
 * @return OMPI_SUCCESS once the extraction loop has run (even when bytes are
 *         reported missing); OMPI_ERROR if the iovec array cannot be
 *         allocated or the datatype cannot be attached to the convertor.
 */
static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num )
{
    struct iovec* iov;
    opal_convertor_t* convertor;
    TIMER_DATA_TYPE start, end;
    long total_time;
    uint32_t iov_count = iov_num;
    size_t max_data = 0, remaining_length;

    iov = malloc(iov_num * sizeof(struct iovec));
    if( NULL == iov ) {
        printf( "Cannot allocate %d iovec entries\n", iov_num );
        return OMPI_ERROR;
    }

    convertor = opal_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != opal_convertor_prepare_for_send( convertor, &(pdt->super), count, NULL ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        /* Release what was acquired so far instead of leaking it. */
        OBJ_RELEASE( convertor );
        free(iov);
        return OMPI_ERROR;
    }

    remaining_length = count * pdt->super.size;
    GET_TIME( start );
    while( 0 == opal_convertor_raw(convertor, iov, &iov_count, &max_data) ) {
        remaining_length -= max_data;
        iov_count = iov_num;  /* offer the full array again */
    }
    /* Account for the bytes reported by the call that ended the loop. */
    remaining_length -= max_data;
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "raw extraction in %ld microsec\n", total_time );

    OBJ_RELEASE( convertor );
    if( remaining_length != 0 ) {
        printf( "Not all raw description was been extracted (%lu bytes missing)\n",
                (unsigned long) remaining_length );
    }
    free(iov);
    return OMPI_SUCCESS;
}
void *run_thread(void *tid) { int i, id = (int)(long) tid; struct timespec start, end; int iters = shared.num_iters; pthread_barrier_wait(&shared.barrier); tracepoint(tl, start_test_thread, id); clock_gettime(CLOCK_MONOTONIC, &start); for (i = 0; i < iters; i++) { int r = rand() % DATA_SIZE_FULL; if ((rand() % 100) < 80) { cache_get(shared.cache, GET_KEY(r)); } else { cache_put(shared.cache, GET_KEY(r), GET_VAL(r)); } } clock_gettime(CLOCK_MONOTONIC, &end); tracepoint(tl, end_test_thread, id, ELAPSED_TIME(start, end)); //~ printf("%d\n", id); pthread_exit(NULL); }
static int test_upper( unsigned int length ) { double *mat1, *mat2, *inbuf; opal_datatype_t *pdt; opal_convertor_t * pConv; char *ptr; int rc; unsigned int i, j, iov_count, split_chunk, total_length; size_t max_data; struct iovec a; TIMER_DATA_TYPE start, end; long total_time; printf( "test upper matrix\n" ); pdt = upper_matrix( length ); opal_datatype_dump( pdt ); mat1 = malloc( length * length * sizeof(double) ); init_random_upper_matrix( length, mat1 ); mat2 = calloc( length * length, sizeof(double) ); total_length = length * (length + 1) * ( sizeof(double) / 2); inbuf = (double*)malloc( total_length ); ptr = (char*)inbuf; /* copy upper matrix in the array simulating the input buffer */ for( i = 0; i < length; i++ ) { uint32_t pos = i * length + i; for( j = i; j < length; j++, pos++ ) { *inbuf = mat1[pos]; inbuf++; } } inbuf = (double*)ptr; pConv = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, pdt, 1, mat2 ) ) { printf( "Cannot attach the datatype to a convertor\n" ); return OPAL_ERROR; } GET_TIME( start ); split_chunk = (length + 1) * sizeof(double); /* split_chunk = (total_length + 1) * sizeof(double); */ for( i = total_length; i > 0; ) { if( i <= split_chunk ) { /* equal test just to be able to set a breakpoint */ split_chunk = i; } a.iov_base = ptr; a.iov_len = split_chunk; iov_count = 1; max_data = split_chunk; opal_convertor_unpack( pConv, &a, &iov_count, &max_data ); ptr += max_data; i -= max_data; if( mat2[0] != inbuf[0] ) assert(0); } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "complete unpacking in %ld microsec\n", total_time ); free( inbuf ); rc = check_diag_matrix( length, mat1, mat2 ); free( mat1 ); free( mat2 ); /* test the automatic destruction pf the data */ opal_datatype_destroy( &pdt ); assert( pdt == NULL ); OBJ_RELEASE( pConv ); return rc; }
/**
 * Time a local copy performed through a send and a receive convertor built
 * from the same datatype.
 *
 * The source buffer (extent * count bytes) is filled with a repeating byte
 * pattern and the destination is zeroed.  The data is then moved in pieces
 * of at most @p chunk bytes: each piece is packed into a temporary buffer by
 * the send convertor and unpacked from it by the receive convertor.  The
 * total time and the time spent unpacking are printed on stdout.
 *
 * @param pdt    datatype used by both convertors
 * @param count  number of elements of @p pdt to copy
 * @param chunk  size in bytes of the temporary pack/unpack buffer
 * @return always OPAL_SUCCESS; allocation or convertor-setup failures are
 *         only reported on stdout.
 */
static int local_copy_with_convertor( const opal_datatype_t const* pdt, int count, int chunk )
{
    OPAL_PTRDIFF_TYPE extent, total, i;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    opal_datatype_type_extent( pdt, &extent );
    total = extent * count;

    pdst  = malloc( total );
    psrc  = malloc( total );
    ptemp = malloc( chunk );
    if( ((0 != total) && ((NULL == pdst) || (NULL == psrc))) ||
        ((0 != chunk) && (NULL == ptemp)) ) {
        printf( "Unable to allocate the buffers in the function local_copy_with_convertor.\n" );
        goto clean_and_return;
    }

    /* The index is as wide as the byte count, so it cannot overflow. */
    for( i = 0; i < total; i++ ) {
        ((char*)psrc)[i] = i % 128 + 32;
    }
    memset( pdst, 0, total );

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying same data-type using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

 clean_and_return:
    if( NULL != send_convertor ) OBJ_RELEASE( send_convertor );
    if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor );

    free( pdst );
    free( psrc );
    free( ptemp );
    return OPAL_SUCCESS;
}
static int local_copy_with_convertor( opal_datatype_t const * const pdt, int count, int chunk ) { OPAL_PTRDIFF_TYPE lb, extent; void *pdst = NULL, *psrc = NULL, *ptemp = NULL; char *odst, *osrc; opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL; struct iovec iov; uint32_t iov_count; size_t max_data, length = 0, malloced_size; int32_t done1 = 0, done2 = 0, errors = 0; TIMER_DATA_TYPE start, end, unpack_start, unpack_end; long total_time, unpack_time = 0; malloced_size = compute_memory_size(pdt, count); opal_datatype_get_extent( pdt, &lb, &extent ); odst = (char*)malloc( malloced_size ); osrc = (char*)malloc( malloced_size ); ptemp = malloc( chunk ); { for( size_t i = 0; i < malloced_size; osrc[i] = i % 128 + 32, i++ ); memcpy(odst, osrc, malloced_size); } pdst = odst - lb; psrc = osrc - lb; send_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, pdt, count, psrc ) ) { printf( "Unable to create the send convertor. Is the datatype committed ?\n" ); goto clean_and_return; } recv_convertor = opal_convertor_create( remote_arch, 0 ); if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, pdt, count, pdst ) ) { printf( "Unable to create the recv convertor. Is the datatype committed ?\n" ); goto clean_and_return; } cache_trash(); /* make sure the cache is useless */ GET_TIME( start ); while( (done1 & done2) != 1 ) { /* They are supposed to finish in exactly the same time. */ if( done1 | done2 ) { printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor\n", (done1 ? "finish" : "not finish"), (done2 ? 
"finish" : "not finish") ); } max_data = chunk; iov_count = 1; iov.iov_base = ptemp; iov.iov_len = chunk; if( done1 == 0 ) { done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data ); } if( done2 == 0 ) { GET_TIME( unpack_start ); done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data ); GET_TIME( unpack_end ); unpack_time += ELAPSED_TIME( unpack_start, unpack_end ); } length += max_data; if( outputFlags & RESET_CONVERTORS ) { struct dt_stack_t stack[1+send_convertor->stack_pos]; int i, stack_pos = send_convertor->stack_pos; size_t pos; if( 0 == done1 ) { memcpy(stack, send_convertor->pStack, (1+send_convertor->stack_pos) * sizeof(struct dt_stack_t)); pos = 0; opal_convertor_set_position(send_convertor, &pos); pos = length; opal_convertor_set_position(send_convertor, &pos); assert(pos == length); for(i = 0; i <= stack_pos; i++ ) { if( stack[i].index != send_convertor->pStack[i].index ) {errors = 1; printf("send stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n", i, stack[i].index, send_convertor->pStack[i].index, length, pdt->size * count);} if( stack[i].count != send_convertor->pStack[i].count ) { if( stack[i].type == send_convertor->pStack[i].type ) { {errors = 1; printf("send stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n", i, stack[i].count, send_convertor->pStack[i].count, length, pdt->size * count);} } else { if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= send_convertor->pStack[i].type) ) {errors = 1; printf("send stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n", i, (int)stack[i].type, (int)send_convertor->pStack[i].type, length, pdt->size * count);} else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) != (send_convertor->pStack[i].count * opal_datatype_basicDatatypes[send_convertor->pStack[i].type]->size) ) {errors = 1; printf("send stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed 
%lu/%lu)\n", i, (int)stack[i].type, stack[i].count, (int)send_convertor->pStack[i].type, send_convertor->pStack[i].count, length, pdt->size * count);} } } if( stack[i].disp != send_convertor->pStack[i].disp ) {errors = 1; printf("send stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n", i, (void*)stack[i].disp, (void*)send_convertor->pStack[i].disp, length, pdt->size * count);} if(0 != errors) {assert(0); exit(-1);} } } if( 0 == done2 ) { memcpy(stack, recv_convertor->pStack, (1+recv_convertor->stack_pos) * sizeof(struct dt_stack_t)); pos = 0; opal_convertor_set_position(recv_convertor, &pos); pos = length; opal_convertor_set_position(recv_convertor, &pos); assert(pos == length); for(i = 0; i <= stack_pos; i++ ) { if( stack[i].index != recv_convertor->pStack[i].index ) {errors = 1; printf("recv stack[%d].index differs (orig %d != new %d) (completed %lu/%lu)\n", i, stack[i].index, recv_convertor->pStack[i].index, length, pdt->size * count);} if( stack[i].count != recv_convertor->pStack[i].count ) { if( stack[i].type == recv_convertor->pStack[i].type ) { {errors = 1; printf("recv stack[%d].count differs (orig %lu != new %lu) (completed %lu/%lu)\n", i, stack[i].count, recv_convertor->pStack[i].count, length, pdt->size * count);} } else { if( (OPAL_DATATYPE_MAX_PREDEFINED <= stack[i].type) || (OPAL_DATATYPE_MAX_PREDEFINED <= recv_convertor->pStack[i].type) ) {errors = 1; printf("recv stack[%d].type wrong (orig %d != new %d) (completed %lu/%lu)\n", i, (int)stack[i].type, (int)recv_convertor->pStack[i].type, length, pdt->size * count);} else if( (stack[i].count * opal_datatype_basicDatatypes[stack[i].type]->size) != (recv_convertor->pStack[i].count * opal_datatype_basicDatatypes[recv_convertor->pStack[i].type]->size) ) {errors = 1; printf("recv stack[%d].type*count differs (orig (%d,%lu) != new (%d, %lu)) (completed %lu/%lu)\n", i, (int)stack[i].type, stack[i].count, (int)recv_convertor->pStack[i].type, recv_convertor->pStack[i].count, length, pdt->size * 
count);} } } if( stack[i].disp != recv_convertor->pStack[i].disp ) {errors = 1; printf("recv stack[%d].disp differs (orig %p != new %p) (completed %lu/%lu)\n", i, (void*)stack[i].disp, (void*)recv_convertor->pStack[i].disp, length, pdt->size * count);} if(0 != errors) {assert(0); exit(-1);} } } } } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "copying same data-type using convertors in %ld microsec\n", total_time ); printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time, total_time - unpack_time ); if(outputFlags & VALIDATE_DATA) { for( size_t i = errors = 0; i < malloced_size; i++ ) { if( odst[i] != osrc[i] ) { printf("error at position %lu (%d != %d)\n", (unsigned long)i, (int)(odst[i]), (int)(osrc[i])); errors++; if(outputFlags & QUIT_ON_FIRST_ERROR) { opal_datatype_dump(pdt); assert(0); exit(-1); } } } if( 0 == errors ) { printf("Validation check succesfully passed\n"); } else { printf("Found %d errors. Giving up!\n", errors); exit(-1); } } clean_and_return: if( NULL != send_convertor ) OBJ_RELEASE( send_convertor ); if( NULL != recv_convertor ) OBJ_RELEASE( recv_convertor ); if( NULL != odst ) free( odst ); if( NULL != osrc ) free( osrc ); if( NULL != ptemp ) free( ptemp ); return (0 == errors ? OPAL_SUCCESS : errors); }
/**
 * Time a local copy performed through a send convertor and a receive
 * convertor built from two different datatypes.
 *
 * The source holds compute_memory_size(send_type, send_count) bytes filled
 * with a repeating byte pattern; the destination holds
 * compute_memory_size(recv_type, recv_count) zeroed bytes.  The data is moved
 * in pieces of at most @p chunk bytes (packed into a temporary buffer, then
 * unpacked from it).  When RESET_CONVERTORS is set in outputFlags, both
 * convertors are rewound to 0 and repositioned to the current byte count
 * after every piece.  Total and unpack times are printed on stdout.
 *
 * @param send_type   datatype describing the source layout
 * @param send_count  number of elements of @p send_type
 * @param recv_type   datatype describing the destination layout
 * @param recv_count  number of elements of @p recv_type
 * @param chunk       size in bytes of the temporary pack/unpack buffer
 * @return always OPAL_SUCCESS; allocation or convertor-setup failures are
 *         only reported on stdout.
 */
static int
local_copy_with_convertor_2datatypes( opal_datatype_t const * const send_type, int send_count,
                                      opal_datatype_t const * const recv_type, int recv_count,
                                      int chunk )
{
    OPAL_PTRDIFF_TYPE send_lb, send_extent, recv_lb, recv_extent;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    char *odst = NULL, *osrc = NULL;
    opal_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data, length = 0, send_malloced_size, recv_malloced_size;
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    send_malloced_size = compute_memory_size(send_type, send_count);
    recv_malloced_size = compute_memory_size(recv_type, recv_count);

    opal_datatype_get_extent( send_type, &send_lb, &send_extent );
    opal_datatype_get_extent( recv_type, &recv_lb, &recv_extent );

    odst  = (char*)malloc( recv_malloced_size );
    osrc  = (char*)malloc( send_malloced_size );
    ptemp = malloc( chunk );
    if( ((0 != recv_malloced_size) && (NULL == odst)) ||
        ((0 != send_malloced_size) && (NULL == osrc)) ||
        ((0 != chunk) && (NULL == ptemp)) ) {
        printf( "Unable to allocate the buffers in the function"
                " local_copy_with_convertor_2datatypes.\n" );
        goto clean_and_return;
    }

    /* fill up the sender with a known pattern */
    for( size_t i = 0; i < send_malloced_size; i++ ) {
        osrc[i] = i % 128 + 32;
    }
    /* fill up the receiver with ZEROS */
    memset( odst, 0, recv_malloced_size );

    /* Shift the base addresses by the lower bounds: displacement lb from a
     * shifted pointer is the first allocated byte of that buffer. */
    pdst = odst - recv_lb;
    psrc = osrc - send_lb;

    send_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_send( send_convertor, send_type, send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = opal_convertor_create( remote_arch, 0 );
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( recv_convertor, recv_type, recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = opal_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = opal_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }

        length += max_data;

        if( outputFlags & RESET_CONVERTORS ) {
            size_t pos = 0;
            opal_convertor_set_position(send_convertor, &pos);
            pos = length;
            opal_convertor_set_position(send_convertor, &pos);
            assert(pos == length);

            pos = 0;
            opal_convertor_set_position(recv_convertor, &pos);
            pos = length;
            opal_convertor_set_position(recv_convertor, &pos);
            assert(pos == length);
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    free( odst );
    free( osrc );
    free( ptemp );
    return OPAL_SUCCESS;
}
/**
 * Time a local copy performed through a send convertor and a receive
 * convertor built from two different datatypes.
 *
 * The source holds send_extent * send_count bytes filled with a repeating
 * byte pattern; the destination holds recv_extent * recv_count zeroed bytes.
 * The data is moved in pieces of at most @p chunk bytes: each piece is packed
 * into a temporary buffer by the send convertor and unpacked from it by the
 * receive convertor.  Total and unpack times are printed on stdout.
 *
 * @param send_type   datatype describing the source layout
 * @param send_count  number of elements of @p send_type
 * @param recv_type   datatype describing the destination layout
 * @param recv_count  number of elements of @p recv_type
 * @param chunk       size in bytes of the temporary pack/unpack buffer
 * @return always OMPI_SUCCESS; allocation or convertor-setup failures are
 *         only reported on stdout.
 */
static int
local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count,
                                      ompi_datatype_t* recv_type, int recv_count,
                                      int chunk )
{
    MPI_Aint send_extent, recv_extent, send_bytes, recv_bytes, i;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    ompi_convertor_t *send_convertor = NULL, *recv_convertor = NULL;
    struct iovec iov;
    uint32_t iov_count;
    size_t max_data;
    int32_t done1 = 0, done2 = 0;
    TIMER_DATA_TYPE start, end, unpack_start, unpack_end;
    long total_time, unpack_time = 0;

    ompi_ddt_type_extent( send_type, &send_extent );
    ompi_ddt_type_extent( recv_type, &recv_extent );
    send_bytes = send_extent * send_count;
    recv_bytes = recv_extent * recv_count;

    pdst  = malloc( recv_bytes );
    psrc  = malloc( send_bytes );
    ptemp = malloc( chunk );
    if( ((0 != recv_bytes) && (NULL == pdst)) ||
        ((0 != send_bytes) && (NULL == psrc)) ||
        ((0 != chunk) && (NULL == ptemp)) ) {
        printf( "Unable to allocate the buffers in the function"
                " local_copy_with_convertor_2datatypes.\n" );
        goto clean_and_return;
    }

    /* fill up the sender with a known pattern; the index is as wide as the
     * byte count, so it cannot overflow */
    for( i = 0; i < send_bytes; i++ ) {
        ((char*)psrc)[i] = i % 128 + 32;
    }
    /* fill up the receiver with ZEROS */
    memset( pdst, 0, recv_bytes );

    send_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_send( send_convertor, send_type, send_count, psrc ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    recv_convertor = ompi_convertor_create( remote_arch, 0 );
    if( OMPI_SUCCESS != ompi_convertor_prepare_for_recv( recv_convertor, recv_type, recv_count, pdst ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %s but the receive is %s in local_copy_with_convertor_2datatypes\n",
                    (done1 ? "finish" : "not finish"),
                    (done2 ? "finish" : "not finish") );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = ompi_convertor_pack( send_convertor, &iov, &iov_count, &max_data );
        }

        if( done2 == 0 ) {
            GET_TIME( unpack_start );
            done2 = ompi_convertor_unpack( recv_convertor, &iov, &iov_count, &max_data );
            GET_TIME( unpack_end );
            unpack_time += ELAPSED_TIME( unpack_start, unpack_end );
        }
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "copying different data-types using convertors in %ld microsec\n", total_time );
    printf( "\t unpack in %ld microsec [pack in %ld microsec]\n", unpack_time,
            total_time - unpack_time );

 clean_and_return:
    if( send_convertor != NULL ) {
        OBJ_RELEASE( send_convertor ); assert( send_convertor == NULL );
    }
    if( recv_convertor != NULL ) {
        OBJ_RELEASE( recv_convertor ); assert( recv_convertor == NULL );
    }
    free( pdst );
    free( psrc );
    free( ptemp );
    return OMPI_SUCCESS;
}