/**
 * Copy the contents of a native staging buffer back into a Java array and
 * release the staging buffer.
 *
 * The function returns immediately, without copying and without releasing
 * the staging buffer, when `db` is set or when `buf` or `ptr` is NULL.
 * (NOTE(review): `db` presumably marks a direct buffer that needs no
 * staging -- confirm against the callers.)
 *
 * @param ptr      Staging buffer holding the data to copy back.
 * @param item     Bookkeeping handle handed to releaseBuffer() with `ptr`.
 * @param env      JNI environment of the calling thread.
 * @param buf      Destination Java array.
 * @param db       When true, nothing is done.
 * @param offset   Offset into `buf`, forwarded unchanged to the array helpers.
 * @param count    Number of `type` elements to copy.
 * @param type     Datatype describing the element layout.
 * @param baseType Base type selector forwarded to setArrayRegion().
 */
void ompi_java_releaseWritePtr(
        void *ptr, ompi_java_buffer_t *item, JNIEnv *env, jobject buf,
        jboolean db, int offset, int count, MPI_Datatype type, int baseType)
{
    if(db || !buf || !ptr)
        return;

    if(opal_datatype_is_contiguous_memory_layout(&type->super, count))
    {
        /* Contiguous layout: one bulk region copy is enough. */
        int length = count * getTypeExtent(env, type);
        setArrayRegion(env, buf, baseType, offset, length, ptr);
    }
    else
    {
        void *inBuf, *inBase;
        inBuf = ompi_java_getArrayCritical(&inBase, env, buf, offset);

        int rc = opal_datatype_copy_content_same_ddt(
                 &type->super, count, inBuf, ptr);

        /* Leave the critical region before reporting the status: JNI does
         * not allow other JNI calls while a critical array is held.
         * Mode 0 publishes the copied data to the Java array. */
        (*env)->ReleasePrimitiveArrayCritical(env, buf, inBase, 0);

        ompi_java_exceptionCheck(env,
                rc==OPAL_SUCCESS ? OMPI_SUCCESS : OMPI_ERROR);
    }

    releaseBuffer(ptr, item);
}
/**
 * Obtain a staging buffer sized for a vector (counts/displs) operation and
 * fill in only the slice that belongs to `rank` from the Java array.
 *
 * The buffer length is extent * getCountv(counts, displs, size). The slice
 * for `rank` is stored at byte offset extent * displs[rank] inside the
 * staging buffer, for both contiguous and non-contiguous datatypes.
 *
 * @param item     Receives the bookkeeping handle from getBuffer().
 * @param env      JNI environment of the calling thread.
 * @param buf      Source Java array.
 * @param offset   Base offset into `buf`.
 * @param counts   Per-rank element counts.
 * @param displs   Per-rank displacements, in units of the type extent.
 * @param size     Number of entries in `counts` / `displs`.
 * @param rank     Index of the slice to stage.
 * @param type     Datatype describing the element layout.
 * @param baseType Base type selector forwarded to getArrayRegion().
 * @return The staging buffer obtained from getBuffer().
 */
static void* getReadPtrvRank(
        ompi_java_buffer_t **item, JNIEnv *env, jobject buf,
        int offset, int *counts, int *displs, int size,
        int rank, MPI_Datatype type, int baseType)
{
    int  extent  = getTypeExtent(env, type),
         length  = extent * getCountv(counts, displs, size);
    void *ptr    = getBuffer(env, item, length);
    int  rootOff = offset + extent * displs[rank];

    /* Destination of this rank's slice inside the staging buffer. The
     * non-contiguous branch used to copy to the start of the buffer, which
     * misplaced the data whenever displs[rank] was non-zero. */
    void *rootPtr = (char*)ptr + extent * displs[rank];

    if(opal_datatype_is_contiguous_memory_layout(&type->super, counts[rank]))
    {
        int rootLength = extent * counts[rank];
        getArrayRegion(env, buf, baseType, rootOff, rootLength, rootPtr);
    }
    else
    {
        void *inBuf, *inBase;
        inBuf = ompi_java_getArrayCritical(&inBase, env, buf, rootOff);

        int rc = opal_datatype_copy_content_same_ddt(
                 &type->super, counts[rank], rootPtr, inBuf);

        /* Release before reporting: no other JNI calls are allowed while
         * the critical array is held. JNI_ABORT because the Java array was
         * only read. */
        (*env)->ReleasePrimitiveArrayCritical(env, buf, inBase, JNI_ABORT);

        ompi_java_exceptionCheck(env,
                rc==OPAL_SUCCESS ? OMPI_SUCCESS : OMPI_ERROR);
    }

    return ptr;
}
/**
 * Obtain a staging buffer sized for `size` equally sized slices and fill in
 * only the slice that belongs to `rank` from the Java array.
 *
 * Each slice is extent * count bytes long; the slice for `rank` starts at
 * byte offset (extent * count) * rank inside the staging buffer.
 *
 * @param item     Receives the bookkeeping handle from getBuffer().
 * @param env      JNI environment of the calling thread.
 * @param buf      Source Java array.
 * @param offset   Base offset into `buf`.
 * @param count    Number of `type` elements per slice.
 * @param size     Number of slices the buffer must hold.
 * @param rank     Index of the slice to stage.
 * @param type     Datatype describing the element layout.
 * @param baseType Base type selector forwarded to getArrayRegion().
 * @return The staging buffer obtained from getBuffer().
 */
static void* getReadPtrRank(
        ompi_java_buffer_t **item, JNIEnv *env, jobject buf, int offset,
        int count, int size, int rank, MPI_Datatype type, int baseType)
{
    int extent = getTypeExtent(env, type),
        rLen   = extent * count,
        length = rLen * size,
        rDispl = rLen * rank,
        rOff   = offset + rDispl;

    void *ptr  = getBuffer(env, item, length);
    void *rPtr = (char*)ptr + rDispl;

    if(opal_datatype_is_contiguous_memory_layout(&type->super, count))
    {
        getArrayRegion(env, buf, baseType, rOff, rLen, rPtr);
    }
    else
    {
        void *bufPtr, *bufBase;
        bufPtr = ompi_java_getArrayCritical(&bufBase, env, buf, rOff);

        int rc = opal_datatype_copy_content_same_ddt(
                 &type->super, count, rPtr, bufPtr);

        /* Release before reporting: no other JNI calls are allowed while
         * the critical array is held. JNI_ABORT because the Java array was
         * only read. */
        (*env)->ReleasePrimitiveArrayCritical(env, buf, bufBase, JNI_ABORT);

        ompi_java_exceptionCheck(env,
                rc==OPAL_SUCCESS ? OMPI_SUCCESS : OMPI_ERROR);
    }

    return ptr;
}
/**
 * Obtain a staging buffer and fill it with `count` elements of `type` read
 * from the Java array.
 *
 * @param item     Receives the bookkeeping handle from getBuffer().
 * @param env      JNI environment of the calling thread.
 * @param buf      Source Java array.
 * @param offset   Offset into `buf`, forwarded unchanged to the array helpers.
 * @param count    Number of `type` elements to stage.
 * @param type     Datatype describing the element layout.
 * @param baseType Base type selector forwarded to getArrayRegion().
 * @return The staging buffer obtained from getBuffer().
 */
static void* getReadPtr(ompi_java_buffer_t **item, JNIEnv *env, jobject buf,
                        int offset, int count, MPI_Datatype type, int baseType)
{
    int  length = count * getTypeExtent(env, type);
    void *ptr   = getBuffer(env, item, length);

    if(opal_datatype_is_contiguous_memory_layout(&type->super, count))
    {
        getArrayRegion(env, buf, baseType, offset, length, ptr);
    }
    else
    {
        void *inBuf, *inBase;
        inBuf = ompi_java_getArrayCritical(&inBase, env, buf, offset);

        int rc = opal_datatype_copy_content_same_ddt(
                 &type->super, count, ptr, inBuf);

        /* Release before reporting: no other JNI calls are allowed while
         * the critical array is held. JNI_ABORT because the Java array was
         * only read. */
        (*env)->ReleasePrimitiveArrayCritical(env, buf, inBase, JNI_ABORT);

        ompi_java_exceptionCheck(env,
                rc==OPAL_SUCCESS ? OMPI_SUCCESS : OMPI_ERROR);
    }

    return ptr;
}
/** * Conversion function. They deal with data-types in 3 ways, always making local copies. * In order to allow performance testings, there are 3 functions: * - one copying directly from one memory location to another one using the * data-type copy function. * - one which use a 2 convertors created with the same data-type * - and one using 2 convertors created from different data-types. * */ static int local_copy_ddt_count( const opal_datatype_t const* pdt, int count ) { OPAL_PTRDIFF_TYPE extent; void *pdst, *psrc; TIMER_DATA_TYPE start, end; long total_time; opal_datatype_type_extent( pdt, &extent ); pdst = malloc( extent * count ); psrc = malloc( extent * count ); { int i; for( i = 0; i < (count * extent); i++ ) ((char*)psrc)[i] = i % 128 + 32; } memset( pdst, 0, count * extent ); cache_trash(); /* make sure the cache is useless */ GET_TIME( start ); if( OPAL_SUCCESS != opal_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) { printf( "Unable to copy the datatype in the function local_copy_ddt_count." " Is the datatype committed ?\n" ); } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); printf( "direct local copy in %ld microsec\n", total_time ); free( pdst ); free( psrc ); return OPAL_SUCCESS; }
/**
 * Conversion function. These functions deal with data-types in 3 ways, always
 * making local copies. In order to allow performance testing, there are 3
 * functions:
 *  - one copying directly from one memory location to another one using the
 *    data-type copy function.
 *  - one which uses 2 convertors created with the same data-type
 *  - and one using 2 convertors created from different data-types.
 *
 * This is the first of them: it allocates two buffers of
 * compute_memory_size(pdt, count) bytes holding the same byte pattern, times
 * a single opal_datatype_copy_content_same_ddt() call, prints the elapsed
 * time and, when VALIDATE_DATA is set in outputFlags, checks that both
 * buffers are still byte-for-byte identical.
 *
 * @param pdt   Datatype to copy.
 * @param count Number of elements of `pdt` to copy.
 * @return OPAL_SUCCESS when no validation error was found, or a non-zero
 *         value when the work buffers cannot be allocated. The process
 *         exits if validation finds a mismatch.
 */
static int local_copy_ddt_count( opal_datatype_t const * const pdt, int count )
{
    OPAL_PTRDIFF_TYPE lb, extent;
    size_t malloced_size;
    char *odst, *osrc;
    void *pdst, *psrc;
    TIMER_DATA_TYPE start, end;
    long total_time;
    int errors = 0;

    malloced_size = compute_memory_size(pdt, count);
    opal_datatype_get_extent( pdt, &lb, &extent );

    odst = (char*)malloc( malloced_size );
    osrc = (char*)malloc( malloced_size );
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * that comes back NULL is treated as a failure. */
    if( (0 != malloced_size) && ((NULL == odst) || (NULL == osrc)) ) {
        printf( "Unable to allocate the buffers in the function local_copy_ddt_count.\n" );
        free( odst );
        free( osrc );
        return 1;  /* anything other than OPAL_SUCCESS signals failure */
    }

    if( malloced_size > 0 ) {
        /* Both buffers start out identical, so bytes the copy does not
         * touch still compare equal during validation. */
        for( size_t i = 0; i < malloced_size; i++ )
            osrc[i] = i % 128 + 32;
        memcpy(odst, osrc, malloced_size);
    }

    /* Shift by the lower bound so the datatype's first byte lands at the
     * start of the allocation. */
    pdst = odst - lb;
    psrc = osrc - lb;

    cache_trash();  /* make sure the cache is useless */

    GET_TIME( start );
    if( OPAL_SUCCESS != opal_datatype_copy_content_same_ddt( pdt, count, pdst, psrc ) ) {
        printf( "Unable to copy the datatype in the function local_copy_ddt_count."
                " Is the datatype committed ?\n" );
    }
    GET_TIME( end );
    total_time = ELAPSED_TIME( start, end );
    printf( "direct local copy in %ld microsec\n", total_time );

    if(outputFlags & VALIDATE_DATA) {
        for( size_t i = 0; i < malloced_size; i++ ) {
            if( odst[i] != osrc[i] ) {
                printf("error at position %lu (%d != %d)\n",
                       (unsigned long)i, (int)(odst[i]), (int)(osrc[i]));
                errors++;
                if(outputFlags & QUIT_ON_FIRST_ERROR) {
                    opal_datatype_dump(pdt);
                    assert(0);
                    exit(-1);
                }
            }
        }
        if( 0 == errors ) {
            printf("Validation check succesfully passed\n");
        } else {
            printf("Found %d errors. Giving up!\n", errors);
            exit(-1);
        }
    }

    free( odst );
    free( osrc );

    return (0 == errors ? OPAL_SUCCESS : errors);
}
/**
 * Copy every slice of a vector (counts/displs) staging buffer back into a
 * Java array and release the staging buffer.
 *
 * The function returns immediately, without copying and without releasing
 * the staging buffer, when `db` is set or when `buf` or `ptr` is NULL.
 * (NOTE(review): `db` presumably marks a direct buffer that needs no
 * staging -- confirm against the callers.)
 *
 * Slice i lives at byte offset extent * displs[i] in the staging buffer and
 * holds counts[i] elements.
 *
 * @param ptr      Staging buffer holding the data to copy back.
 * @param item     Bookkeeping handle handed to releaseBuffer() with `ptr`.
 * @param env      JNI environment of the calling thread.
 * @param buf      Destination Java array.
 * @param db       When true, nothing is done.
 * @param offset   Base offset into `buf`.
 * @param counts   Per-slice element counts.
 * @param displs   Per-slice displacements, in units of the type extent.
 * @param size     Number of slices.
 * @param type     Datatype describing the element layout.
 * @param baseType Base type selector forwarded to setArrayRegion().
 */
void ompi_java_releaseWritePtrv(
        void *ptr, ompi_java_buffer_t *item, JNIEnv *env,
        jobject buf, jboolean db, int offset, int *counts, int *displs,
        int size, MPI_Datatype type, int baseType)
{
    if(db || !buf || !ptr)
        return;

    int i;
    int extent = getTypeExtent(env, type);

    /* NOTE(review): the layout is probed with a count of 2 rather than the
     * per-slice counts; presumably this asks whether consecutive elements
     * are adjacent in memory -- confirm. */
    if(opal_datatype_is_contiguous_memory_layout(&type->super, 2))
    {
        for(i = 0; i < size; i++)
        {
            int   iOff = offset + extent * displs[i],
                  iLen = extent * counts[i];
            void *iPtr = (char*)ptr + extent * displs[i];
            setArrayRegion(env, buf, baseType, iOff, iLen, iPtr);
        }
    }
    else
    {
        void *bufPtr, *bufBase;
        int   status = OMPI_SUCCESS;

        bufPtr = ompi_java_getArrayCritical(&bufBase, env, buf, offset);

        for(i = 0; i < size; i++)
        {
            int   iOff = extent * displs[i];
            char *iBuf = iOff + (char*)bufPtr,
                 *iPtr = iOff + (char*)ptr;

            int rc = opal_datatype_copy_content_same_ddt(
                     &type->super, counts[i], iBuf, iPtr);

            /* Only remember the failure here: no other JNI calls are
             * allowed while the critical array is held. */
            if(rc != OPAL_SUCCESS)
                status = OMPI_ERROR;
        }

        /* Mode 0 publishes the copied data to the Java array. */
        (*env)->ReleasePrimitiveArrayCritical(env, buf, bufBase, 0);

        ompi_java_exceptionCheck(env, status);
    }

    releaseBuffer(ptr, item);
}
/**
 * Obtain a staging buffer sized for a vector (counts/displs) operation and
 * fill in every slice from the Java array.
 *
 * The buffer length is extent * getCountv(counts, displs, size). Slice i is
 * stored at byte offset extent * displs[i] and holds counts[i] elements.
 *
 * @param item     Receives the bookkeeping handle from getBuffer().
 * @param env      JNI environment of the calling thread.
 * @param buf      Source Java array.
 * @param offset   Base offset into `buf`.
 * @param counts   Per-slice element counts.
 * @param displs   Per-slice displacements, in units of the type extent.
 * @param size     Number of slices.
 * @param type     Datatype describing the element layout.
 * @param baseType Base type selector forwarded to getArrayRegion().
 * @return The staging buffer obtained from getBuffer().
 */
static void* getReadPtrvAll(
        ompi_java_buffer_t **item, JNIEnv *env, jobject buf, int offset,
        int *counts, int *displs, int size, MPI_Datatype type, int baseType)
{
    int i,
        extent = getTypeExtent(env, type),
        length = extent * getCountv(counts, displs, size);

    void *ptr = getBuffer(env, item, length);

    /* NOTE(review): the layout is probed with a count of 2 rather than the
     * per-slice counts; presumably this asks whether consecutive elements
     * are adjacent in memory -- confirm. */
    if(opal_datatype_is_contiguous_memory_layout(&type->super, 2))
    {
        for(i = 0; i < size; i++)
        {
            int   iOff = offset + extent * displs[i],
                  iLen = extent * counts[i];
            void *iPtr = (char*)ptr + extent * displs[i];
            getArrayRegion(env, buf, baseType, iOff, iLen, iPtr);
        }
    }
    else
    {
        void *bufPtr, *bufBase;
        int   status = OMPI_SUCCESS;

        bufPtr = ompi_java_getArrayCritical(&bufBase, env, buf, offset);

        for(i = 0; i < size; i++)
        {
            int   iOff = extent * displs[i];
            char *iBuf = iOff + (char*)bufPtr,
                 *iPtr = iOff + (char*)ptr;

            int rc = opal_datatype_copy_content_same_ddt(
                     &type->super, counts[i], iPtr, iBuf);

            /* Only remember the failure here: no other JNI calls are
             * allowed while the critical array is held. */
            if(rc != OPAL_SUCCESS)
                status = OMPI_ERROR;
        }

        /* JNI_ABORT because the Java array was only read. */
        (*env)->ReleasePrimitiveArrayCritical(env, buf, bufBase, JNI_ABORT);

        ompi_java_exceptionCheck(env, status);
    }

    return ptr;
}
/* * opal_datatype_sndrcv * * Function: - copy MPI message from buffer into another * - send/recv done if cannot optimize * Accepts: - send buffer * - send count * - send datatype * - receive buffer * - receive count * - receive datatype * - tag * - communicator * Returns: - MPI_SUCCESS or error code */ int32_t ompi_datatype_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, void *rbuf, int32_t rcount, const ompi_datatype_t* rdtype) { opal_convertor_t send_convertor, recv_convertor; struct iovec iov; int length, completed; uint32_t iov_count; size_t max_data; /* First check if we really have something to do */ if (0 == rcount || 0 == rdtype->super.size) { return ((0 == scount || 0 == sdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE); } /* If same datatypes used, just copy. */ if (sdtype == rdtype) { int32_t count = ( scount < rcount ? scount : rcount ); opal_datatype_copy_content_same_ddt(&(rdtype->super), count, (char*)rbuf, (char*)sbuf); return ((scount > rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } /* If receive packed. */ if (rdtype->id == OMPI_DATATYPE_MPI_PACKED) { OBJ_CONSTRUCT( &send_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor, &(sdtype->super), scount, sbuf, 0, &send_convertor ); iov_count = 1; iov.iov_base = (IOVBASE_TYPE*)rbuf; iov.iov_len = scount * sdtype->super.size; if( (int32_t)iov.iov_len > rcount ) iov.iov_len = rcount; opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data ); OBJ_DESTRUCT( &send_convertor ); return ((max_data < (size_t)rcount) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } /* If send packed. 
*/ if (sdtype->id == OMPI_DATATYPE_MPI_PACKED) { OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(rdtype->super), rcount, rbuf, 0, &recv_convertor ); iov_count = 1; iov.iov_base = (IOVBASE_TYPE*)sbuf; iov.iov_len = rcount * rdtype->super.size; if( (int32_t)iov.iov_len > scount ) iov.iov_len = scount; opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data ); OBJ_DESTRUCT( &recv_convertor ); return (((size_t)scount > max_data) ? MPI_ERR_TRUNCATE : MPI_SUCCESS); } iov.iov_len = length = 64 * 1024; iov.iov_base = (IOVBASE_TYPE*)malloc( length * sizeof(char) ); OBJ_CONSTRUCT( &send_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_send( ompi_mpi_local_convertor, &(sdtype->super), scount, sbuf, 0, &send_convertor ); OBJ_CONSTRUCT( &recv_convertor, opal_convertor_t ); opal_convertor_copy_and_prepare_for_recv( ompi_mpi_local_convertor, &(rdtype->super), rcount, rbuf, 0, &recv_convertor ); completed = 0; while( !completed ) { iov.iov_len = length; iov_count = 1; max_data = length; completed |= opal_convertor_pack( &send_convertor, &iov, &iov_count, &max_data ); completed |= opal_convertor_unpack( &recv_convertor, &iov, &iov_count, &max_data ); } free( iov.iov_base ); OBJ_DESTRUCT( &send_convertor ); OBJ_DESTRUCT( &recv_convertor ); return ( (scount * sdtype->super.size) <= (rcount * rdtype->super.size) ? MPI_SUCCESS : MPI_ERR_TRUNCATE ); }