#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]){
	MPI_Init(NULL, NULL);
	int rank, size;
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);

	MPI_Datatype new_type;
	int i,j,length = 8;
	int x[length][length];
	int y[length][length];
	for(i=0;i<length;i++){
		for(j=0;j<length;j++){
			x[i][j] = i*j;
			y[i][j] = -1;
		}

	}
	printf("Input matrix:\n");
	for(i=0;i<length;i++){
		for(j=0;j<length;j++){
			printf("%d\t", x[i][j]);
		}
		printf("\n");
	}
	/* Lower Triangular Matrix
	int count = length;
	int blocklength[] = {1,2,3,4,5,6,7,8};
	int displ[] = {0,8,16,24,32,40,48,56};
	*/
	
	/* Upper Triangular Matrix
	int count = length;
	int blocklength[] = {8,7,6,5,4,3,2,1};
	int displ[] = {0,9,18,27,36,45,54,63};
	*/

	/* Main diagonal: one element per row at displacements i*(length+1) */
	int count = length;
	int blocklength[] = {1,1,1,1,1,1,1,1};
	int displ[] = {0,9,18,27,36,45,54,63};
	
	MPI_Type_indexed(count, blocklength, displ, MPI_INT, &new_type);
	MPI_Type_commit(&new_type);

	/* This example assumes exactly two processes: rank 0 sends the diagonal, rank 1 receives it. */
	if(rank == 0){
		MPI_Send(&x[0][0],1, new_type, 1,1, MPI_COMM_WORLD);
	}else{
		MPI_Recv(&y[0][0],1, new_type, 0,1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	}
	printf("Resultant matrix:\n");
	for(i=0;i<length;i++){
		for(j=0;j<length;j++){
			printf("%d\t",y[i][j]);
		}
		printf("\n");

	}

	MPI_Type_free(&new_type);
	MPI_Finalize();
	return 0;
}
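The arrays left active above select one element per row at displacements 0, 9, 18, ..., i.e. the main diagonal of the row-major 8x8 matrix. As a minimal sketch (not part of the original listing), the same diagonal type can be built for an arbitrary size n instead of hard-coding the arrays:

/* sketch: indexed type selecting the main diagonal of an n x n int matrix */
int n = 8;                               /* assumed matrix size */
int blens[n], displs[n];
for (int i = 0; i < n; i++) {
    blens[i]  = 1;                       /* one element per row */
    displs[i] = i * (n + 1);             /* row offset i*n plus column offset i */
}
MPI_Datatype diag_type;
MPI_Type_indexed(n, blens, displs, MPI_INT, &diag_type);
MPI_Type_commit(&diag_type);
/* ... communicate with diag_type as in the example above ... */
MPI_Type_free(&diag_type);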
void ompi_type_indexed_f(MPI_Fint *count, MPI_Fint *array_of_blocklengths,
			MPI_Fint *array_of_displacements, MPI_Fint *oldtype,
			MPI_Fint *newtype, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Datatype c_old = MPI_Type_f2c(*oldtype);
    MPI_Datatype c_new;
    OMPI_ARRAY_NAME_DECL(array_of_blocklengths);
    OMPI_ARRAY_NAME_DECL(array_of_displacements);

    OMPI_ARRAY_FINT_2_INT(array_of_blocklengths, *count);
    OMPI_ARRAY_FINT_2_INT(array_of_displacements, *count);

    c_ierr = MPI_Type_indexed(OMPI_FINT_2_INT(*count),
                              OMPI_ARRAY_NAME_CONVERT(array_of_blocklengths), 
                              OMPI_ARRAY_NAME_CONVERT(array_of_displacements),
                              c_old, &c_new);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_blocklengths);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(array_of_displacements);

    if (MPI_SUCCESS == c_ierr) {
        *newtype = MPI_Type_c2f(c_new);
    }
}
static void MPI_mask_to_type(MPI_Datatype* newtype, const Array3d<bool, PNX, PNX, PNX>& use) {
    int indices[PNX * PNX * PNX / 8];
    int blocklens[PNX * PNX * PNX / 8];
    int count, index;
    count = 0;
    blocklens[0] = 0;
    indices[0] = 0;
    int sz = 0;
    for (int k0 = 0; k0 < PNX; k0++) {
        for (int j0 = 0; j0 < PNX; j0++) {
            for (int i0 = 0; i0 < PNX; i0++) {
                if (use(i0, j0, k0) == true) {
                    index = i0 + PNX * (j0 + PNX * k0);
                    if (blocklens[count] + indices[count] == index) {
                        blocklens[count]++;
                        sz++;
                    } else {
                        sz++;
                        count++;
                        blocklens[count] = 1;
                        indices[count] = index;
                    }
                }
            }
        }
    }
    count++;
    MPI_Type_indexed(count, blocklens, indices, MPI_DOUBLE_PRECISION, newtype);
    MPI_Type_commit(newtype);

}
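For illustration (not part of the source): with PNX = 2 and a mask that marks linear cells 0, 1, 2 and 5, the loop above produces count = 2, blocklens = {3, 1}, indices = {0, 5}; runs of adjacent selected cells are coalesced into single blocks of the indexed type.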
int fill_lcc_elems_points(char* file_in, int read_key, int myrank, int nprocs, int nintci, int nintcf, int **lcc,
        int points_count, int** points, int* elems, int *local_global_index,  int **local_global_index_g,
        int **lcc_g, int points_count_g, int** points_g, int **elems_g, int *int_cells_per_proc) {
    int proc=0, i=0;
    MPI_Status status;
    MPI_Datatype index_type;
    
    if (read_key == POSL_INIT_ONE_READ) {
        if (myrank == 0) {
            for (proc=1; proc<nprocs; ++proc) {
                for (i=0; i<int_cells_per_proc[proc]; ++i) {
                    MPI_Send(lcc_g[local_global_index_g[proc][i]], 6, MPI_INT, proc, 
                            POSL_MPI_TAG_LCC, MPI_COMM_WORLD);
                }
                
                for (i=0; i<points_count; i++) {
                    MPI_Send(points_g[i], 3, MPI_INT, proc, POSL_MPI_TAG_POINTS, MPI_COMM_WORLD);
                }
                
                //create and register new datatype within mpi
                int mpi_block_length[int_cells_per_proc[proc]], mpi_displacements[int_cells_per_proc[proc]];
                for (i=0; i<int_cells_per_proc[proc]; ++i) {
                    mpi_block_length[i] = 8;
                    mpi_displacements[i] = 8*local_global_index_g[proc][i];
                }
                MPI_Type_indexed(int_cells_per_proc[proc], mpi_block_length, mpi_displacements, 
                        MPI_INT, &index_type);
                MPI_Type_commit(&index_type);
                
                MPI_Send(*(elems_g), 1, index_type, proc, POSL_MPI_TAG_ELEMENTS, MPI_COMM_WORLD);
                MPI_Type_free(&index_type);   /* release the per-process datatype before building the next one */
            }
        } else {
            for (i=nintci; i<nintcf+1; ++i) {
                MPI_Recv(lcc[i], 6, MPI_INT, 0, POSL_MPI_TAG_LCC, MPI_COMM_WORLD, &status);
            }
            for (i=0; i<points_count; i++) {
                MPI_Recv(points[i], 3, MPI_INT, 0, POSL_MPI_TAG_POINTS, MPI_COMM_WORLD, &status);
            }
            MPI_Recv(elems, (nintcf+1)*8, MPI_INT, 0, POSL_MPI_TAG_ELEMENTS, MPI_COMM_WORLD, &status);
        }
    }
    if (read_key == POSL_INIT_ONE_READ && myrank == 0) {
        for(i=nintci; i<nintcf+1; i++) {
            memcpy(lcc[i], lcc_g[local_global_index[i]], 6*sizeof(int));
            memcpy(&(elems[8*i]), &(*elems_g)[local_global_index[i]*8], 8*sizeof(int));
        }
        for (i=0; i<points_count; i++) {
            memcpy(points[i], points_g[i], 3*sizeof(int));
        }
    }
    if (read_key == POSL_INIT_ALL_READ) {
        read_lcc_local(file_in, nintci, nintcf, lcc, local_global_index);
        for (i=0; i<points_count; i++) {
            memcpy(points[i], points_g[i], 3*sizeof(int));
        }
    }
    return POSL_OK;
}
Example #5
int main(int argc, char *argv[])
{
    int errs = 0;
    int position, pack_size, i;
    int dis[2], blklens[2];
    MPI_Datatype type;
    int send_buffer[60];
    int recv_buffer[60];
    int pack_buffer[1000];

    MTest_Init(&argc, &argv);

    /* Initialize data in the buffers */
    for (i = 0; i < 60; i++) {
        send_buffer[i] = i;
        recv_buffer[i] = -1;
        pack_buffer[i] = -2;
    }

    /* Create an indexed type with an empty first block */
    dis[0] = 0;
    dis[1] = 20;

    blklens[0] = 0;
    blklens[1] = 40;

    MPI_Type_indexed(2, blklens, dis, MPI_INT, &type);
    MPI_Type_commit(&type);

    position = 0;
    MPI_Pack(send_buffer, 1, type, pack_buffer, sizeof(pack_buffer), &position, MPI_COMM_WORLD);
    pack_size = position;
    position = 0;
    MPI_Unpack(pack_buffer, pack_size, &position, recv_buffer, 1, type, MPI_COMM_WORLD);

    /* Check that the last 40 entries of the recv_buffer have the corresponding
     * elements from the send buffer */
    for (i = 0; i < 20; i++) {
        if (recv_buffer[i] != -1) {
            errs++;
            fprintf(stderr, "recv_buffer[%d] = %d, should = -1\n", i, recv_buffer[i]);
        }
    }
    for (i = 20; i < 60; i++) {
        if (recv_buffer[i] != i) {
            errs++;
            fprintf(stderr, "recv_buffer[%d] = %d, should = %d\n", i, recv_buffer[i], i);
        }
    }
    MPI_Type_free(&type);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;

}
Example #6
/*
 * Setup indexed type info and handlers.
 *
 * An indexed datatype is created using the following parameters.
 * nblock:   Number of blocks.
 * blocklen: Number of elements in each block. Each block has the same length.
 * stride:   Strided number of elements between two adjacent blocks. The
 *           displacement of each block is set as (index of current block * stride).
 * lb:       Lower bound of the new datatype.
 * oldtype:  Datatype of element.
 */
static int MTestTypeIndexedCreate(MPI_Aint nblock, MPI_Aint blocklen, MPI_Aint stride, MPI_Aint lb,
                                  MPI_Datatype oldtype, const char *typename_prefix,
                                  MTestDatatype * mtype)
{
    int merr = 0;
    char type_name[128];
    int i;

    MTestTypeReset(mtype);

    merr = MPI_Type_size(oldtype, &mtype->basesize);
    if (merr)
        MTestPrintError(merr);

    mtype->displs = (int *) malloc(nblock * sizeof(int));
    mtype->displ_in_bytes = (MPI_Aint *) malloc(nblock * sizeof(MPI_Aint));
    mtype->index = (int *) malloc(nblock * sizeof(int));
    if (!mtype->displs || !mtype->displ_in_bytes || !mtype->index) {
        char errmsg[128] = { 0 };
        sprintf(errmsg, "Out of memory in %s", __FUNCTION__);
        MTestError(errmsg);
    }

    mtype->nblock = nblock;
    for (i = 0; i < nblock; i++) {
        mtype->index[i] = blocklen;
        mtype->displs[i] = lb + stride * i;     /*stride between the start of two blocks */
        mtype->displ_in_bytes[i] = (lb + stride * i) * mtype->basesize;
    }

    /* Indexed uses displacement in oldtypes */
    merr = MPI_Type_indexed(nblock, mtype->index, mtype->displs, oldtype, &mtype->datatype);
    if (merr)
        MTestPrintError(merr);
    merr = MPI_Type_commit(&mtype->datatype);
    if (merr)
        MTestPrintError(merr);

    memset(type_name, 0, sizeof(type_name));
    sprintf(type_name, "%s %s (%ld nblock %ld blocklen %ld stride %ld lb)", typename_prefix,
            "index", nblock, blocklen, stride, lb);
    merr = MPI_Type_set_name(mtype->datatype, (char *) type_name);
    if (merr)
        MTestPrintError(merr);

    mtype->InitBuf = MTestTypeIndexedInit;
    mtype->FreeBuf = MTestTypeFree;
    mtype->CheckBuf = MTestTypeIndexedCheckbuf;

    return merr;
}
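As a rough illustration (the parameter values here are chosen for the sketch, not taken from the test suite), the layout MTestTypeIndexedCreate builds for nblock = 3, blocklen = 2, stride = 5, lb = 1 over MPI_INT is equivalent to the direct construction below; displacements are counted in units of the old type, exactly as in the loop above.

/* sketch only: equivalent direct construction for nblock=3, blocklen=2, stride=5, lb=1 */
int blklens[3] = { 2, 2, 2 };
int displs[3]  = { 1, 6, 11 };   /* lb + i*stride, in units of the old type */
MPI_Datatype t;
MPI_Type_indexed(3, blklens, displs, MPI_INT, &t);
MPI_Type_commit(&t);
/* ... use t, then free it ... */
MPI_Type_free(&t);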
Example #7
int main(int argc, char *argv[])
{
    int i,j,N=5;
    double A[N][N],B[N][N];
    MPI_Status status;
    MPI_Datatype type;
    int len[N],disp[N];

    MPI_Init(&argc, &argv);
    for (i=0;i<N;i++)
	for (j=0;j<N;j++){
	    A[i][j]=10*i+j;
	}
    printf("A=\n");
    for(i=0;i<N;i++)
    {
	for(j=0;j<N;j++)
    	    printf("%d ",(int)A[i][j]);
	printf("\n");
    }
    len[0]=1;
    disp[0]=0;
    for(i=1;i<N;i++)
    {
	len[i]=len[i-1]+1;
	disp[i]=i*N;
    }
    MPI_Type_indexed(N,len,disp,MPI_DOUBLE, &type);
    MPI_Type_commit(&type);
    MPI_Sendrecv(A,1,type, 0, 111,
	    B,1,type,0,111,
	    MPI_COMM_SELF,&status);
    MPI_Type_free(&type);
    printf("B=\n");
    for(i=0;i<N;i++)
    {
	for(j=0;j<=i;j++)
	    printf("%d ",(int)B[i][j]);
	printf("\n");
    }
    MPI_Finalize();
    return 0;
}
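Here len grows 1, 2, ..., N while disp[i] = i*N, so the indexed type covers the lower triangle of the row-major NxN matrix, diagonal included; the Sendrecv to self therefore copies exactly A's lower triangle into B, which is all the final loop prints.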
/*
 * Class:     mpi_Datatype
 * Method:    GetIndexed
 * Signature: (I[I[I)V
 */
JNIEXPORT void JNICALL Java_mpi_Datatype_GetIndexed
  (JNIEnv *env, jobject jthis, jintArray blocklengths, jintArray
displacements, jobject oldtype)
{
  MPI_Datatype type;
  int count=(*env)->GetArrayLength(env,blocklengths);
  jboolean isCopy=JNI_TRUE;
  jint *lengths; jint *disps;

  clearFreeList(env) ;

  lengths=(*env)->GetIntArrayElements(env,blocklengths,&isCopy);
  disps = (*env)->GetIntArrayElements(env,displacements,&isCopy);
  MPI_Type_indexed(count, (int*)lengths, (int*)disps, 
    (MPI_Datatype)((*env)->GetLongField(env,oldtype,DatatypehandleID)), &type);
  (*env)->ReleaseIntArrayElements(env,blocklengths,lengths,0);
  (*env)->ReleaseIntArrayElements(env,displacements,disps,0);

  (*env)->SetLongField(env,jthis, DatatypehandleID, (jlong)type);
}
JNIEXPORT jlong JNICALL Java_mpi_Datatype_getIndexed(
        JNIEnv *env, jclass clazz, jintArray blockLengths,
        jintArray disps, jlong oldType)
{
    MPI_Datatype type;
    int count = (*env)->GetArrayLength(env, blockLengths);

    jint *jBlockLengths, *jDispl;
    int  *cBlockLengths, *cDispl;
    ompi_java_getIntArray(env, blockLengths, &jBlockLengths, &cBlockLengths);
    ompi_java_getIntArray(env, disps, &jDispl, &cDispl);

    int rc = MPI_Type_indexed(count, cBlockLengths, cDispl,
                              (MPI_Datatype)oldType, &type);

    ompi_java_exceptionCheck(env, rc);
    ompi_java_forgetIntArray(env, blockLengths, jBlockLengths, cBlockLengths);
    ompi_java_forgetIntArray(env, disps, jDispl, cDispl);
    return (jlong)type;
}
#include "mpi.h"
#include <stdio.h>
#define NELEMENTS 6     /* 4 + 2 elements selected by the two blocks below */

   int main(int argc, char *argv[])  {
   int numtasks, rank, source=0, dest, tag=1, i;
   int blocklengths[2], displacements[2];
   float a[16] = 
     {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 
      9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
   float b[NELEMENTS]; 

   MPI_Status stat;
   MPI_Datatype indextype;   // required variable

   MPI_Init(&argc,&argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

   blocklengths[0] = 4;
   blocklengths[1] = 2;
   displacements[0] = 5;
   displacements[1] = 12;
   
   // create indexed derived data type
   MPI_Type_indexed(2, blocklengths, displacements, MPI_FLOAT, &indextype);
   MPI_Type_commit(&indextype);

   if (rank == 0) {
     for (i=0; i<numtasks; i++) 
      // task 0 sends one element of indextype to all tasks
        MPI_Send(a, 1, indextype, i, tag, MPI_COMM_WORLD);
     }

   // all tasks receive indextype data from task 0
   MPI_Recv(b, NELEMENTS, MPI_FLOAT, source, tag, MPI_COMM_WORLD, &stat);
   printf("rank= %d  b= %3.1f %3.1f %3.1f %3.1f %3.1f %3.1f\n",
          rank,b[0],b[1],b[2],b[3],b[4],b[5]);
   
   // free datatype when done using it
   MPI_Type_free(&indextype);
   MPI_Finalize();
   }
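With blocklengths {4, 2} and displacements {5, 12}, each rank receives a[5..8] and a[12..13], so the expected output on every rank is b = 6.0 7.0 8.0 9.0 13.0 14.0 (assuming NELEMENTS is 6, as the print loop implies).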
Example #11
EXPORT_MPI_API void FORTRAN_API mpi_type_indexed_( MPI_Fint *count, MPI_Fint blocklens[], MPI_Fint indices[], MPI_Fint *old_type, MPI_Fint *newtype, MPI_Fint *__ierr )
{
    int          i;
    int          *l_blocklens = 0;
    int          local_l_blocklens[MPIR_USE_LOCAL_ARRAY];
    int          *l_indices = 0;
    int          local_l_indices[MPIR_USE_LOCAL_ARRAY];
    MPI_Datatype ldatatype;
    static char myname[] = "MPI_TYPE_INDEXED";

    if ((int)*count > 0) {
        if ((int)*count > MPIR_USE_LOCAL_ARRAY) {
            MPIR_FALLOC(l_blocklens,(int *) MALLOC( *count * sizeof(int) ),
                        MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname );

            MPIR_FALLOC(l_indices,(int *) MALLOC( *count * sizeof(int) ),
                        MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, myname );
        }
        else {
            l_blocklens = local_l_blocklens;
            l_indices = local_l_indices;
        }

        for (i=0; i<(int)*count; i++) {
            l_indices[i] = (int)indices[i];
            l_blocklens[i] = (int)blocklens[i];
        }
    }

    *__ierr = MPI_Type_indexed((int)*count, l_blocklens, l_indices,
                               MPI_Type_f2c(*old_type),
                               &ldatatype);
    if ((int)*count > MPIR_USE_LOCAL_ARRAY) {
        FREE( l_indices );
        FREE( l_blocklens );
    }
    *newtype = MPI_Type_c2f(ldatatype);
}
int main(int argc, char **argv) {
    int rank, nranks, rank_world, nranks_world;
    int i, j, peer, bufsize, errors;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;
    MPI_Comm shr_comm;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks_world);

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &shr_comm);

    MPI_Comm_rank(shr_comm, &rank);
    MPI_Comm_size(shr_comm, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    MPI_Win_allocate_shared(bufsize, 1, MPI_INFO_NULL, shr_comm, &win_buf, &buf_win);

    MPI_Win_fence(0, buf_win);

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) =  1.0 + rank;
    }

    MPI_Win_fence(0, buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */

    for (i = 0; i < ITERATIONS; i++) {
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (j = 0; j < SUB_YDIM; j++) {
            idx_rem[j] = j*XDIM;
            blk_len[j] = SUB_XDIM;
        }

        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &src_type);
        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        /* PUT */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0,
                           1, dst_type, MPI_REPLACE, buf_win);
        MPI_Win_unlock(peer, buf_win);

        /* GET */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0,
                           1, dst_type, MPI_NO_OP, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual   = *(win_buf + i + j*XDIM);
            const double expected = (1.0 + ((rank+nranks-1)%nranks));
            if (fabs(actual - expected) > 1.0e-10) {
                SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                                rank, j, i, expected, actual); );
                errors++;
                fflush(stdout);
            }
        }
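        /* The listing is truncated at this point in the source.  A plausible
         * completion, assuming the MTest cleanup pattern used by the other
         * examples in this collection, closes the loop, unlocks the window,
         * releases the resources and finalizes: */
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Win_free(&buf_win);
    MPI_Free_mem(src_buf);
    MPI_Free_mem(dst_buf);
    MPI_Comm_free(&shr_comm);

    MTest_Finalize(errors);
    MPI_Finalize();
    return 0;
}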
Example #13
int main(int argc, char *argv[])
{
    int errs = 0, err;
    int i, rank, size, source, dest;
    int blksize, totsize;
    int *recvBuf = 0, *srcBuf = 0;
    MPI_Comm comm;
    MPI_Win win;
    MPI_Aint extent;
    MPI_Datatype originType;
    int counts[2];
    int displs[2];

    MTest_Init(&argc, &argv);

    /* Select the communicator and datatypes */
    comm = MPI_COMM_WORLD;

    /* Create the datatype */
    /* One MPI Implementation fails this test with sufficiently large
     * values of blksize - it appears to convert this type to an
     * incorrect contiguous move */
    blksize = 2048;
    counts[0] = blksize;
    counts[1] = blksize;
    displs[0] = 0;
    displs[1] = blksize + 1;
    MPI_Type_indexed(2, counts, displs, MPI_INT, &originType);
    MPI_Type_commit(&originType);

    totsize = 2 * blksize;

    /* Determine the sender and receiver */
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    source = 0;
    dest = size - 1;

    recvBuf = (int *) malloc(totsize * sizeof(int));
    srcBuf = (int *) malloc((totsize + 1) * sizeof(int));

    if (!recvBuf || !srcBuf) {
        fprintf(stderr, "Could not allocate buffers\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Initialize the send and recv buffers */
    for (i = 0; i < totsize; i++) {
        recvBuf[i] = -1;
    }
    for (i = 0; i < blksize; i++) {
        srcBuf[i] = i;
        srcBuf[blksize + 1 + i] = blksize + i;
    }
    srcBuf[blksize] = -1;

    MPI_Type_extent(MPI_INT, &extent);
    MPI_Win_create(recvBuf, totsize * extent, extent, MPI_INFO_NULL, comm, &win);
    MPI_Win_fence(0, win);
    if (rank == source) {
        /* To improve reporting of problems about operations, we
         * change the error handler to errors return */
        MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);

        err = MPI_Put(srcBuf, 1, originType, dest, 0, totsize, MPI_INT, win);
        errs += CheckMPIErr(err);
        err = MPI_Win_fence(0, win);
        errs += CheckMPIErr(err);
    }
    else if (rank == dest) {
        MPI_Win_fence(0, win);
        for (i = 0; i < totsize; i++) {
            if (recvBuf[i] != i) {
                errs++;
                if (errs < 10) {
                    printf("recvBuf[%d] = %d should = %d\n", i, recvBuf[i], i);
                }
            }
        }
    }
    else {
        MPI_Win_fence(0, win);
    }

    MPI_Type_free(&originType);
    MPI_Win_free(&win);
    free(recvBuf);
    free(srcBuf);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
Example #14
int main(int argc, char *argv[])
{
    /* Variable declarations */
    int a[100][100], b[100][100];
    int disp[100], block[100];
    MPI_Datatype ltype;
	
    int bufsize, position = 0;
    void *buffer;
	
    int i, j, errs = 0;
	
    /* Initialize a to some known values and zero out b. */
    for(i = 0; i < 100; i++) {
	for(j = 0; j < 100; j++) {
	    a[i][j] = 1000*i + j;
	    b[i][j] = 0;
	}
    }
	
    /* Initialize MPI */
    MTest_Init( &argc, &argv );
  
    parse_args(argc, argv);

    for(i = 0; i < 100; i++) {
	/* The Fortran version has disp(i) = 100*(i-1) + i and block(i) = 100-i,
	 * which selects the part above the diagonal.  Here we instead select the
	 * lower triangular part including the diagonal: row i contributes its
	 * first i+1 elements, starting at offset 100*i.  The checks below verify
	 * that exactly these values land in b and the rest stays 0. */
	block[i] = i+1;
	disp[i] = 100*i;
    }
	
    /* Create datatype for lower triangular part. */
    MPI_Type_indexed(100, block, disp, MPI_INT, &ltype);
    MPI_Type_commit(&ltype);
	
    /* Pack it. */
    MPI_Pack_size(1, ltype, MPI_COMM_WORLD, &bufsize);
    buffer = (void *) malloc((unsigned) bufsize);
    MPI_Pack( a, 1, ltype, buffer, bufsize, &position, MPI_COMM_WORLD );
	
    /* Unpack the buffer into b. */
    position = 0;
    MPI_Unpack(buffer, bufsize, &position, b, 1, ltype, MPI_COMM_WORLD);
	
    for(i = 0; i < 100; i++) {
	for(j = 0; j < 100; j++) {
	    if (j > i && b[i][j] != 0) {
		errs++;
		if (verbose) fprintf(stderr, "b[%d][%d] = %d; should be %d\n",
				     i, j, b[i][j], 0);
	    }
	    else if (j <= i && b[i][j] != 1000*i + j) {
		errs++;
		if (verbose) fprintf(stderr, "b[%d][%d] = %d; should be %d\n",
				     i, j, b[i][j], 1000*i + j);
	    }
	}
    }

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
Example #15
int main(int argc, char **argv)
{
    int itr, i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *src_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);

    for (i = 0; i < XDIM * YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank + 1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */

    for (itr = 0; itr < ITERATIONS; itr++) {
        MPI_Aint idx_loc[SUB_YDIM];
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (i = 0; i < SUB_YDIM; i++) {
            MPI_Get_address(&src_buf[i * XDIM], &idx_loc[i]);
            idx_rem[i] = i * XDIM;
            blk_len[i] = SUB_XDIM;
        }

#ifdef ABSOLUTE
        MPI_Type_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_DOUBLE, &src_type);
#else
        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &src_type);
#endif
        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);

#ifdef ABSOLUTE
        MPI_Accumulate(MPI_BOTTOM, 1, src_type, peer, 0, 1, dst_type, MPI_SUM, buf_win);
#else
        MPI_Accumulate(src_buf, 1, src_type, peer, 0, 1, dst_type, MPI_SUM, buf_win);
#endif

        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual = *(win_buf + i + j * XDIM);
            const double expected = -1.0 + (1.0 + ((rank + nranks - 1) % nranks)) * (ITERATIONS);
            if (fabs(actual - expected) > 1.0e-10) {
                SQUELCH(printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                               rank, j, i, expected, actual););
                errors++;
                fflush(stdout);
            }
        }
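    /* Truncated here in the source; a plausible completion, following the
     * MTest pattern of the other examples in this collection: */
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);

    MTest_Finalize(errors);
    MPI_Finalize();
    return 0;
}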
Example #16
/** 
 * Start communication among neighbouring processors.
 *
 * If the self->ctype is set to P2P, then the data are copied to the
 * send buffer, self->buf_send, otherwise, the data are sent directly.
 *
 * The parameter, pos, indicates the offset of the parameter, data, to
 * the beginning of the local part of the distributed vector and
 * matrix.
 *
 * @param self A communication handler.
 * @param data The data to be sent.
 * @param pos  The distance between the beginning of the parameter
 *             data and the beginning of the local part of the
 *             distributed vector.
 *
 * @return 0 on success.
 */
int parms_CommDataBegin(parms_Comm self, void *data, int pos)
{
    FLOAT *data_send;
    MPI_Comm comm;
    int i, j, index, start, end, length, tag;

    data_send = (FLOAT *)data;
    comm = self->comm;
    tag = 100;
#if defined(DBL_CMPLX)  
    for (i = 0; i < self->nprecv; i++) {
        start = self->ptrvrecv[i];
        end = self->ptrvrecv[i+1];
        length = end - start;
        MPI_Irecv(&self->buf_recv[start], length, MPI_CMPLX,
                  self->procs_recv[i], tag, comm, &self->req_recv[i]);
    }
    if (self->ctype == P2P) {
        for (i = 0; i < self->npsend; i++) {
            start  = self->ptrvsend[i];
            end    = self->ptrvsend[i+1];
            length = end - start;
            /* copy data to the send buffer */
            for (j = start; j < end; j++) {
                index = self->vlist_send[j];
                self->buf_send[j] = data_send[index-pos];
            }
            MPI_Isend(&self->buf_send[start], length, MPI_CMPLX,
                      self->procs_send[i], tag, comm, &self->req_send[i]);
        }
    }
    else if (self->ctype == DERIVED) {
        if (self->isdt_alloc == false) {
            int *blen, *disp_index;

            if (self->npsend) {
                PARMS_NEWARRAY(blen,       self->mdata_send);
                PARMS_NEWARRAY(disp_index, self->mdata_send);
                PARMS_NEWARRAY(self->dtype_send, self->npsend);

            }
            for (i = 0; i < self->npsend; i++) {
                start = self->ptrvsend[i];
                end   = self->ptrvsend[i+1];
                length = end - start;
                for (j = start; j < end; j++) {
                    disp_index[j-start] = self->vlist_send[j]-pos;
                    blen[j-start] = 1;
                }
                MPI_Type_indexed(length, blen, disp_index, MPI_CMPLX,
                                 &self->dtype_send[i]);
                MPI_Type_commit(&self->dtype_send[i]);
            }
            self->isdt_alloc = true;
        }

        for (i = 0; i < self->npsend; i++) {
            MPI_Isend(data_send-pos, 1, self->dtype_send[i],
                      self->procs_send[i], tag, comm, &self->req_send[i]);
        }
    }
#else
    for (i = 0; i < self->nprecv; i++) {
        start = self->ptrvrecv[i];
        end = self->ptrvrecv[i+1];
        length = end - start;
        MPI_Irecv(&self->buf_recv[start], length, MPI_DOUBLE,
                  self->procs_recv[i], tag, comm, &self->req_recv[i]);//int MPI_Irecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request)
    }
    if (self->ctype == P2P) {
        for (i = 0; i < self->npsend; i++) {
            start  = self->ptrvsend[i];
            end    = self->ptrvsend[i+1];
            length = end - start;
            /* copy data to the send buffer */
            for (j = start; j < end; j++) {
                index = self->vlist_send[j];
                self->buf_send[j] = data_send[index-pos];
            }
            MPI_Isend(&self->buf_send[start], length, MPI_DOUBLE,
                      self->procs_send[i], tag, comm, &self->req_send[i]); //int MPI_Isend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request)
        }
    }
    else if (self->ctype == DERIVED) {
        if (self->isdt_alloc == false) {
            int *blen, *disp_index;

            if (self->npsend) {
                PARMS_NEWARRAY(blen,       self->mdata_send);
                PARMS_NEWARRAY(disp_index, self->mdata_send);
                PARMS_NEWARRAY(self->dtype_send, self->npsend);

            }
            for (i = 0; i < self->npsend; i++) {
                start = self->ptrvsend[i];
                end   = self->ptrvsend[i+1];
                length = end - start;
                for (j = start; j < end; j++) {
                    disp_index[j-start] = self->vlist_send[j]-pos;
                    blen[j-start] = 1;
                }
                MPI_Type_indexed(length, blen, disp_index, MPI_DOUBLE,
                                 &self->dtype_send[i]);
                MPI_Type_commit(&self->dtype_send[i]);
            }
            self->isdt_alloc = true;
        }

        for (i = 0; i < self->npsend; i++) {
            MPI_Isend(data_send-pos, 1, self->dtype_send[i],
                      self->procs_send[i], tag, comm, &self->req_send[i]);
        }
    }
#endif

    return 0;
}
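In the DERIVED branch the scattered entries for each neighbour are described once by an indexed type with unit block lengths at the send indices, so MPI_Isend can gather them directly from the caller's vector; the P2P branch instead copies them into buf_send explicitly before sending contiguously.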
Example #17
int compute_solution(int nprocs, int myrank, const int max_iters, int nintci, int nintcf, int nextcf, int** lcc, double* bp,
                     double* bs, double* bw, double* bl, double* bn, double* be, double* bh,
                     double* cnorm, double* var, double *su, double* cgup, double* residual_ratio,
                     int* local_global_index, int* global_local_index, int nghb_cnt, 
                     int* nghb_to_rank, int* send_cnt, int** send_lst, int *recv_cnt, int** recv_lst){
    /** parameters used in gccg */
    int iter = 1;
    int if1 = 0;
    int if2 = 0;
    int nor = 1;
    int nor1 = nor - 1;
    int nc = 0;
    int nomax = 3;
    
    /** the reference residual */
    double resref = 0.0;

    /** array storing residuals */
    double *resvec = (double *) calloc(sizeof(double), (nintcf + 1));

    // initialize the reference residual
    for ( nc = nintci; nc <= nintcf; nc++ ) {
        resvec[nc] = su[nc];
        resref = resref + resvec[nc] * resvec[nc];
    }
    
    // A2.3
    double global_resref = 0;
    MPI_Allreduce(&resref, &global_resref, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    resref = global_resref;
    

    resref = sqrt(resref);
    if ( resref < 1.0e-15 ) {
        fprintf(stderr, "Residue sum less than 1.e-15 - %lf\n", resref);
        return 0;
    }

    
    // Counting the number of ghost cells to extend the direc1
    int ghost_cells_recv = 0, ghost_cells_send = 0;
    int proc, i, j;
    
    for (proc = 0; proc < nghb_cnt; proc++) {
      ghost_cells_recv += recv_cnt[proc];
      ghost_cells_send += send_cnt[proc];
    }
    
    
    /** the computation vectors */
    // TODO:
    double *direc1 = (double *) calloc(sizeof(double), ((nextcf + 1) + ghost_cells_recv));
    double *direc2 = (double *) calloc(sizeof(double), (nextcf + 1));
    double *adxor1 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *adxor2 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *dxor1 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *dxor2 = (double *) calloc(sizeof(double), (nintcf + 1));
    
    // Determine displacements for sending
    int **displacements = (int **) malloc(sizeof(int *)*nghb_cnt);
    int **blocklenghts = (int **) malloc(sizeof(int *)*nghb_cnt);
    
    for(proc = 0; proc < nghb_cnt; proc++) {
      displacements[proc] = (int*)calloc(send_cnt[proc],sizeof(int));
      blocklenghts[proc] = (int*)calloc(send_cnt[proc],sizeof(int));
    }
    
    j = 0;
    for (proc = 0; proc < nghb_cnt; proc++) {
      for (i = 0; i < send_cnt[proc]; i++) {
	displacements[proc][i] = global_local_index[send_lst[proc][i]];
	blocklenghts[proc][i] = 1;
      }
    }
    
    MPI_Request *requests = (MPI_Request *) malloc(sizeof(*requests)*nghb_cnt);
    MPI_Datatype *indextype;
    indextype = (MPI_Datatype *) malloc(sizeof(*indextype)*nghb_cnt);
    
    for (proc = 0; proc < nghb_cnt; proc++) {
      MPI_Type_indexed(send_cnt[proc], blocklenghts[proc], displacements[proc], MPI_DOUBLE, &(indextype[proc]));
      MPI_Type_commit(&(indextype[proc]));
    }
    
    // Testing
//     for ( nc = nintci; nc <= nintcf; nc++ ) {
// 	printf("nc = %d, %d %d %d %d %d \n", nc, lcc[nc][0], lcc[nc][1], lcc[nc][2], lcc[nc][3], lcc[nc][4], lcc[nc][5], lcc[nc][5]);
//     }


    while ( iter < max_iters ) {
        /**********  START COMP PHASE 1 **********/
        // update the old values of direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc];
        }

	  // Communication of direc1 - start
	  
	  for (proc = 0; proc < nghb_cnt; proc++) {
	    MPI_Isend(direc1, 1, indextype[proc], nghb_to_rank[proc], 0, MPI_COMM_WORLD, &(requests[proc]));
	  }
	  
	  // Reference position in the direc1
	  int ref_pos = nextcf + 1;
	  
	  for (proc = 0; proc < nghb_cnt; proc++) {
	    MPI_Recv(&(direc1[ref_pos]), recv_cnt[proc], MPI_DOUBLE, nghb_to_rank[proc], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	    ref_pos += recv_cnt[proc];
	  }
	  
	  /* complete the outstanding sends before direc1 is updated again */
	  MPI_Waitall(nghb_cnt, requests, MPI_STATUSES_IGNORE);
	  
	  // Communication of direc1 - stop
        
        // compute new guess (approximation) for direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
//             direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[lcc[nc][0]]
//                          - be[nc] * direc1[lcc[nc][1]] - bn[nc] * direc1[lcc[nc][2]]
//                          - bw[nc] * direc1[lcc[nc][3]] - bl[nc] * direc1[lcc[nc][4]]
//                          - bh[nc] * direc1[lcc[nc][5]];
	  //printf("nextcf = %d\n", (nextcf + 1));
	  //printf("nc = %d, %d %d %d %d %d %d %d \n", nc, global_local_index[lcc[nc][0]], global_local_index[lcc[nc][1]], global_local_index[lcc[nc][2]], global_local_index[lcc[nc][3]], global_local_index[lcc[nc][4]], global_local_index[lcc[nc][5]], lcc[nc][5]);
	  

		
            direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[global_local_index[lcc[nc][0]]]
                         - be[nc] * direc1[global_local_index[lcc[nc][1]]] - bn[nc] * direc1[global_local_index[lcc[nc][2]]]
                         - bw[nc] * direc1[global_local_index[lcc[nc][3]]] - bl[nc] * direc1[global_local_index[lcc[nc][4]]]
                         - bh[nc] * direc1[global_local_index[lcc[nc][5]]];
			
			 
        }
        
        
        /********** END COMP PHASE 1 **********/

        /********** START COMP PHASE 2 **********/
        // execute normalization steps
        double oc1, oc2, occ;
        if ( nor1 == 1 ) {
            oc1 = 0;
            occ = 0;

            for ( nc = nintci; nc <= nintcf; nc++ ) {
                occ = occ + direc2[nc] * adxor1[nc];
            }
            
            // A2.3
            double global_occ = 0.0;
	    MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	    occ = global_occ;

            oc1 = occ / cnorm[1];
            for ( nc = nintci; nc <= nintcf; nc++ ) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc];
            }

            if1++;
        } else {
            if ( nor1 == 2 ) {
                oc1 = 0;
                occ = 0;

                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    occ = occ + direc2[nc] * adxor1[nc];
                }
                
                // A2.3
		double global_occ = 0.0;
		MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		occ = global_occ;
		

                oc1 = occ / cnorm[1];
                oc2 = 0;
                occ = 0;
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    occ = occ + direc2[nc] * adxor2[nc];
                }
                
                // A2.3
	
		MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		occ = global_occ;

                oc2 = occ / cnorm[2];
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc];
                    direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc];
                }

                if2++;
            }
        }

        // compute the new residual
        cnorm[nor] = 0;
        double omega = 0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc];
            omega = omega + resvec[nc] * direc2[nc];
        }
        
	// A2.3
	double global_cnorm_nor = 0.0, global_omega = 0.0;
	MPI_Allreduce(&(cnorm[nor]), &global_cnorm_nor, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	MPI_Allreduce(&omega, &global_omega, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	cnorm[nor] = global_cnorm_nor;
	omega = global_omega;

        omega = omega / cnorm[nor];
        double res_updated = 0.0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            resvec[nc] = resvec[nc] - omega * direc2[nc];
            res_updated = res_updated + resvec[nc] * resvec[nc];
            var[nc] = var[nc] + omega * direc1[nc];
        }
        
	// A2.3
	double global_res_updated = 0.0;
	MPI_Allreduce(&res_updated, &global_res_updated, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	res_updated = global_res_updated;

        res_updated = sqrt(res_updated);
        *residual_ratio = res_updated / resref;

        // exit on no improvements of residual
        if ( *residual_ratio <= 1.0e-10 ) break;

        iter++;

        // prepare additional arrays for the next iteration step
        if ( nor == nomax ) {
            nor = 1;
        } else {
            if ( nor == 1 ) {
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    dxor1[nc] = direc1[nc];
                    adxor1[nc] = direc2[nc];
                }
            } else {
                if ( nor == 2 ) {
                    for ( nc = nintci; nc <= nintcf; nc++ ) {
                        dxor2[nc] = direc1[nc];
                        adxor2[nc] = direc2[nc];
                    }
                }
            }

            nor++;
        }
        nor1 = nor - 1;
        /********** END COMP PHASE 2 **********/
    }

    for (i = 0; i < nghb_cnt; i++){
      free(displacements[i]);
      free(blocklenghts[i]);
      MPI_Type_free(&(indextype[i]));
    }

    free(displacements);
    free(blocklenghts);
    free(indextype);
    free(requests);
    
    free(direc1);
    free(direc2);
    free(adxor1);
    free(adxor2);
    free(dxor1);
    free(dxor2);
    free(resvec);

    return iter;
}
Example #18
/* indexed_of_vectors_test()
 *
 * Builds an indexed type of vectors of ints.
 *
 * MPICH1 fails this test because it converts the vectors into hvectors.
 *
 * Returns the number of errors encountered.
 */
int indexed_of_vectors_test(void)
{
    MPI_Datatype inner_vector, inner_vector_copy;
    MPI_Datatype outer_indexed;
    
    int i_count = 3, i_blocklengths[3] = { 3, 2, 1 };
    int i_displacements[3] = { 10, 20, 30 };

    int nints, nadds, ntypes, combiner, *ints;
    MPI_Aint *adds = NULL;
    MPI_Datatype *types;

    int err, errs = 0;

    /* set up type */
    err = MPI_Type_vector(2,
			  1,
			  2,
			  MPI_INT,
			  &inner_vector);
    if (err != MPI_SUCCESS) {
	if (verbose) fprintf(stderr, 
			     "error in MPI call; aborting after %d errors\n",
			     errs+1);
	return errs+1;
    }

    err = MPI_Type_indexed(i_count,
			   i_blocklengths,
			   i_displacements,
			   inner_vector,
			   &outer_indexed);
    if (err != MPI_SUCCESS) {
	if (verbose) fprintf(stderr, 
			     "error in MPI call; aborting after %d errors\n",
			     errs+1);
	return errs+1;
    }

    /* decode outer vector (get envelope, then contents) */
    err = MPI_Type_get_envelope(outer_indexed,
				&nints,
				&nadds,
				&ntypes,
				&combiner);
    if (err != MPI_SUCCESS) {
	if (verbose) fprintf(stderr, 
			     "error in MPI call; aborting after %d errors\n",
			     errs+1);
	return errs+1;
    }

    if (nints != 7) errs++;
    if (nadds != 0) errs++;
    if (ntypes != 1) errs++;
    if (combiner != MPI_COMBINER_INDEXED) errs++;

    if (verbose) {
        if (nints != 7) fprintf(stderr, "nints = %d; should be 7\n", nints);
	if (nadds != 0) fprintf(stderr, "nadds = %d; should be 0\n", nadds);
	if (ntypes != 1) fprintf(stderr, "ntypes = %d; should be 1\n", ntypes);
	if (combiner != MPI_COMBINER_INDEXED)
	    fprintf(stderr, "combiner = %s; should be indexed\n",
		    combiner_to_string(combiner));
    }

    if (errs) {
	if (verbose) fprintf(stderr, "aborting after %d errors\n", errs);
	return errs;
    }

    ints = malloc(nints * sizeof(*ints));
    if (nadds) adds = malloc(nadds * sizeof(*adds));
    types = malloc(ntypes * sizeof(*types));

    /* get contents of outer vector */
    err = MPI_Type_get_contents(outer_indexed,
				nints,
				nadds,
				ntypes,
				ints,
				adds,
				types);

    if (ints[0] != i_count) errs++;
    if (ints[1] != i_blocklengths[0]) errs++;
    if (ints[2] != i_blocklengths[1]) errs++;
    if (ints[3] != i_blocklengths[2]) errs++;
    if (ints[4] != i_displacements[0]) errs++;
    if (ints[5] != i_displacements[1]) errs++;
    if (ints[6] != i_displacements[2]) errs++;

    if (verbose) {
	if (ints[0] != i_count) 
	    fprintf(stderr, "count = %d; should be %d\n", ints[0], i_count);
	if (ints[1] != i_blocklengths[0]) 
	    fprintf(stderr, "blocklength[0] = %d; should be %d\n", ints[1], i_blocklengths[0]);
	if (ints[2] != i_blocklengths[1]) 
	    fprintf(stderr, "blocklength[1] = %d; should be %d\n", ints[2], i_blocklengths[1]);
	if (ints[3] != i_blocklengths[2]) 
	    fprintf(stderr, "blocklength[2] = %d; should be %d\n", ints[3], i_blocklengths[2]);
	if (ints[4] != i_displacements[0]) 
	    fprintf(stderr, "displacement[0] = %d; should be %d\n", ints[4], i_displacements[0]);
	if (ints[5] != i_displacements[1]) 
	    fprintf(stderr, "displacement[1] = %d; should be %d\n", ints[5], i_displacements[1]);
	if (ints[6] != i_displacements[2]) 
	    fprintf(stderr, "displacement[2] = %d; should be %d\n", ints[6], i_displacements[2]);
    }

    if (errs) {
	if (verbose) fprintf(stderr, "aborting after %d errors\n", errs);
	return errs;
    }

    inner_vector_copy = types[0];
    free(ints);
    if (nadds) free(adds);
    free(types);

    /* decode inner vector */
    err = MPI_Type_get_envelope(inner_vector_copy,
				&nints,
				&nadds,
				&ntypes,
				&combiner);
    if (err != MPI_SUCCESS) {
	if (verbose) fprintf(stderr, 
			     "error in MPI call; aborting after %d errors\n",
			     errs+1);
	return errs+1;
    }

    if (nints != 3) errs++;
    if (nadds != 0) errs++;
    if (ntypes != 1) errs++;
    if (combiner != MPI_COMBINER_VECTOR) errs++;

    if (verbose) {
	if (nints != 3) fprintf(stderr, 
				"inner vector nints = %d; should be 3\n",
				nints);
	if (nadds != 0) fprintf(stderr, 
				"inner vector nadds = %d; should be 0\n",
				nadds);
	if (ntypes != 1) fprintf(stderr, 
				 "inner vector ntypes = %d; should be 1\n",
				 ntypes);
	if (combiner != MPI_COMBINER_VECTOR)
	    fprintf(stderr, "inner vector combiner = %s; should be vector\n",
		    combiner_to_string(combiner));
    }
    if (errs) {
	if (verbose) fprintf(stderr, "aborting after %d errors\n", errs);
	return errs;
    }

    ints = malloc(nints * sizeof(*ints));
    if (nadds) adds = malloc(nadds * sizeof(*adds));
    types = malloc(ntypes * sizeof(*types));

    err = MPI_Type_get_contents(inner_vector_copy,
				nints,
				nadds,
				ntypes,
				ints,
				adds,
				types);

    if (ints[0] != 2) errs++;
    if (ints[1] != 1) errs++;
    if (ints[2] != 2) errs++;

    if (verbose) {
	if (ints[0] != 2) fprintf(stderr, 
				  "inner vector count = %d; should be 2\n",
				  ints[0]);
	if (ints[1] != 1) fprintf(stderr,
				  "inner vector blocklength = %d; should be 1\n",
				  ints[1]);
	if (ints[2] != 2) fprintf(stderr, 
				  "inner vector stride = %d; should be 2\n",
				  ints[2]);
    }
    if (errs) {
	if (verbose) fprintf(stderr, "aborting after %d errors\n", errs);
	return errs;
    }

    free(ints);
    if (nadds) free(adds);
    free(types);

    MPI_Type_free( &inner_vector_copy );
    MPI_Type_free( &inner_vector );
    MPI_Type_free( &outer_indexed );

    return 0;
}
Example #19
static int test_indexed_with_zeros(char *filename, int testcase)
{
    int i, rank, np, buflen, num, err, nr_errors=0;
    int  nelms[MAXLEN], buf[MAXLEN], indices[MAXLEN], blocklen[MAXLEN];
    MPI_File fh;
    MPI_Status status;
    MPI_Datatype filetype;
    MPI_Datatype types[MAXLEN];
    MPI_Aint addrs[MAXLEN];

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    /* set up the number of integers to write in each iteration */
    for (i=0; i<MAXLEN; i++) nelms[i] = 0;
    if (rank == 0) nelms[4]=nelms[5]=nelms[7]=1;
    if (rank == 1) nelms[0]=nelms[1]=nelms[2]=nelms[3]=nelms[6]=nelms[8]=1;

    /* pre-fill the file with integers -999 */
    if (rank == 0) {
        for (i=0; i<MAXLEN; i++) buf[i] = -999;
	err =MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY,
		MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_write(fh, buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* define a filetype with spurious leading zeros */
    buflen = num = 0;
    for (i=0; i<MAXLEN; i++) {
        buflen       += nelms[i];
        indices[num]  = i;
        addrs[num] = i*sizeof(int);
        blocklen[num] = nelms[i];
        types[num] = MPI_INT;
        num++;
    }
    switch (testcase) {
	case INDEXED:
	    MPI_Type_indexed(num, blocklen, indices, MPI_INT, &filetype);
	    break;
	case HINDEXED:
	    MPI_Type_hindexed(num, blocklen, addrs, MPI_INT, &filetype);
	    break;
	case STRUCT:
	    MPI_Type_create_struct(num, blocklen, addrs, types, &filetype);
	    break;
	default:
	    fprintf(stderr, "unknown testcase!\n");
	    return(-100);

    }

    MPI_Type_commit(&filetype);

    /* initialize write buffer and write to file*/
    for (i=0; i<MAXLEN; i++) buf[i] = 1;
    err =MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
    err = MPI_File_set_view(fh, 0, MPI_INT, filetype, "native", MPI_INFO_NULL);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_set_view");
    err = MPI_File_write_all(fh, buf, buflen, MPI_INT, &status);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write_all");
    MPI_Type_free(&filetype);
    err = MPI_File_close(&fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");

    /* read back and check */
    if (rank == 0) {
        err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_read(fh,buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_read");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
        for (i=0; i<MAXLEN; i++) {
            if (buf[i] < 0) {
		nr_errors++;
                printf("Error: unexpected value for case %d at buf[%d] == %d\n",
			testcase,i,buf[i]);
	    }
	}
    }
    return nr_errors;
}
int compute_solution(const int max_iters,
                     int nintci, int nintcf,
                     int** lcc,
                     double* bp,
                     double* bs, double* bw, double* bl,
                     double* bn, double* be, double* bh,
                     double* cnorm, double* var, double *su, double* cgup, double* residual_ratio,
                     int* local_global_index, int* global_local_index,
                     int number_of_elements,
                     int* number_of_elements_in_partitions,
                     int* partitions_offsets,
                     int* send_count, int** send_list,
                     int* recv_count, int** recv_list,
                     idx_t* epart) {
    int iter = 1;
    int if1 = 0;
    int if2 = 0;
    int nor = 1;
    int nor1 = nor - 1;
    int nc = 0;

    int i;

    // allocate arrays used in gccg
    int nomax = 3;

    /** the reference residual*/
    double resref = 0.0;

    /** array storing residuals */
    double *resvec = (double *) calloc(sizeof(double), (nintcf + 1));


    // initialize the reference residual
    for ( nc = nintci; nc <= nintcf; nc++ ) {
        resvec[nc] = su[nc];
        resref = resref + resvec[nc] * resvec[nc];
    }


    MPI_Allreduce(MPI_IN_PLACE, &resref, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    resref = sqrt(resref);
    if ( resref < 1.0e-15 ) {
        fprintf(stderr, "Residue sum less than 1.e-15 - %lf\n", resref);
        return 0;
    }

    int my_rank;
    int neighbours_count;
    MPI_Comm_size(MPI_COMM_WORLD, &neighbours_count);    /// get number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    /* how many cells do we receive */
    int total_recv = 0;
    for ( i = 0; i < neighbours_count; ++i )
        total_recv += number_of_elements_in_partitions[i];

    /** the computation vectors */
    double *direc1 = (double *) calloc(sizeof(double), (nintcf + 1) + total_recv + 1);
    double *direc2 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *adxor1 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *adxor2 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *dxor1 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *dxor2 = (double *) calloc(sizeof(double), (nintcf + 1));

    MPI_Datatype* send_datatypes = (MPI_Datatype*) calloc(neighbours_count, sizeof(MPI_Datatype));

    for ( i = 0; i <  neighbours_count; ++i ) {
        int j;
        int* b = (int*) calloc(send_count[i], sizeof(int));
        for ( j = 0; j < send_count[i]; ++j )
            b[j] = 1;

        MPI_Type_indexed(send_count[i], b, send_list[i], MPI_DOUBLE, &send_datatypes[i]);
        MPI_Type_commit(&send_datatypes[i]);
        free(b);
    }

    MPI_Datatype* recv_datatypes = (MPI_Datatype*) calloc(neighbours_count, sizeof(MPI_Datatype));

    for ( i = 0; i <  neighbours_count; ++i ) {
        int j;
        int* b = (int*) calloc(recv_count[i], sizeof(int));
        for ( j = 0; j < recv_count[i]; ++j )
            b[j] = 1;

        MPI_Type_indexed(recv_count[i], b, recv_list[i], MPI_DOUBLE, &recv_datatypes[i]);
        MPI_Type_commit(&recv_datatypes[i]);
        free(b);
    }

    MPI_Request* requests = (MPI_Request*) calloc(neighbours_count, sizeof(MPI_Request));
    MPI_Status* statuses = (MPI_Status*) calloc(neighbours_count, sizeof(MPI_Status));

    /// ---------------------------------------------------

    while ( iter < max_iters ) {
        /**********  START COMP PHASE 1 **********/
        // update the old values of direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc];
        }

        for ( i = 0; i <  neighbours_count; ++i ) {
            assert(send_count[i] == recv_count[i]);
            if ( send_count[i] > 0 ) {
                MPI_Isend(direc1, 1, send_datatypes[i], i, 10, MPI_COMM_WORLD, &requests[i]);
            }
        }

        for ( i = 0; i <  neighbours_count; ++i ) {
            if ( recv_count[i] > 0 ) {
                MPI_Recv(&direc1[number_of_elements + partitions_offsets[i]],
                         1, recv_datatypes[i], i, 10, MPI_COMM_WORLD, &statuses[i]);
            }
        }

        for ( i = 0; i <  neighbours_count; ++i ) {
            if ( send_count[i] > 0 )
                MPI_Wait(&requests[i], &statuses[i]);
        }

        // compute new guess (approximation) for direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            direc2[nc] = bp[nc] * direc1[nc]
                         - bs[nc] * direc1[lcc[nc][0]]
                         - bw[nc] * direc1[lcc[nc][3]]
                         - bl[nc] * direc1[lcc[nc][4]]
                         - bn[nc] * direc1[lcc[nc][2]]
                         - be[nc] * direc1[lcc[nc][1]]
                         - bh[nc] * direc1[lcc[nc][5]];
        }
        /********** END COMP PHASE 1 **********/

        /********** START COMP PHASE 2 **********/
        // execute normalization steps
        double oc1, oc2, occ;
        double global_occ;
        if ( nor1 == 1 ) {
            oc1 = 0;
            occ = 0;

            for ( nc = nintci; nc <= nintcf; nc++ ) {
                occ = occ + adxor1[nc] * direc2[nc];
            }

            MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

            oc1 = global_occ / cnorm[1];
            for ( nc = nintci; nc <= nintcf; nc++ ) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc];
            }

            if1++;
        } else {
            if ( nor1 == 2 ) {
                oc1 = 0;
                occ = 0;

                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    occ = occ + adxor1[nc] * direc2[nc];
                }

                MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

                oc1 = global_occ / cnorm[1];
                oc2 = 0;
                occ = 0;
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    occ = occ + adxor2[nc] * direc2[nc];
                }

                MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

                oc2 = global_occ / cnorm[2];
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc];
                    direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc];
                }

                if2++;
            }
        }

        // compute the new residual
        cnorm[nor] = 0;
        double omega = 0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc];
            omega = omega + resvec[nc] * direc2[nc];
        }


        MPI_Allreduce(MPI_IN_PLACE, &cnorm[nor], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce(MPI_IN_PLACE, &omega, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

        omega = omega / cnorm[nor];
        double res_updated = 0.0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            var[nc] = var[nc] + omega * direc1[nc];
            resvec[nc] = resvec[nc] - omega * direc2[nc];
            res_updated = res_updated + resvec[nc] * resvec[nc];
        }


        MPI_Allreduce(MPI_IN_PLACE, &res_updated, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

        res_updated = sqrt(res_updated);

        *residual_ratio = res_updated / resref;

        // exit on no improvements of residual
        if ( *residual_ratio <= 1.0e-10 ) break;

        iter++;

        // prepare additional arrays for the next iteration step
        if ( nor == nomax ) {
            nor = 1;
        } else {
            if ( nor == 1 ) {
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    dxor1[nc] = direc1[nc];
                    adxor1[nc] = direc2[nc];
                }
            } else {
                if ( nor == 2 ) {
                    for ( nc = nintci; nc <= nintcf; nc++ ) {
                        dxor2[nc] = direc1[nc];
                        adxor2[nc] = direc2[nc];
                    }
                }
            }

            nor++;
        }
        nor1 = nor - 1;
        /********** END COMP PHASE 2 **********/

        #ifdef DEBUG
        if (iter > 2)
            break;
        #endif
    }

    free(recv_datatypes);
    free(send_datatypes);
    free(requests);
    free(statuses);

    free(resvec);
    free(direc1);
    free(direc2);
    free(adxor1);
    free(adxor2);
    free(dxor1);
    free(dxor2);

    return iter;
}
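Unlike the variant in Example #17 above, which receives ghost values into a contiguous tail of direc1, this version also builds unit-block indexed types on the receive side, so incoming values are scattered at the recv_list offsets relative to direc1 + number_of_elements + partitions_offsets[i].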
Example #21
PIDX_return_code PIDX_generic_rst_staged_write(PIDX_generic_rst_id rst_id)
{
  PIDX_variable_group var_grp = rst_id->idx->variable_grp[rst_id->group_index];

#if PIDX_HAVE_MPI
  unsigned long long k1 = 0, i1 = 0, j1 = 0;
  unsigned long long i, j, v, index, count1 = 0, req_count = 0;
  int *send_count, *send_offset;
  unsigned long long send_c = 0, send_o = 0, counter = 0, req_counter = 0, chunk_counter = 0;
  int ret = 0;
  int pipe_length = 0;

  MPI_Request *req;
  MPI_Status *status;
  MPI_Datatype *chunk_data_type;

  //count how many requests, statuses and datatypes will be needed
  for (i = 0; i < rst_id->reg_patch_grp_count; i++)
    for(j = 0; j < rst_id->reg_patch_grp[i]->count; j++)
      req_count++;

  int end_index = 0;
  int start_index = 0;
  for (start_index = rst_id->first_index; start_index < (rst_id->last_index + 1); start_index = start_index + pipe_length + 1)
  {
    send_c = 0, send_o = 0, counter = 0, req_counter = 0, chunk_counter = 0;
    end_index = ((start_index + pipe_length) >= (rst_id->last_index + 1)) ? (rst_id->last_index) : (start_index + pipe_length);

    req = malloc(sizeof (*req) * req_count * 2 * (end_index - start_index + 1));
    if (!req)
    {
      fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
      return (-1);
    }
    memset(req, 0, sizeof (*req) * req_count * 2 * (end_index - start_index + 1));

    status = malloc(sizeof (*status) * req_count * 2 * (end_index - start_index + 1));
    if (!status)
    {
      fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
      return (-1);
    }
    memset(status, 0, sizeof (*status) * req_count * 2 * (end_index - start_index + 1));

    chunk_data_type =  malloc(sizeof (*chunk_data_type) * req_count  * (end_index - start_index + 1));
    if (!chunk_data_type)
    {
      fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
      return (-1);
    }
    memset(chunk_data_type, 0, sizeof (*chunk_data_type) * req_count  * (end_index - start_index + 1));

    for (i = 0; i < rst_id->reg_patch_grp_count; i++)
    {
      if (rst_id->idx_c->grank == rst_id->reg_patch_grp[i]->max_patch_rank)
      {
        for(j = 0; j < rst_id->reg_patch_grp[i]->count; j++)
        {
          unsigned long long *reg_patch_offset = rst_id->reg_patch_grp[i]->patch[j]->offset;
          unsigned long long *reg_patch_count  = rst_id->reg_patch_grp[i]->patch[j]->size;

          if(rst_id->idx_c->grank == rst_id->reg_patch_grp[i]->source_patch_rank[j])
          {
            count1 = 0;

            unsigned long long *sim_patch_offset = var_grp->variable[start_index]->sim_patch[0]->offset;
            unsigned long long *sim_patch_count = var_grp->variable[start_index]->sim_patch[0]->size;

            for (k1 = reg_patch_offset[2]; k1 < reg_patch_offset[2] + reg_patch_count[2]; k1++)
              for (j1 = reg_patch_offset[1]; j1 < reg_patch_offset[1] + reg_patch_count[1]; j1++)
                for (i1 = reg_patch_offset[0]; i1 < reg_patch_offset[0] + reg_patch_count[0]; i1 = i1 + reg_patch_count[0])
                {
                  index = (sim_patch_count[0] * sim_patch_count[1] * (k1 - sim_patch_offset[2])) +
                          (sim_patch_count[0] * (j1 - sim_patch_offset[1])) +
                          (i1 - sim_patch_offset[0]);

                  for(v = start_index; v <= end_index; v++)
                  {
                    PIDX_variable var = var_grp->variable[v];
                    send_o = index * var->vps;
                    send_c = reg_patch_count[0] * var->vps;

                    if (rst_id->idx_dbg->state_dump != PIDX_NO_IO_AND_META_DATA_DUMP)
                      memcpy(var->rst_patch_group->patch[j]->buffer + (count1 * send_c * var->bpv/8), var->sim_patch[0]->buffer + send_o * var->bpv/8, send_c * var->bpv/8);

                    if (rst_id->idx_dbg->state_dump == PIDX_META_DATA_DUMP_ONLY || rst_id->idx_dbg->state_dump == PIDX_NO_IO_AND_META_DATA_DUMP)
                    {
                      fprintf(rst_id->idx_dbg->local_dump_fp, "[M] [%lld] Dest offset %lld Dest size %lld Source offset %lld Source size %lld\n", v, (unsigned long long)(count1 * send_c * var->bpv/8), (unsigned long long)(send_c * var->bpv/8), (unsigned long long)(send_o * var->bpv/8), (unsigned long long)(send_c * var->bpv/8));
                      fflush(rst_id->idx_dbg->local_dump_fp);
                    }
                  }
                  count1++;
                }
          }
          else
          {
            for(v = start_index; v <= end_index; v++)
            {
              PIDX_variable var = var_grp->variable[v];
              int length = (reg_patch_count[0] * reg_patch_count[1] * reg_patch_count[2]) * var->vps * var->bpv/8;

              if (rst_id->idx_dbg->state_dump != PIDX_NO_IO_AND_META_DATA_DUMP)
              {
                ret = MPI_Irecv(var->rst_patch_group->patch[j]->buffer, length, MPI_BYTE, rst_id->reg_patch_grp[i]->source_patch_rank[j], 123, rst_id->idx_c->global_comm, &req[req_counter]);
                if (ret != MPI_SUCCESS)
                {
                  fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
                  return PIDX_err_mpi;
                }
              }

              if (rst_id->idx_dbg->state_dump == PIDX_META_DATA_DUMP_ONLY || rst_id->idx_dbg->state_dump == PIDX_NO_IO_AND_META_DATA_DUMP)
              {
                fprintf(rst_id->idx_dbg->mpi_dump_fp, "[N REC] [%lld] Dest offset 0 Dest size %d My rank %d Source rank %d\n", v, length, rst_id->idx_c->grank,  rst_id->reg_patch_grp[i]->source_patch_rank[j]);
                fflush(rst_id->idx_dbg->mpi_dump_fp);
              }

              req_counter++;
            }
          }
        }
        counter++;
      }
      else
      {
        for(j = 0; j < rst_id->reg_patch_grp[i]->count; j++)
        {
          if(rst_id->idx_c->grank == rst_id->reg_patch_grp[i]->source_patch_rank[j])
          {
            for(v = start_index; v <= end_index; v++)
            {
              PIDX_variable var = var_grp->variable[v];

              unsigned long long *reg_patch_count = rst_id->reg_patch_grp[i]->patch[j]->size;
              unsigned long long *reg_patch_offset = rst_id->reg_patch_grp[i]->patch[j]->offset;

              send_offset = malloc(sizeof (int) * (reg_patch_count[1] * reg_patch_count[2]));
              if (!send_offset)
              {
                fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
                return PIDX_err_mpi;
              }
              memset(send_offset, 0, sizeof (int) * (reg_patch_count[1] * reg_patch_count[2]));

              send_count = malloc(sizeof (int) * (reg_patch_count[1] * reg_patch_count[2]));
              if (!send_count)
              {
                fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
                return PIDX_err_mpi;
              }
              memset(send_count, 0, sizeof (int) * (reg_patch_count[1] * reg_patch_count[2]));

              count1 = 0;

              unsigned long long *sim_patch_count  = var_grp->variable[start_index]->sim_patch[0]->size;
              unsigned long long *sim_patch_offset = var_grp->variable[start_index]->sim_patch[0]->offset;

              for (k1 = reg_patch_offset[2]; k1 < reg_patch_offset[2] + reg_patch_count[2]; k1++)
                for (j1 = reg_patch_offset[1]; j1 < reg_patch_offset[1] + reg_patch_count[1]; j1++)
                  for (i1 = reg_patch_offset[0]; i1 < reg_patch_offset[0] + reg_patch_count[0]; i1 = i1 + reg_patch_count[0])
                  {
                    index = (sim_patch_count[0] * sim_patch_count[1] * (k1 - sim_patch_offset[2])) +
                            (sim_patch_count[0] * (j1 - sim_patch_offset[1])) +
                            (i1 - sim_patch_offset[0]);
                    send_offset[count1] = index * var->vps * var->bpv/8;
                    send_count[count1] = reg_patch_count[0] * var->vps * var->bpv/8;

                    count1++;
                  }

              MPI_Type_indexed(count1, send_count, send_offset, MPI_BYTE, &chunk_data_type[chunk_counter]);
              MPI_Type_commit(&chunk_data_type[chunk_counter]);
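              /* Note: because the base type is MPI_BYTE, both the block lengths and the
                 displacements above are expressed in bytes. Each block is one contiguous
                 x-row of the regular patch inside this rank's simulation buffer, so the
                 single send of count 1 below ships the whole sub-box straight out of
                 var->sim_patch[0]->buffer without an intermediate packing step. */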

              if (rst_id->idx_dbg->state_dump != PIDX_NO_IO_AND_META_DATA_DUMP)
              {
                ret = MPI_Isend(var->sim_patch[0]->buffer, 1, chunk_data_type[chunk_counter], rst_id->reg_patch_grp[i]->max_patch_rank, 123, rst_id->idx_c->global_comm, &req[req_counter]);
                if (ret != MPI_SUCCESS)
                {
                  fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
                  return PIDX_err_mpi;
                }
              }

              if (rst_id->idx_dbg->state_dump == PIDX_META_DATA_DUMP_ONLY || rst_id->idx_dbg->state_dump == PIDX_NO_IO_AND_META_DATA_DUMP)
              {
                fprintf(rst_id->idx_dbg->mpi_dump_fp, "[N SND] [%lld] Source offset 0 Source size 1 My rank %d Dest rank %d\n", v, rst_id->idx_c->grank,  rst_id->reg_patch_grp[i]->max_patch_rank);
                fflush(rst_id->idx_dbg->mpi_dump_fp);
              }

              req_counter++;
              chunk_counter++;

              free(send_offset);
              free(send_count);
            }
          }
        }
      }
    }

    //fprintf(stderr, "[before] Rank %d\n", rst_id->idx_c->grank);
    ret = MPI_Waitall(req_counter, req, status);
    if (ret != MPI_SUCCESS)
    {
      fprintf(stderr, "Error: File [%s] Line [%d]\n", __FILE__, __LINE__);
      return (-1);
    }
    //fprintf(stderr, "[after] Rank %d\n", rst_id->idx_c->grank);

    for (i = 0; i < chunk_counter; i++)
      MPI_Type_free(&chunk_data_type[i]);
    free(chunk_data_type);
    chunk_data_type = 0;

    free(req);
    req = 0;
    free(status);
    status = 0;
    req_counter = 0;
  }


  return PIDX_success;
#else
  if (rst_id->idx->enable_rst == 1)
    return PIDX_err_rst;
  else
    return PIDX_success;
#endif
}
Example #22
FORT_DLL_SPEC void FORT_CALL mpi_type_indexed_ ( MPI_Fint *v1, MPI_Fint *v2, MPI_Fint *v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *ierr ){
    *ierr = MPI_Type_indexed( (int)*v1, v2, v3, (MPI_Datatype)(*v4), (MPI_Datatype *)(v5) );
}
Example #23
int compute_solution(const int max_iters, int nintci, int nintcf, int nextcf, int** lcc, double* bp,
                     double* bs, double* bw, double* bl, double* bn, double* be, double* bh,
                     double* cnorm, double* var, double *su, double* cgup, double* residual_ratio,
                     int* local_global_index, int* global_local_index, int neighbors_count,
                     int* send_count, int** send_list, int* recv_count, int** recv_list, int num_elems_local, int* epart) {
    int iter = 1;
    int if1 = 0;
    int if2 = 0;
    int nor = 1;
    int nor1 = nor - 1;
    int nc = 0;
    int my_rank, num_procs;
    int i = 0, j = 0, k = 0;
    MPI_Status status;

    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);      // get current process id
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);    // get number of processes

    MPI_Datatype send_type[num_procs], recv_type[num_procs];
    // allocate arrays used in gccg
    int nomax = 3;


    /** the reference residual*/
    double resref = 0.0;
    double resref_sum = 0.0;
    /** array storing residuals */
    double *resvec = (double *) calloc(sizeof(double), (num_elems_local));
    //printf("num_elems_local is:%d\n",num_elems_local); 
    // initialize the reference residual
    for ( nc = 0; nc < num_elems_local; nc++ ) {
        resvec[nc] = su[nc];
        resref = resref + resvec[nc] * resvec[nc];
    }
    MPI_Allreduce(&resref, &resref_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    resref_sum = sqrt(resref_sum);
    if ( resref_sum < 1.0e-15 ) {
        fprintf(stderr, "Residue sum less than 1.e-15 - %lf\n", resref_sum);
        return 0;
    }
 
   
  /** the computation vectors */
    double *direc1 = (double *) calloc(sizeof(double), (nextcf + 1));
    double *direc2 = (double *) calloc(sizeof(double), (nextcf + 1));
    double *adxor1 = (double *) calloc(sizeof(double), (num_elems_local));
    double *adxor2 = (double *) calloc(sizeof(double), (num_elems_local));
    double *dxor1 = (double *) calloc(sizeof(double), (num_elems_local));
    double *dxor2 = (double *) calloc(sizeof(double), (num_elems_local));
    double *cnorm_sum = (double *) calloc(sizeof(double), 4);
    for (i = 0; i < 4; i++) {
        cnorm_sum[i] = 1;
    }
    int nc_global = 0;
    int *block_len_send;
    int *block_len_recv;
    // Define one indexed MPI datatype per neighbouring process
    for (i = 0; i < num_procs; i++) {
        if (send_count[i] != 0) {
            block_len_send = (int *) calloc(send_count[i], sizeof(int));
            for (k = 0; k < send_count[i]; k++) {
                block_len_send[k] = 1;
            }
            MPI_Type_indexed(send_count[i], block_len_send, send_list[i], MPI_DOUBLE, &send_type[i]);
            MPI_Type_commit(&send_type[i]);
            free(block_len_send);
        }
        if (recv_count[i] != 0) {
            block_len_recv = (int *) calloc(recv_count[i], sizeof(int));
            for (k = 0; k < recv_count[i]; k++) {
                block_len_recv[k] = 1;
            }
            MPI_Type_indexed(recv_count[i], block_len_recv, recv_list[i], MPI_DOUBLE, &recv_type[i]);
            MPI_Type_commit(&recv_type[i]);
            free(block_len_recv);
        }
    }
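    /* With unit block lengths and the exchange lists as displacements, direc1 can later be
       passed directly to MPI_Sendrecv: send_type[i] picks out the boundary cells this rank
       owns, while recv_type[i] scatters the incoming values straight into the halo entries
       of direc1, so no explicit pack/unpack buffers are needed. */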


    while ( iter < max_iters ) {
        /********** START COMP PHASE 1 **********/
        // update the old values of direc
        for ( nc = 0; nc < num_elems_local; nc++ ) {
            nc_global = local_global_index[nc];
            direc1[nc_global] = direc1[nc_global] + resvec[nc] * cgup[nc];
        }

        // communicate the direc1
        for (i = 0; i < num_procs; i++) {
            if ( send_count[i] != 0 ) {
                MPI_Sendrecv(direc1, 1, send_type[i], i, i,
                             direc1, 1, recv_type[i], i, my_rank, MPI_COMM_WORLD, &status);
            }
        }

        // compute new guess (approximation) for direc
        for ( nc = 0; nc < num_elems_local; nc++ ) {
            nc_global = local_global_index[nc];
            direc2[nc] = bp[nc] * direc1[nc_global] - bs[nc] * direc1[lcc[nc][0]]
                         - bw[nc] * direc1[lcc[nc][3]] - bl[nc] * direc1[lcc[nc][4]]
                         - bn[nc] * direc1[lcc[nc][2]] - be[nc] * direc1[lcc[nc][1]]
                         - bh[nc] * direc1[lcc[nc][5]];
        }
       
        /********** END COMP PHASE 1 **********/

        /********** START COMP PHASE 2 **********/
        // execute normalization steps
        double oc1, oc2, occ;
        double occ_sum;
            
        if ( nor1 == 1 ) {
            oc1 = 0;
            occ = 0;

            for ( nc = 0; nc < num_elems_local; nc++ ) {
                occ = occ + adxor1[nc] * direc2[nc];
            }
           
            MPI_Allreduce(&occ, &occ_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
            oc1 = occ_sum / cnorm_sum[1];

            for ( nc = 0; nc < num_elems_local; nc++ ) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc];
                nc_global = local_global_index[nc];
                direc1[nc_global] = direc1[nc_global] - oc1 * dxor1[nc];
            }

            if1++;
        } else {
            if ( nor1 == 2 ) {
                oc1 = 0;
                occ = 0;

                for ( nc = 0; nc < num_elems_local; nc++ ) {
                    occ = occ + adxor1[nc] * direc2[nc];
                }
                MPI_Allreduce(&occ, &occ_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                oc1 = occ_sum / cnorm_sum[1];

                oc2 = 0;
                occ = 0;
                for ( nc = 0; nc < num_elems_local; nc++ ) {
                    occ = occ + adxor2[nc] * direc2[nc];
                }
                MPI_Allreduce(&occ, &occ_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                oc2 = occ_sum / cnorm_sum[2];

                for ( nc = 0; nc < num_elems_local; nc++ ) {
                    direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc];
                    nc_global = local_global_index[nc];
                    direc1[nc_global] = direc1[nc_global] - oc1 * dxor1[nc] - oc2 * dxor2[nc];
                }

                if2++;
            }
        }
        //MPI_Barrier(MPI_COMM_WORLD);
        // compute the new residual
        cnorm[nor] = 0;
        double omega = 0;
        double omega_sum = 0;
        for ( nc = 0; nc < num_elems_local; nc++ ) {
            cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc];
            omega = omega + resvec[nc] * direc2[nc];
        }
        MPI_Allreduce(&cnorm[nor], &cnorm_sum[nor], 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce(&omega, &omega_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        omega = omega_sum / cnorm_sum[nor];
  
        double res_updated = 0.0;
        double res_updated_sum;
        for ( nc = 0; nc < num_elems_local; nc++ ) {
            nc_global = local_global_index[nc];

            var[nc] = var[nc] + omega * direc1[nc_global];
            resvec[nc] = resvec[nc] - omega * direc2[nc];
            res_updated = res_updated + resvec[nc] * resvec[nc];
        }

        MPI_Allreduce(&res_updated, &res_updated_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        res_updated_sum = sqrt(res_updated_sum);
   
        *residual_ratio = res_updated_sum / resref_sum;
 
 
        // exit on no improvements of residual
        if ( *residual_ratio <= 1.0e-10 ) break;

        iter++;

        // prepare additional arrays for the next iteration step
        if ( nor == nomax ) {
            nor = 1;
        } else {
            if ( nor == 1 ) {
                for ( nc = 0; nc < num_elems_local; nc++ ) {
                    nc_global = local_global_index[nc];
                    dxor1[nc] = direc1[nc_global];
                    adxor1[nc] = direc2[nc];
                }
            } else {
                if ( nor == 2 ) {
                    for ( nc = 0; nc < num_elems_local; nc++ ) {
                        nc_global = local_global_index[nc];
                        dxor2[nc] = direc1[nc_global];
                        adxor2[nc] = direc2[nc];
                    }
                }
            }

            nor++;
        }
        nor1 = nor - 1;
        /********** END COMP PHASE 2 **********/
    }
           
    if (my_rank == 0) {
        printf("rank is: %d, iter is: %d, residual ratio is: %e\n", my_rank, iter, *residual_ratio);
        // hard-coded reference values for the "pent" test case
        printf("pent iter is: %d, residual ratio is: %e\n", 546, 9.695255e-11);
    }
    free(resvec);
    free(direc1);
    free(direc2);
    free(adxor1);
    free(adxor2);
    free(dxor1);
    free(dxor2);

    return iter;
}
/*
 * allgatherv_neighborexchange
 *
 * Function:     allgatherv using N/2 steps (O(N))
 * Accepts:      Same arguments as MPI_Allgatherv
 * Returns:      MPI_SUCCESS or error code
 *
 * Description:  Neighbor Exchange algorithm for allgather adapted for 
 *               allgatherv.
 *               Described by Chen et al. in
 *               "Performance Evaluation of Allgather Algorithms on
 *                Terascale Linux Cluster with Fast Ethernet",
 *               Proceedings of the Eighth International Conference on
 *               High-Performance Computing in Asia-Pacific Region
 *               (HPCASIA'05), 2005
 * 
 *               Rank r exchanges message with one of its neighbors and
 *               forwards the data further in the next step.
 *
 *               No additional memory requirements.
 * 
 * Limitations:  Algorithm works only on even number of processes.
 *               For odd number of processes we switch to ring algorithm.
 * 
 * Example on 6 nodes:
 *  Initial state
 *    #     0      1      2      3      4      5
 *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]
 *   Step 0:
 *    #     0      1      2      3      4      5
 *         [0]    [0]    [ ]    [ ]    [ ]    [ ]
 *         [1]    [1]    [ ]    [ ]    [ ]    [ ]
 *         [ ]    [ ]    [2]    [2]    [ ]    [ ]
 *         [ ]    [ ]    [3]    [3]    [ ]    [ ]
 *         [ ]    [ ]    [ ]    [ ]    [4]    [4]
 *         [ ]    [ ]    [ ]    [ ]    [5]    [5]
 *   Step 1:
 *    #     0      1      2      3      4      5
 *         [0]    [0]    [0]    [ ]    [ ]    [0]
 *         [1]    [1]    [1]    [ ]    [ ]    [1]
 *         [ ]    [2]    [2]    [2]    [2]    [ ]
 *         [ ]    [3]    [3]    [3]    [3]    [ ]
 *         [4]    [ ]    [ ]    [4]    [4]    [4]
 *         [5]    [ ]    [ ]    [5]    [5]    [5]
 *   Step 2:
 *    #     0      1      2      3      4      5
 *         [0]    [0]    [0]    [0]    [0]    [0]
 *         [1]    [1]    [1]    [1]    [1]    [1]
 *         [2]    [2]    [2]    [2]    [2]    [2]
 *         [3]    [3]    [3]    [3]    [3]    [3]
 *         [4]    [4]    [4]    [4]    [4]    [4]
 *         [5]    [5]    [5]    [5]    [5]    [5]
 */
void ADCL_allgatherv_neighborexchange(ADCL_request_t *req)
{
    int line = -1;
    int rank, size;
    int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
    int new_scounts[2], new_sdispls[2], new_rcounts[2], new_rdispls[2];
    int i, even_rank;
    int err = 0;
    MPI_Aint rlb, rext;
    char *tmpsend = NULL, *tmprecv = NULL;
    MPI_Datatype  new_rdtype, new_sdtype;

    ADCL_topology_t *topo = req->r_emethod->em_topo;
    ADCL_vmap_t *r_vmap = req->r_rvecs[0]->v_map;
    void *sbuf = req->r_svecs[0]->v_data;
    void *rbuf = req->r_rvecs[0]->v_data;
    MPI_Comm comm = topo->t_comm;

    MPI_Datatype sdtype;
    int scount;
    MPI_Datatype rdtype = req->r_rdats[0];

    int *rcounts = r_vmap->m_rcnts;
    int *rdispls = r_vmap->m_displ;

    size = topo->t_size;
    rank = topo->t_rank;

    if (size % 2) {
	/* ADCL: I Don't like this!!!! */
        ADCL_allgatherv_ring(req);
	return;
    }

    err = MPI_Type_get_extent (rdtype, &rlb, &rext);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

    /* Initialization step:
       - if send buffer is not MPI_IN_PLACE, copy send buffer to 
       the appropriate block of receive buffer
    */
    tmprecv = (char*) rbuf + rdispls[rank] * rext;
    if (MPI_IN_PLACE != sbuf) {
        sdtype  = req->r_sdats[0];
        scount = req->r_scnts[0];
        tmpsend = (char*) sbuf;
        err = MPI_Sendrecv (tmpsend, scount, sdtype, rank, ADCL_TAG_ALLGATHERV,
               tmprecv, rcounts[rank], rdtype, rank, ADCL_TAG_ALLGATHERV,
	       comm,  MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl;  }
    } 

    /* Determine neighbors, order in which blocks will arrive, etc. */
    even_rank = !(rank % 2);
    if (even_rank) {
        neighbor[0] = (rank + 1) % size;
        neighbor[1] = (rank - 1 + size) % size;
        recv_data_from[0] = rank;
        recv_data_from[1] = rank;
        offset_at_step[0] = (+2);
        offset_at_step[1] = (-2);
    } else {
        neighbor[0] = (rank - 1 + size) % size;
        neighbor[1] = (rank + 1) % size;
        recv_data_from[0] = neighbor[0];
        recv_data_from[1] = neighbor[0];
        offset_at_step[0] = (-2);
        offset_at_step[1] = (+2);
    }

    /* Communication loop:
       - First step is special: exchange a single block with neighbor[0].
       - Rest of the steps:
       update recv_data_from according to offset, and
       exchange two blocks with the appropriate neighbor.
       The send location becomes the previous receive location.
       Note: we need to create an indexed datatype to send and receive these
       blocks properly.
    */
    tmprecv = (char*)rbuf + rdispls[neighbor[0]] * rext;
    tmpsend = (char*)rbuf + rdispls[rank] * rext;
    err = MPI_Sendrecv(tmpsend, rcounts[rank], rdtype, 
		       neighbor[0], ADCL_TAG_ALLGATHERV,
		       tmprecv, rcounts[neighbor[0]], rdtype, 
		       neighbor[0], ADCL_TAG_ALLGATHERV,
		       comm, MPI_STATUS_IGNORE);
    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
   
    /* Determine initial sending counts and displacements*/
    if (even_rank) {
        send_data_from = rank;
    } else {
        send_data_from = recv_data_from[0];
    }

    for (i = 1; i < (size / 2); i++) {
        const int i_parity = i % 2;
        recv_data_from[i_parity] = 
            (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;

        /* Create new indexed types for sending and receiving.
           We are sending data from ranks (send_data_from) and (send_data_from+1)
           We are receiving data from ranks (recv_data_from[i_parity]) and
           (recv_data_from[i_parity]+1).
        */
        new_scounts[0] = rcounts[send_data_from];
        new_scounts[1] = rcounts[(send_data_from + 1)];
        new_sdispls[0] = rdispls[send_data_from];
        new_sdispls[1] = rdispls[(send_data_from + 1)];
        err = MPI_Type_indexed(2, new_scounts, new_sdispls, rdtype, 
                                      &new_sdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        err = MPI_Type_commit(&new_sdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

        new_rcounts[0] = rcounts[recv_data_from[i_parity]];
        new_rcounts[1] = rcounts[(recv_data_from[i_parity] + 1)];
        new_rdispls[0] = rdispls[recv_data_from[i_parity]];
        new_rdispls[1] = rdispls[(recv_data_from[i_parity] + 1)];
        err = MPI_Type_indexed(2, new_rcounts, new_rdispls, rdtype, 
                                      &new_rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        err = MPI_Type_commit(&new_rdtype);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
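        /* new_sdtype covers the two blocks this rank already holds (send_data_from and
           send_data_from + 1); new_rdtype covers the slots the two incoming blocks belong
           to. Both reuse rcounts/rdispls directly, so a single Sendrecv with count 1 moves
           two non-contiguous blocks in each direction. */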
      
        tmprecv = (char*)rbuf;
        tmpsend = (char*)rbuf;
      
        /* Sendreceive */
        err = MPI_Sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
			   ADCL_TAG_ALLGATHERV,
			   tmprecv, 1, new_rdtype, neighbor[i_parity],
			   ADCL_TAG_ALLGATHERV,
			   comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
	
        send_data_from = recv_data_from[i_parity];
      
        MPI_Type_free(&new_sdtype);
        MPI_Type_free(&new_rdtype);
    }

 err_hndl:
    return;
}
void ParticleMovementHandler_ShareAndUpdateParticlesThatHaveMovedOutsideDomains(
		ParticleMovementHandler* self,
		Particle_Index*      globalParticlesArrivingMyDomainCountPtr,
		Particle_Index*      globalParticlesOutsideDomainTotalPtr )
{
	Particle_Index*		globalParticlesOutsideDomainCounts = NULL;		
	Particle_Index		maxGlobalParticlesOutsideDomainCount = 0;		
	Processor_Index		proc_I = 0;
	Particle_Index		lParticle_I = 0;
	Particle_Index		particle_I = 0;

	Journal_DPrintfL( self->debug, 2, "In %s():\n", __func__ );
	Stream_IndentBranch( Swarm_Debug );

	(*globalParticlesArrivingMyDomainCountPtr) = 0;
	(*globalParticlesOutsideDomainTotalPtr) = 0;		

	/* Find the counts of particles	outside domain... */
	ParticleMovementHandler_GetCountOfParticlesOutsideDomainPerProcessor(
		self,
		&globalParticlesOutsideDomainCounts,
		&maxGlobalParticlesOutsideDomainCount,
		globalParticlesOutsideDomainTotalPtr );

	if ( (*globalParticlesOutsideDomainTotalPtr) > 0 ) {
		Particle*		particlesLeavingMyDomain = NULL;
		Particle*		globalParticlesLeavingDomains = NULL;
		SizeT			particlesLeavingDomainSizeBytes = 0;
		Cell_DomainIndex	lCell_I = 0;
		GlobalParticle*	        currParticle = NULL;
		Particle_Index		currProcParticlesOutsideDomainCount = 0;
		Particle_Index		currProcOffset = 0;

		particlesLeavingDomainSizeBytes = self->swarm->particleExtensionMgr->finalSize
			* maxGlobalParticlesOutsideDomainCount;
		particlesLeavingMyDomain = Memory_Alloc_Bytes( particlesLeavingDomainSizeBytes, "Particle",
			"particlesLeavingMyDomain" );

		// TODO: investigate doing this with an MPI_Indexed datatype instead...
		Journal_DPrintfL( self->debug, 2, "Copying particles leaving my domain to temp. transfer array\n" );
		Stream_IndentBranch( Swarm_Debug );

		#if 0
		MPI_Type_indexed( 
			self->particlesOutsideDomainTotalCount,
			blocklens,
			self->particlesOutsideDomainIndices,//change to contiguous indices?
			MPI_BYTE,
			ParticlesLeavingDomainTransferIndexed
			);
		#endif	
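		/* A fuller sketch of the indexed-datatype idea from the TODO above (hypothetical,
		   still disabled): treat each outgoing particle as one block of
		   particleExtensionMgr->finalSize bytes, with byte displacements derived from
		   particlesOutsideDomainIndices. The names blocklens/byteDispls below are
		   illustrative only and do not exist elsewhere in this code. */
		#if 0
		{
			MPI_Datatype particlesLeavingDomainTransferIndexed;
			int*         blocklens  = malloc( sizeof(int) * self->particlesOutsideDomainTotalCount );
			int*         byteDispls = malloc( sizeof(int) * self->particlesOutsideDomainTotalCount );

			for ( particle_I = 0; particle_I < self->particlesOutsideDomainTotalCount; particle_I++ ) {
				blocklens[particle_I]  = (int)self->swarm->particleExtensionMgr->finalSize;
				byteDispls[particle_I] = (int)( self->particlesOutsideDomainIndices[particle_I]
					* self->swarm->particleExtensionMgr->finalSize );
			}
			MPI_Type_indexed( self->particlesOutsideDomainTotalCount, blocklens, byteDispls,
				MPI_BYTE, &particlesLeavingDomainTransferIndexed );
			MPI_Type_commit( &particlesLeavingDomainTransferIndexed );
			/* ...the committed type could then describe the scattered particles in place of
			   the explicit copy loop below... */
			MPI_Type_free( &particlesLeavingDomainTransferIndexed );
			free( blocklens );
			free( byteDispls );
		}
		#endif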

		for ( particle_I=0; particle_I < self->particlesOutsideDomainTotalCount; particle_I++ ) {
			Journal_DPrintfL( self->debug, 3, "Copying particle %d to particlesLeavingMyDomain[%d]\n",
				self->particlesOutsideDomainIndices[particle_I], particle_I );
			Swarm_CopyParticleOffSwarm( self->swarm,
				particlesLeavingMyDomain, particle_I,
				self->particlesOutsideDomainIndices[particle_I] );
		}	
		Stream_UnIndentBranch( Swarm_Debug );

		/* allocate the big global receive buffer */
		globalParticlesLeavingDomains = Memory_Alloc_Bytes( particlesLeavingDomainSizeBytes * self->swarm->nProc,
			"Particle", "globalParticlesLeavingDomains" );

		Journal_DPrintfL( self->debug, 2, "Getting the global array of particles leaving domains\n" );
		(void)MPI_Allgather( particlesLeavingMyDomain, particlesLeavingDomainSizeBytes, MPI_BYTE,
			globalParticlesLeavingDomains, particlesLeavingDomainSizeBytes, MPI_BYTE,
			self->swarm->comm );

		Journal_DPrintfL( self->debug, 2, "Checking through the global array of particles leaving domains, "
			"and snaffling those moving into my domain:\n" );
		Stream_IndentBranch( Swarm_Debug );
		for ( proc_I=0; proc_I < self->swarm->nProc; proc_I++ ) {

			if ( proc_I == self->swarm->myRank ) continue;

			currProcOffset = proc_I * maxGlobalParticlesOutsideDomainCount;
			currProcParticlesOutsideDomainCount = globalParticlesOutsideDomainCounts[proc_I];
			
			Journal_DPrintfL( self->debug, 3, "Checking particles that left proc. %d:\n", proc_I );
			for ( particle_I=0; particle_I < currProcParticlesOutsideDomainCount; particle_I++ ) {
				currParticle = (GlobalParticle*)ParticleAt( globalParticlesLeavingDomains,
					(currProcOffset + particle_I),
					self->swarm->particleExtensionMgr->finalSize );
				lCell_I = CellLayout_CellOf( self->swarm->cellLayout, currParticle );
				if ( lCell_I < self->swarm->cellLocalCount ) { 
					#if DEBUG
					Journal_DPrintfL( self->debug, 3, "Found particle at (%.2f,%.2f,%.2f) that's moved "
						"into my local cell %d...\n", currParticle->coord[0],
						currParticle->coord[1], currParticle->coord[2], lCell_I );
					#endif	
					
					/* copy particle to the lowest available slot in my particles array */
					lParticle_I = ParticleMovementHandler_FindFreeSlotAndPrepareForInsertion( (ParticleCommHandler*)self );

					Swarm_CopyParticleOntoSwarm( self->swarm, lParticle_I,
						globalParticlesLeavingDomains, (currProcOffset + particle_I) );
					Swarm_AddParticleToCell( self->swarm, lCell_I, lParticle_I );
					(*globalParticlesArrivingMyDomainCountPtr)++;
				}
				#if DEBUG
				else {
					currParticle = (GlobalParticle*)ParticleAt( globalParticlesLeavingDomains, 
						(currProcOffset + particle_I),
						self->swarm->particleExtensionMgr->finalSize );
					Journal_DPrintfL( self->debug, 3, "Ignoring particle at (%.2f,%.2f,%.2f) since "
						"not in my local cells...\n", currParticle->coord[0],
						currParticle->coord[1], currParticle->coord[2] );
				}
				#endif
			}		
		}	
		Stream_UnIndentBranch( Swarm_Debug );

		Memory_Free( particlesLeavingMyDomain );
		Memory_Free( globalParticlesLeavingDomains );

		/* Defensive check to make sure particles not lost/created accidentally somehow */
		if( self->defensive == True ) {
			ParticleMovementHandler_EnsureParticleCountLeavingDomainsEqualsCountEnteringGlobally( self );
		}
	}	
	Memory_Free( globalParticlesOutsideDomainCounts );
	Stream_UnIndentBranch( Swarm_Debug );
}
Example #26
/* 
   Create a range of datatypes with a given count elements.
   This uses a selection of types, rather than an exhaustive collection.
   It allocates both send and receive types so that they can have the same
   type signature (collection of basic types) but different type maps (layouts
   in memory) 
 */
int MTestGetDatatypes( MTestDatatype *sendtype, MTestDatatype *recvtype,
		       int count )
{
    int merr;
    int i;

    sendtype->InitBuf	  = 0;
    sendtype->FreeBuf	  = 0;
    sendtype->CheckBuf	  = 0;
    sendtype->datatype	  = 0;
    sendtype->isBasic	  = 0;
    sendtype->printErrors = 0;
    recvtype->InitBuf	  = 0;
    recvtype->FreeBuf	  = 0;

    recvtype->CheckBuf	  = 0;
    recvtype->datatype	  = 0;
    recvtype->isBasic	  = 0;
    recvtype->printErrors = 0;

    sendtype->buf	  = 0;
    recvtype->buf	  = 0;

    /* Set the defaults for the message lengths */
    sendtype->count	  = count;
    recvtype->count	  = count;
    /* Use datatype_index to choose a datatype to use.  If at the end of the
       list, return 0 */
    switch (datatype_index) {
    case 0:
	sendtype->datatype = MPI_INT;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_INT;
	recvtype->isBasic  = 1;
	break;
    case 1:
	sendtype->datatype = MPI_DOUBLE;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_DOUBLE;
	recvtype->isBasic  = 1;
	break;
    case 2:
	sendtype->datatype = MPI_FLOAT_INT;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_FLOAT_INT;
	recvtype->isBasic  = 1;
	break;
    case 3:
	merr = MPI_Type_dup( MPI_INT, &sendtype->datatype );
	if (merr) MTestPrintError( merr );
	merr = MPI_Type_set_name( sendtype->datatype,
                                  (char*)"dup of MPI_INT" );
	if (merr) MTestPrintError( merr );
	merr = MPI_Type_dup( MPI_INT, &recvtype->datatype );
	if (merr) MTestPrintError( merr );
	merr = MPI_Type_set_name( recvtype->datatype,
                                  (char*)"dup of MPI_INT" );
	if (merr) MTestPrintError( merr );
	/* dup'ed types are already committed if the original type 
	   was committed (MPI-2, section 8.8) */
	break;
    case 4:
	/* vector send type and contiguous receive type */
	/* These sizes are in bytes (see the VectorInit code) */
 	sendtype->stride   = 3 * sizeof(int);
	sendtype->blksize  = sizeof(int);
	sendtype->nelm     = recvtype->count;

	merr = MPI_Type_vector( recvtype->count, 1, 3, MPI_INT, 
				&sendtype->datatype );
	if (merr) MTestPrintError( merr );
        merr = MPI_Type_commit( &sendtype->datatype );
	if (merr) MTestPrintError( merr );
	merr = MPI_Type_set_name( sendtype->datatype,
                                  (char*)"int-vector" );
	if (merr) MTestPrintError( merr );
	sendtype->count    = 1;
 	recvtype->datatype = MPI_INT;
	recvtype->isBasic  = 1;
	sendtype->InitBuf  = MTestTypeVectorInit;
	recvtype->InitBuf  = MTestTypeContigInitRecv;
	sendtype->FreeBuf  = MTestTypeVectorFree;
	recvtype->FreeBuf  = MTestTypeContigFree;
	sendtype->CheckBuf = 0;
	recvtype->CheckBuf = MTestTypeContigCheckbuf;
	break;

    case 5:
	/* Indexed send using many small blocks and contig receive */
	sendtype->blksize  = sizeof(int);
	sendtype->nelm     = recvtype->count;
	sendtype->basesize = sizeof(int);
	sendtype->displs   = (int *)malloc( sendtype->nelm * sizeof(int) );
	sendtype->index    = (int *)malloc( sendtype->nelm * sizeof(int) );
	if (!sendtype->displs || !sendtype->index) {
	    MTestError( "Out of memory in type init\n" );
	}
	/* Make the sizes larger (4 ints) to help push the total
	   size to over 256k in some cases, as the MPICH code as of
	   10/1/06 used large internal buffers for packing non-contiguous
	   messages */
	for (i=0; i<sendtype->nelm; i++) {
	    sendtype->index[i]   = 4;
	    sendtype->displs[i]  = 5*i;
	}
	merr = MPI_Type_indexed( sendtype->nelm,
				 sendtype->index, sendtype->displs, 
				 MPI_INT, &sendtype->datatype );
	if (merr) MTestPrintError( merr );
        merr = MPI_Type_commit( &sendtype->datatype );
	if (merr) MTestPrintError( merr );
	merr = MPI_Type_set_name( sendtype->datatype,
                                  (char*)"int-indexed(4-int)" );
	if (merr) MTestPrintError( merr );
	sendtype->count    = 1;
	sendtype->InitBuf  = MTestTypeIndexedInit;
	sendtype->FreeBuf  = MTestTypeIndexedFree;
	sendtype->CheckBuf = 0;

 	recvtype->datatype = MPI_INT;
	recvtype->isBasic  = 1;
	recvtype->count    = count * 4;
	recvtype->InitBuf  = MTestTypeContigInitRecv;
	recvtype->FreeBuf  = MTestTypeContigFree;
	recvtype->CheckBuf = MTestTypeContigCheckbuf;
	break;

    case 6:
	/* Indexed send using 2 large blocks and contig receive */
	sendtype->blksize  = sizeof(int);
	sendtype->nelm     = 2;
	sendtype->basesize = sizeof(int);
	sendtype->displs   = (int *)malloc( sendtype->nelm * sizeof(int) );
	sendtype->index    = (int *)malloc( sendtype->nelm * sizeof(int) );
	if (!sendtype->displs || !sendtype->index) {
	    MTestError( "Out of memory in type init\n" );
	}
	/* index -> block size */
	sendtype->index[0]   = (recvtype->count + 1) / 2;
	sendtype->displs[0]  = 0;
	sendtype->index[1]   = recvtype->count - sendtype->index[0];
	sendtype->displs[1]  = sendtype->index[0] + 1; 
	/* There is a deliberate gap here */

	merr = MPI_Type_indexed( sendtype->nelm,
				 sendtype->index, sendtype->displs, 
				 MPI_INT, &sendtype->datatype );
	if (merr) MTestPrintError( merr );
        merr = MPI_Type_commit( &sendtype->datatype );
	if (merr) MTestPrintError( merr );
	merr = MPI_Type_set_name( sendtype->datatype,
                                  (char*)"int-indexed(2 blocks)" );
	if (merr) MTestPrintError( merr );
	sendtype->count    = 1;
	sendtype->InitBuf  = MTestTypeIndexedInit;
	sendtype->FreeBuf  = MTestTypeIndexedFree;
	sendtype->CheckBuf = 0;

 	recvtype->datatype = MPI_INT;
	recvtype->isBasic  = 1;
	recvtype->count    = sendtype->index[0] + sendtype->index[1];
	recvtype->InitBuf  = MTestTypeContigInitRecv;
	recvtype->FreeBuf  = MTestTypeContigFree;
	recvtype->CheckBuf = MTestTypeContigCheckbuf;
	break;

    case 7:
	/* Indexed receive using many small blocks and contig send */
	recvtype->blksize  = sizeof(int);
	recvtype->nelm     = recvtype->count;
	recvtype->basesize = sizeof(int);
	recvtype->displs   = (int *)malloc( recvtype->nelm * sizeof(int) );
	recvtype->index    = (int *)malloc( recvtype->nelm * sizeof(int) );
	if (!recvtype->displs || !recvtype->index) {
	    MTestError( "Out of memory in type recv init\n" );
	}
	/* Make the sizes larger (4 ints) to help push the total
	   size to over 256k in some cases, as the MPICH code as of
	   10/1/06 used large internal buffers for packing non-contiguous
	   messages */
	/* Note that there are gaps in the indexed type */
	for (i=0; i<recvtype->nelm; i++) {
	    recvtype->index[i]   = 4;
	    recvtype->displs[i]  = 5*i;
	}
	merr = MPI_Type_indexed( recvtype->nelm,
				 recvtype->index, recvtype->displs, 
				 MPI_INT, &recvtype->datatype );
	if (merr) MTestPrintError( merr );
        merr = MPI_Type_commit( &recvtype->datatype );
	if (merr) MTestPrintError( merr );
	merr = MPI_Type_set_name( recvtype->datatype,
                                  (char*)"recv-int-indexed(4-int)" );
	if (merr) MTestPrintError( merr );
	recvtype->count    = 1;
	recvtype->InitBuf  = MTestTypeIndexedInitRecv;
	recvtype->FreeBuf  = MTestTypeIndexedFree;
	recvtype->CheckBuf = MTestTypeIndexedCheckbuf;

 	sendtype->datatype = MPI_INT;
	sendtype->isBasic  = 1;
	sendtype->count    = count * 4;
	sendtype->InitBuf  = MTestTypeContigInit;
	sendtype->FreeBuf  = MTestTypeContigFree;
	sendtype->CheckBuf = 0;
	break;

	/* Less commonly used but still simple types */
    case 8:
	sendtype->datatype = MPI_SHORT;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_SHORT;
	recvtype->isBasic  = 1;
	break;
    case 9:
	sendtype->datatype = MPI_LONG;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_LONG;
	recvtype->isBasic  = 1;
	break;
    case 10:
	sendtype->datatype = MPI_CHAR;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_CHAR;
	recvtype->isBasic  = 1;
	break;
    case 11:
	sendtype->datatype = MPI_UINT64_T;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_UINT64_T;
	recvtype->isBasic  = 1;
	break;
    case 12:
	sendtype->datatype = MPI_FLOAT;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_FLOAT;
	recvtype->isBasic  = 1;
	break;

#ifndef USE_STRICT_MPI
	/* MPI_BYTE may only be used with MPI_BYTE in strict MPI */
    case 13:
	sendtype->datatype = MPI_INT;
	sendtype->isBasic  = 1;
	recvtype->datatype = MPI_BYTE;
	recvtype->isBasic  = 1;
	recvtype->count    *= sizeof(int);
	break;
#endif
    default:
	datatype_index = -1;
    }

    if (!sendtype->InitBuf) {
	sendtype->InitBuf  = MTestTypeContigInit;
	recvtype->InitBuf  = MTestTypeContigInitRecv;
	sendtype->FreeBuf  = MTestTypeContigFree;
	recvtype->FreeBuf  = MTestTypeContigFree;
	sendtype->CheckBuf = MTestTypeContigCheckbuf;
	recvtype->CheckBuf = MTestTypeContigCheckbuf;
    }
    datatype_index++;

    if (dbgflag && datatype_index > 0) {
	int typesize;
	fprintf( stderr, "%d: sendtype is %s\n", wrank, MTestGetDatatypeName( sendtype ) );
	merr = MPI_Type_size( sendtype->datatype, &typesize );
	if (merr) MTestPrintError( merr );
	fprintf( stderr, "%d: sendtype size = %d\n", wrank, typesize );
	fprintf( stderr, "%d: recvtype is %s\n", wrank, MTestGetDatatypeName( recvtype ) );
	merr = MPI_Type_size( recvtype->datatype, &typesize );
	if (merr) MTestPrintError( merr );
	fprintf( stderr, "%d: recvtype size = %d\n", wrank, typesize );
	fflush( stderr );
	
    }
    else if (verbose && datatype_index > 0) {
	printf( "Get new datatypes: send = %s, recv = %s\n", 
		MTestGetDatatypeName( sendtype ), 
		MTestGetDatatypeName( recvtype ) );
	fflush( stdout );
    }

    return datatype_index;
}
Example #27
int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
                                         struct ompi_datatype_t *sdtype,
                                         void* rbuf, int rcount,
                                         struct ompi_datatype_t *rdtype,
                                         struct ompi_communicator_t *comm)
{
    int i, k, line = -1;
    int rank, size;
    int sendto, recvfrom, distance, *displs=NULL, *blen=NULL;
    int maxpacksize, packsize, position;
    char * tmpbuf=NULL, *packbuf=NULL;
    ptrdiff_t lb, sext, rext;
    int err = 0;
    int weallocated = 0;
    MPI_Datatype iddt;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_bruck rank %d", rank));

    err = ompi_ddt_get_extent (sdtype, &lb, &sext);
    if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }

    err = ompi_ddt_get_extent (rdtype, &lb, &rext);
    if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }


#ifdef blahblah
    /* try and SAVE memory by using the data segment hung off the communicator if possible */
    if (comm->c_coll_selected_data->mcct_num_reqs >= size) {
        /* we have enough preallocated for displacements and lengths */
        displs = (int*) comm->c_coll_basic_data->mcct_reqs;
        blen = (int *) (displs + size);
        weallocated = 0;
    } 
    else { /* allocate the buffers ourself */
#endif
        displs = (int *) malloc(size*sizeof(int));
        if (displs == NULL) { line = __LINE__; err = -1; goto err_hndl; }
        blen = (int *) malloc(size*sizeof(int));
        if (blen == NULL) { line = __LINE__; err = -1; goto err_hndl; }
        weallocated = 1;
#ifdef blahblah
    }
#endif


    /* Prepare for packing data */
    err = MPI_Pack_size( scount*size, sdtype, comm, &maxpacksize );
    if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }

    /* pack buffer allocation */
    packbuf = (char*) malloc((unsigned) maxpacksize);
    if (packbuf == NULL) { line = __LINE__; err = -1; goto err_hndl; }

    /* tmp buffer allocation for message data */
    tmpbuf = (char *) malloc(scount*size*sext);
    if (tmpbuf == NULL) { line = __LINE__; err = -1; goto err_hndl; }


    /* Step 1 - local rotation - shift up by rank */
    err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) ((size-rank)*scount),
                                          tmpbuf, ((char*)sbuf)+rank*scount*sext);
    if (err<0) {
        line = __LINE__; err = -1; goto err_hndl;
    }

    if (rank != 0) {
        err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) (rank*scount),
                                              tmpbuf+(size-rank)*scount*sext, (char*)sbuf);
        if (err<0) {
            line = __LINE__; err = -1; goto err_hndl;
        }
    }

    /* perform communication step */
    for (distance = 1; distance < size; distance<<=1) {

        /* send data to "sendto" */
        sendto = (rank+distance)%size;
        recvfrom = (rank-distance+size)%size;
        packsize = 0;
        k = 0;

        /* create indexed datatype */
        for (i = 1; i < size; i++) {
            if ((i&distance) == distance) {
                displs[k] = i*scount; blen[k] = scount;
                k++;
            }
        }
        /* Create the indexed datatype from the block lengths and displacements */
        err = MPI_Type_indexed(k, blen, displs, sdtype, &iddt);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }
        /* Commit the new datatype */
        err = MPI_Type_commit(&iddt);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }

        /* have the new distribution ddt, pack and exchange data */
        err = MPI_Pack(tmpbuf, 1, iddt, packbuf, maxpacksize, &packsize, comm);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }

        /* Sendreceive */
        err = ompi_coll_tuned_sendrecv ( packbuf, packsize, MPI_PACKED, sendto, 
                                         MCA_COLL_BASE_TAG_ALLTOALL,
                                         rbuf, packsize, MPI_PACKED, recvfrom, 
                                         MCA_COLL_BASE_TAG_ALLTOALL,
                                         comm, MPI_STATUS_IGNORE, rank);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }

        /* Unpack data from rbuf to tmpbuf */
        position = 0;
        err = MPI_Unpack(rbuf, packsize, &position,
                         tmpbuf, 1, iddt, comm);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }

        /* free ddt */
        err = MPI_Type_free(&iddt);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }
    } /* end of for (distance = 1... */

    /* Step 3 - local rotation - */
    for (i = 0; i < size; i++) {

        err = ompi_ddt_copy_content_same_ddt (rdtype, (int32_t) rcount,
                                              ((char*)rbuf)+(((rank-i+size)%size)*rcount*rext), 
                                              tmpbuf+i*rcount*rext);
        if (err<0) {
            line = __LINE__; err = -1; goto err_hndl;
        }
    }

    /* Step 4 - clean up */
    if (tmpbuf != NULL) free(tmpbuf);
    if (packbuf != NULL) free(packbuf);
    if (weallocated) {
        if (displs != NULL) free(displs);
        if (blen != NULL) free(blen);
    }
    return OMPI_SUCCESS;

 err_hndl:
    OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
    if (tmpbuf != NULL) free(tmpbuf);
    if (packbuf != NULL) free(packbuf);
    if (weallocated) {
        if (displs != NULL) free(displs);
        if (blen != NULL) free(blen);
    }
    return err;
}
Example #28
int main (int argc, char *argv[])
{
    int ret,errs = 0;
    char *src, *sendrec;
    int bufsize = BUFSIZE;

    int myrank, nprocs;
    int i;
    MPI_Status status;

    int small_non_contig_struct_count = 3;
    int small_non_contig_struct_blocklens[] = {1, 1, 1};
    MPI_Aint small_non_contig_struct_disps[] = {0, 2, 4};
    MPI_Datatype small_non_contig_struct_types[] = {MPI_CHAR, MPI_CHAR,MPI_CHAR};
    MPI_Datatype small_non_contig_struct_type;

    int contig_indexed_count = 3;
    int contig_indexed_blocklens[] = {1, 2, 1};
    int contig_indexed_indices[] = {4, 8, 16};
    MPI_Datatype contig_indexed_inner_type = MPI_INT;
    MPI_Datatype contig_indexed_type;

    MTest_Init( &argc, &argv );
    ret = MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    ret = MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs < 2) {
        printf("Need at least 2 procs\n");
        exit(RESFAIL_ERROR);
    }

    ret = MPI_Type_struct(small_non_contig_struct_count,
			  small_non_contig_struct_blocklens, 
			  small_non_contig_struct_disps,
			  small_non_contig_struct_types, 
			  &small_non_contig_struct_type);
    if_error("MPI_Type_struct", "small_non_contig_struct_type", ret);

    ret = MPI_Type_commit(&small_non_contig_struct_type);
    if_error("MPI_Type_commit", "small_non_contig_struct_type", ret);

    ret = MPI_Type_indexed(contig_indexed_count,contig_indexed_blocklens, 
			   contig_indexed_indices,contig_indexed_inner_type, 
			   &contig_indexed_type);
    if_error("MPI_Type_indexed", "contig_indexed_type", ret);

    ret = MPI_Type_commit(&contig_indexed_type);
    if_error("MPI_Type_commit", "contig_indexed_type", ret);


    ret = MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src);

    if (ret != 0) {
        printf("MPI_Alloc_mem src = #%x\n", ret);
        exit(INTERNAL_ERROR);
    }

    ret = MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &sendrec);

    if (ret != 0) {
        printf("MPI_Alloc_mem sendrec buf = #%x\n", ret);
        exit(INTERNAL_ERROR);
    }


    for (i=0; i<bufsize; i++) {
        src[i] = (char) i+1;
    }

    memset(sendrec, 0, bufsize);

    MPI_Barrier(MPI_COMM_WORLD);
    if (myrank == 1) {
        MPI_Send(src, 1, small_non_contig_struct_type, 0, 0xabc,MPI_COMM_WORLD);
    } else {
	MPI_Errhandler_set( MPI_COMM_WORLD, MPI_ERRORS_RETURN );
        ret = MPI_Recv(sendrec, 1, contig_indexed_type, 1, 0xabc,
		       MPI_COMM_WORLD, &status);
	if (ret == MPI_SUCCESS) {
	    printf( "MPI_Recv succeeded with non-matching datatype signature\n" );
	    errs++;
	}
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Type_free( &small_non_contig_struct_type );
    MPI_Type_free( &contig_indexed_type );

    MPI_Free_mem(src);
    MPI_Free_mem(sendrec);

    MTest_Finalize( errs );
    MPI_Finalize( );

    return 0;
}
Example #29
int compute_solution(int nprocs, int myrank, const int max_iters, int nintci, int nintcf, int nextcf, int** lcc, double* bp,
                     double* bs, double* bw, double* bl, double* bn, double* be, double* bh,
                     double* cnorm, double* var, double *su, double* cgup, double* residual_ratio,
                     int* local_global_index, int* global_local_index, int nghb_cnt, 
                     int* nghb_to_rank, int* send_cnt, int** send_lst, int *recv_cnt, int** recv_lst){
  
  // Add SCOREP manual instrumentation
  #ifdef SCOREP
  SCOREP_USER_REGION_DEFINE(handle1);
  SCOREP_USER_REGION_DEFINE(handle2);
  SCOREP_USER_REGION_DEFINE(handle3);
  SCOREP_USER_REGION_DEFINE(handle4);
  SCOREP_USER_REGION_DEFINE(handle5);
  SCOREP_USER_REGION_DEFINE(handle6);
  SCOREP_USER_REGION_DEFINE(handle7);
  SCOREP_USER_REGION_DEFINE(handle8);
  SCOREP_USER_REGION_DEFINE(handle9);
  SCOREP_USER_REGION_DEFINE(handle10);
  SCOREP_USER_REGION_DEFINE(handle_break);
  #endif
  
  #ifdef SCOREP
  SCOREP_USER_REGION_BEGIN( handle1, "handle1 - Initialization of variables and reference residuals.",SCOREP_USER_REGION_TYPE_COMMON );
  #endif
  
    /** parameters used in gccg */
    int iter = 1;
    int if1 = 0;
    int if2 = 0;
    int nor = 1;
    int nor1 = nor - 1;
    int nc = 0;
    int nomax = 3;
    
    /** the reference residual */
    double resref = 0.0;

    /** array storing residuals */
    double *resvec = (double *) calloc(sizeof(double), (nintcf + 1));

    // initialize the reference residual
    for ( nc = nintci; nc <= nintcf; nc++ ) {
        resvec[nc] = su[nc];
        resref = resref + resvec[nc] * resvec[nc];
    }
    
    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle1 );
    SCOREP_USER_REGION_BEGIN( handle2, "handle2 - 1st Allreduce.",SCOREP_USER_REGION_TYPE_COMMON );
    #endif
    
    // A2.3
    double global_resref = 0;
    MPI_Allreduce(&resref, &global_resref, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    resref = global_resref;
    
    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle2 );
    SCOREP_USER_REGION_BEGIN( handle3, "handle3 - Calculation of the residue sum.",SCOREP_USER_REGION_TYPE_COMMON );
    #endif
    

    resref = sqrt(resref);
    if ( resref < 1.0e-15 ) {
        fprintf(stderr, "Residue sum less than 1.e-15 - %lf\n", resref);
        return 0;
    }
    
    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle3 );
    SCOREP_USER_REGION_BEGIN( handle4, "handle4 - Memory allocation.",SCOREP_USER_REGION_TYPE_COMMON );
    #endif

    
    // Counting the number of ghost cells to extend the direc1
    int ghost_cells_recv = 0, ghost_cells_send = 0;
    int proc, i, j;
    
    for (proc = 0; proc < nghb_cnt; proc++) {
      ghost_cells_recv += recv_cnt[proc];
      ghost_cells_send += send_cnt[proc];
    }
    
    
    /** the computation vectors */
    // TODO:
    double *direc1 = (double *) calloc(sizeof(double), ((nextcf + 1) + ghost_cells_recv));
    double *direc2 = (double *) calloc(sizeof(double), (nextcf + 1));
    double *adxor1 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *adxor2 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *dxor1 = (double *) calloc(sizeof(double), (nintcf + 1));
    double *dxor2 = (double *) calloc(sizeof(double), (nintcf + 1));
    
    // Determine displacements for sending
    int **displacements = (int **) malloc(sizeof(int *) * nghb_cnt);
    int **blocklenghts = (int **) malloc(sizeof(int *) * nghb_cnt);
    
    for(proc = 0; proc < nghb_cnt; proc++) {
      displacements[proc] = (int*)calloc(send_cnt[proc],sizeof(int));
      blocklenghts[proc] = (int*)calloc(send_cnt[proc],sizeof(int));
    }
    
    j = 0;
    for (proc = 0; proc < nghb_cnt; proc++) {
      for (i = 0; i < send_cnt[proc]; i++) {
	displacements[proc][i] = global_local_index[send_lst[proc][i]];
	blocklenghts[proc][i] = 1;
      }
    }
    
    MPI_Request request;
    /* one send request per neighbour so every MPI_Isend below gets its own handle */
    MPI_Request *send_requests = (MPI_Request *) malloc(sizeof(MPI_Request) * nghb_cnt);
    MPI_Datatype *indextype = (MPI_Datatype *) malloc(sizeof(*indextype) * nghb_cnt);
    
    for (proc = 0; proc < nghb_cnt; proc++) {
      MPI_Type_indexed(send_cnt[proc], blocklenghts[proc], displacements[proc], MPI_DOUBLE, &(indextype[proc]));
      MPI_Type_commit(&(indextype[proc]));
    }
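    /* indextype[proc] addresses, with unit-length blocks, exactly the direc1 entries whose
       local indices are listed for that neighbour, so each halo update in the iteration loop
       is a single MPI_Isend of count 1 taken directly from direc1, with no send-side staging
       buffer. */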
    
    #ifdef SCOREP
    SCOREP_USER_REGION_END( handle4 );
    SCOREP_USER_REGION_BEGIN( handle5, "handle5 - Computation phase1. direc1 update.",SCOREP_USER_REGION_TYPE_COMMON );
    #endif


    while ( iter < max_iters ) {
        /**********  START COMP PHASE 1 **********/
        // update the old values of direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            direc1[nc] = direc1[nc] + resvec[nc] * cgup[nc];
        }

	  #ifdef SCOREP
	  SCOREP_USER_REGION_END( handle5 );
	  SCOREP_USER_REGION_BEGIN( handle6, "handle6 - Computation phase1. direc1 communication",SCOREP_USER_REGION_TYPE_COMMON );
	  #endif
        
	  // Communication of direc1 - start

	  for (proc = 0; proc < nghb_cnt; proc++) {
	    MPI_Isend(direc1, 1, indextype[proc], nghb_to_rank[proc], 0, MPI_COMM_WORLD, &send_requests[proc]);
	  }

	  // ghost values are appended behind the local part of direc1, starting at nextcf + 1
	  int ref_pos = nextcf + 1;

	  for (proc = 0; proc < nghb_cnt; proc++) {
	    MPI_Recv(&(direc1[ref_pos]), recv_cnt[proc], MPI_DOUBLE, nghb_to_rank[proc], 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	    ref_pos += recv_cnt[proc];
	  }

	  // complete the sends so the request handles can be safely reused in the next iteration
	  MPI_Waitall(nghb_cnt, send_requests, MPI_STATUSES_IGNORE);
	 
	  // Communication of direc1 - stop
	  
	  #ifdef SCOREP
	  SCOREP_USER_REGION_END( handle6 );
	  SCOREP_USER_REGION_BEGIN( handle7, "handle7 - Computation phase1. direc2 computation",SCOREP_USER_REGION_TYPE_COMMON );
	  #endif
        
        // compute new guess (approximation) for direc
        for ( nc = nintci; nc <= nintcf; nc++ ) {
		
            direc2[nc] = bp[nc] * direc1[nc] - bs[nc] * direc1[global_local_index[lcc[nc][0]]]
                         - be[nc] * direc1[global_local_index[lcc[nc][1]]] - bn[nc] * direc1[global_local_index[lcc[nc][2]]]
                         - bw[nc] * direc1[global_local_index[lcc[nc][3]]] - bl[nc] * direc1[global_local_index[lcc[nc][4]]]
                         - bh[nc] * direc1[global_local_index[lcc[nc][5]]];
			
			 
        }
        
        
        /********** END COMP PHASE 1 **********/
	
	  #ifdef SCOREP
	  SCOREP_USER_REGION_END( handle7 );
	  SCOREP_USER_REGION_BEGIN( handle8, "handle8 - Computation phase2. occ computation",SCOREP_USER_REGION_TYPE_COMMON );
	  #endif

        /********** START COMP PHASE 2 **********/
        // execute normalization steps
        double oc1, oc2, occ;
        if ( nor1 == 1 ) {
            oc1 = 0;
            occ = 0;

            for ( nc = nintci; nc <= nintcf; nc++ ) {
                occ = occ + direc2[nc] * adxor1[nc];
            }
            
            // A2.3
            double global_occ = 0.0;
	    MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	    occ = global_occ;

            oc1 = occ / cnorm[1];
            for ( nc = nintci; nc <= nintcf; nc++ ) {
                direc2[nc] = direc2[nc] - oc1 * adxor1[nc];
                direc1[nc] = direc1[nc] - oc1 * dxor1[nc];
            }

            if1++;
        } else {
            if ( nor1 == 2 ) {
                oc1 = 0;
                occ = 0;

                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    occ = occ + direc2[nc] * adxor1[nc];
                }
                
                // A2.3
		double global_occ = 0.0;
		MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		occ = global_occ;
		

                oc1 = occ / cnorm[1];
                oc2 = 0;
                occ = 0;
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    occ = occ + direc2[nc] * adxor2[nc];
                }
                
                // A2.3
	
		MPI_Allreduce(&occ, &global_occ, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
		occ = global_occ;

                oc2 = occ / cnorm[2];
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    direc1[nc] = direc1[nc] - oc1 * dxor1[nc] - oc2 * dxor2[nc];
                    direc2[nc] = direc2[nc] - oc1 * adxor1[nc] - oc2 * adxor2[nc];
                }

                if2++;
            }
        }
        
       #ifdef SCOREP
      SCOREP_USER_REGION_END( handle8 );
      SCOREP_USER_REGION_BEGIN( handle9, "handle9 - Computation phase2. residual_ratio computation - before break",SCOREP_USER_REGION_TYPE_COMMON );
      #endif

        // compute the new residual
        cnorm[nor] = 0;
        double omega = 0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            cnorm[nor] = cnorm[nor] + direc2[nc] * direc2[nc];
            omega = omega + resvec[nc] * direc2[nc];
        }
        
	// A2.3
	double global_cnorm_nor = 0.0, global_omega = 0.0;
	MPI_Allreduce(&(cnorm[nor]), &global_cnorm_nor, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	MPI_Allreduce(&omega, &global_omega, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	cnorm[nor] = global_cnorm_nor;
	omega = global_omega;

        omega = omega / cnorm[nor];
        double res_updated = 0.0;
        for ( nc = nintci; nc <= nintcf; nc++ ) {
            resvec[nc] = resvec[nc] - omega * direc2[nc];
            res_updated = res_updated + resvec[nc] * resvec[nc];
            var[nc] = var[nc] + omega * direc1[nc];
        }
        
	// A2.3
	double global_res_updated = 0.0;
	MPI_Allreduce(&res_updated, &global_res_updated, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
	res_updated = global_res_updated;

        res_updated = sqrt(res_updated);
        *residual_ratio = res_updated / resref;

       #ifdef SCOREP
      SCOREP_USER_REGION_END( handle9 );
      #endif
        // exit once the residual ratio has dropped below the convergence tolerance
        if ( *residual_ratio <= 1.0e-10 ) break;

      #ifdef SCOREP
      SCOREP_USER_REGION_BEGIN( handle_break, "handle_break - Computation phase2. residual_ratio computation - after break",SCOREP_USER_REGION_TYPE_COMMON );
      #endif

        iter++;

        // prepare additional arrays for the next iteration step
        if ( nor == nomax ) {
            nor = 1;
        } else {
            if ( nor == 1 ) {
                for ( nc = nintci; nc <= nintcf; nc++ ) {
                    dxor1[nc] = direc1[nc];
                    adxor1[nc] = direc2[nc];
                }
            } else {
                if ( nor == 2 ) {
                    for ( nc = nintci; nc <= nintcf; nc++ ) {
                        dxor2[nc] = direc1[nc];
                        adxor2[nc] = direc2[nc];
                    }
                }
            }

            nor++;
        }
        nor1 = nor - 1;
        /********** END COMP PHASE 2 **********/
    }
    
      #ifdef SCOREP
      SCOREP_USER_REGION_END( handle_break );
      SCOREP_USER_REGION_BEGIN( handle10, "handle10 - Memory freeing",SCOREP_USER_REGION_TYPE_COMMON );
      #endif

    for (i = 0; i < nghb_cnt; i++){
      free(displacements[i]);
      free(blocklenghts[i]);
      MPI_Type_free(&(indextype[i]));
    }

    free(displacements);
    free(blocklenghts);
    free(indextype);
    free(requests);
    
    free(direc1);
    free(direc2);
    free(adxor1);
    free(adxor2);
    free(dxor1);
    free(dxor2);
    free(resvec);

      #ifdef SCOREP
      SCOREP_USER_REGION_END( handle10 );
      #endif

    return iter;

    

}
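The solver above builds one indexed datatype per neighbour so that a single send of count 1 transfers all of that neighbour's scattered direc1 entries. Below is a minimal, self-contained sketch of the same pattern, reduced to two ranks and an illustrative 16-element array; the names (field, halo_idx, halo_type) are invented for the sketch and do not appear in the code above.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Local work array; rank 0 owns meaningful values, rank 1 starts empty. */
    double field[16];
    for (int i = 0; i < 16; i++) field[i] = (rank == 0) ? 100.0 + i : -1.0;

    /* Scattered local positions forming the "halo" to exchange (illustrative). */
    int halo_idx[4]  = { 1, 5, 6, 12 };
    int blocklen[4]  = { 1, 1, 1, 1 };

    MPI_Datatype halo_type;
    MPI_Type_indexed(4, blocklen, halo_idx, MPI_DOUBLE, &halo_type);
    MPI_Type_commit(&halo_type);

    if (rank == 0) {
        /* One send of count 1 ships all four scattered doubles. Assumes the
         * sketch is launched with at least two ranks. */
        MPI_Send(field, 1, halo_type, 1, 0, MPI_COMM_WORLD);
    } else if (rank == 1) {
        /* Receive them contiguously at the end of the array, much as the
         * solver above lands ghost values after nextcf. */
        MPI_Recv(&field[12], 4, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("ghosts: %g %g %g %g\n", field[12], field[13], field[14], field[15]);
    }

    MPI_Type_free(&halo_type);
    MPI_Finalize();
    return 0;
}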
Example #30
/* indexed_of_basics_test(void)
 *
 * Simple indexed type.
 *
 * Returns number of errors encountered.
 */
int indexed_of_basics_test(void)
{
    MPI_Datatype parent_type;
    int s_count = 3, s_blocklengths[3] = { 3, 2, 1 };
    int s_displacements[3] = { 10, 20, 30 };

    int nints, nadds, ntypes, combiner, *ints;
    MPI_Aint *adds = NULL;
    MPI_Datatype *types;

    int err, errs = 0;

    /* set up type */
    err = MPI_Type_indexed(s_count,
			   s_blocklengths,
			   s_displacements,
			   MPI_INT,
			   &parent_type);

    /* decode */
    err = MPI_Type_get_envelope(parent_type,
				&nints,
				&nadds,
				&ntypes,
				&combiner);
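    /* For MPI_COMBINER_INDEXED the envelope reports nints = 2*count + 1
     * (the count itself, count blocklengths and count displacements),
     * nadds = 0 and ntypes = 1 (the single old type). */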

    if (nints != 7) errs++;
    if (nadds != 0) errs++;
    if (ntypes != 1) errs++;
    if (combiner != MPI_COMBINER_INDEXED) errs++;

    if (verbose) {
        if (nints != 7) fprintf(stderr, "nints = %d; should be 7\n", nints);
	if (nadds != 0) fprintf(stderr, "nadds = %d; should be 0\n", nadds);
	if (ntypes != 1) fprintf(stderr, "ntypes = %d; should be 1\n", ntypes);
	if (combiner != MPI_COMBINER_INDEXED)
	    fprintf(stderr, "combiner = %s; should be indexed\n",
		    combiner_to_string(combiner));
    }

    ints = malloc(nints * sizeof(*ints));
    if (nadds) adds = malloc(nadds * sizeof(*adds));
    types = malloc(ntypes *sizeof(*types));

    err = MPI_Type_get_contents(parent_type,
				nints,
				nadds,
				ntypes,
				ints,
				adds,
				types);
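    /* The contents come back as ints[] = { count, blocklengths..., displacements... }
     * and types[0] is the old type the indexed type was built from. */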

    if (ints[0] != s_count) errs++;
    if (ints[1] != s_blocklengths[0]) errs++;
    if (ints[2] != s_blocklengths[1]) errs++;
    if (ints[3] != s_blocklengths[2]) errs++;
    if (ints[4] != s_displacements[0]) errs++;
    if (ints[5] != s_displacements[1]) errs++;
    if (ints[6] != s_displacements[2]) errs++;
    if (types[0] != MPI_INT) errs++;

    if (verbose) {
	if (ints[0] != s_count) 
	    fprintf(stderr, "count = %d; should be %d\n", ints[0], s_count);
	if (ints[1] != s_blocklengths[0]) 
	    fprintf(stderr, "blocklength[0] = %d; should be %d\n", ints[1], s_blocklengths[0]);
	if (ints[2] != s_blocklengths[1]) 
	    fprintf(stderr, "blocklength[1] = %d; should be %d\n", ints[2], s_blocklengths[1]);
	if (ints[3] != s_blocklengths[2]) 
	    fprintf(stderr, "blocklength[2] = %d; should be %d\n", ints[3], s_blocklengths[2]);
	if (ints[4] != s_displacements[0]) 
	    fprintf(stderr, "displacement[0] = %d; should be %d\n", ints[4], s_displacements[0]);
	if (ints[5] != s_displacements[1]) 
	    fprintf(stderr, "displacement[1] = %d; should be %d\n", ints[5], s_displacements[1]);
	if (ints[6] != s_displacements[2]) 
	    fprintf(stderr, "displacement[2] = %d; should be %d\n", ints[6], s_displacements[2]);
	if (types[0] != MPI_INT) fprintf(stderr, "type[0] does not match\n");
    }

    free(ints);
    if (nadds) free(adds);
    free(types);

    MPI_Type_free( &parent_type );
    return errs;
}
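The decoded constructor arguments can also be cross-checked against the byte layout they imply. The following is a small, hypothetical sketch (not part of the test above) that builds the same indexed type and reports its size and extent; the expected values in the comments follow from the MPI definitions of type size and extent.

#include <mpi.h>
#include <stdio.h>

/* Build the same indexed type as indexed_of_basics_test() and report its
 * size and extent. For blocklengths {3,2,1} over MPI_INT the size should be
 * 6*sizeof(int); the extent runs from displacement 10 to displacement 30+1,
 * i.e. 21*sizeof(int), with a lower bound of 10*sizeof(int). */
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int blocklengths[3]  = { 3, 2, 1 };
    int displacements[3] = { 10, 20, 30 };
    MPI_Datatype t;
    MPI_Type_indexed(3, blocklengths, displacements, MPI_INT, &t);
    MPI_Type_commit(&t);

    int size;
    MPI_Aint lb, extent;
    MPI_Type_size(t, &size);
    MPI_Type_get_extent(t, &lb, &extent);
    printf("size = %d bytes, lb = %ld, extent = %ld\n",
           size, (long) lb, (long) extent);

    MPI_Type_free(&t);
    MPI_Finalize();
    return 0;
}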