C++ (Cpp) bmodd Examples

Example #1

0

Show file

File: lu.c Project: dmlb2000/nwchem-cml

/*
 * Blocked LU factorization over the global block table `a`.
 *
 * For every block step K the routine
 *   1. runs lu0() on the diagonal block (K,K) on the process that owns it,
 *   2. runs bdiv() on the locally owned blocks (I,K), I > K, and bmodd() on
 *      the locally owned blocks (K,J), J > K, both against the diagonal
 *      block, and
 *   3. runs bmod() on every locally owned block (I,J) with I,J > K.
 * A block whose owner is not `me` is copied into a scratch buffer with
 * get_remote() before it is used as an operand.
 *
 * n  - upper bound of the element index range walked by the loops
 * bs - index step between consecutive blocks; the last block is clipped to n
 * me - id compared against block_owner() to pick the blocks handled here
 *
 * NOTE(review): the scratch buffers are sized from the global block_size,
 * not from bs, and the malloc() results are not checked -- confirm that
 * bs <= block_size always holds and decide on an out-of-memory policy.
 */
void lu(int n, int bs, int me)
{
    int i, j, k, kl;
    int I, J, K;
    double *A, *B, *C, *D;
    int strI, strJ, strK;
    int diagowner;
    double *buf1, *buf2;

    /* scratch space, one block each, for operands fetched with get_remote() */
    buf1 = (double *)malloc(block_size*block_size*sizeof(double));
    buf2 = (double *)malloc(block_size*block_size*sizeof(double));

    for (k=0, K=0; k<n; k+=bs, K++) {
        /* kl: first element index past block K, clipped to n */
        kl = k + bs;
        if (kl > n) {
            kl = n;
        }
        strK = kl - k;

        /* factor diagonal block */
        diagowner = block_owner(K, K);
        if (diagowner == me) {
            A = a[K+K*nblocks];
            lu0(A, strK, strK);
        }
        MP_BARRIER();

        /* divide column k by diagonal block */
        if (block_owner(K, K) == me) {
            D = a[K+K*nblocks];
        } else {
            D = buf1;
            get_remote(D, K, K);
        }
        for (i=kl, I=K+1; i<n; i+=bs, I++) {
            if (block_owner(I, K) == me) {  /* parcel out blocks */
                strI = (i + bs > n) ? n - i : bs;
                A = a[I+K*nblocks];
                bdiv(A, D, strI, strK, strI, strK);
            }
        }

        /* modify row k by diagonal block */
        for (j=kl, J=K+1; j<n; j+=bs, J++) {
            if (block_owner(K, J) == me) {  /* parcel out blocks */
                strJ = (j + bs > n) ? n - j : bs;
                A = a[K+J*nblocks];
                bmodd(D, A, strK, strJ, strK, strK);
            }
        }

        MP_BARRIER();

        /* modify subsequent block columns */
        for (i=kl, I=K+1; i<n; i+=bs, I++) {
            strI = (i + bs > n) ? n - i : bs;

            /* buf1 is reused here; D is not read again in this step */
            if (block_owner(I, K) == me) {
                A = a[I+K*nblocks];
            } else {
                A = buf1;
                get_remote(A, I, K);
            }
            for (j=kl, J=K+1; j<n; j+=bs, J++) {
                strJ = (j + bs > n) ? n - j : bs;
                if (block_owner(I, J) == me) {  /* parcel out blocks */
                    if (block_owner(K, J) == me) {
                        B = a[K+J*nblocks];
                    } else {
                        B = buf2;
                        get_remote(B, K, J);
                    }
                    C = a[I+J*nblocks];
                    bmod(A, B, C, strI, strJ, strK, strI, strK, strI);
                }
            }
        }
    }

    free(buf1);
    free(buf2);
}

Example #2

0

Show file

File: luf.c Project: ameena3/Anubhav_projects

/*
 * Worker side of the row-block distributed LU factorization.
 *
 * Receives its row blocks from process 0, then for every block step either
 * factors and sends the current row block (when this process owns it) or
 * receives it from the owner, applies bmodd()/bmod() to its local data, and
 * finally sends its row blocks back to process 0.
 *
 * Reads the file-level values matrix_size, block_size and myrank.
 *
 * NOTE(review): the allocations are not checked for failure.
 */
void slave() {

  double *b;
  double *buffer;
  double *workbuf;
  int i,j,k;
  int myrow,nextrow,rownum;
  MPI_Status status;
  int ntasks,pid;

  /* get the number of processes in the application */
  MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
  rownum=matrix_size/(block_size*ntasks);

  /* allocate the local portion of the matrix: rownum row blocks of
     block_size*matrix_size doubles each.  The byte count has to include
     sizeof(double), otherwise the receives below overrun the allocation. */
  b=(double *)malloc((size_t)rownum*block_size*matrix_size*sizeof(double));

  /* allocate buffer space, big enough to contain a whole row of blocks */
  buffer=(double *)malloc((size_t)matrix_size*block_size*sizeof(double));

  /* receive the initial matrix from process 0; the tag is the global index
     of the first row of the block */
  for (i=0;i<rownum;i++)
    MPI_Recv(&b[i*block_size*matrix_size],block_size*matrix_size,MPI_DOUBLE,
             0,i*block_size*ntasks+myrank*block_size,MPI_COMM_WORLD,&status);

  MPI_Barrier(MPI_COMM_WORLD);

  /* do the computation work of this process */
  for (i=0;i<matrix_size;i+=block_size) {

    /* compute the id of the process that owns rows i .. i+block_size-1 */
    pid=(i/block_size)%ntasks;
    myrow=((i/block_size)/ntasks)*block_size;

    if (pid==myrank) { /* my process */

      /* NOTE(review): myrow is used below both as the local row and as the
         column offset of the diagonal block, while the global column of
         the diagonal is i -- confirm this indexing is intended. */

      /* factor diagonal */
      lu0(&b[myrow+myrow*matrix_size],block_size,matrix_size);

      /* modify "column" by diagonal */
      for (j=myrow+block_size;j<matrix_size;j+=block_size)
        bdiv(&b[j+myrow*matrix_size],&b[myrow+myrow*matrix_size],
             block_size,matrix_size);

      /* send this row block to the other processes (only the columns after
         the diagonal may be needed) */
      for (j=0;j<ntasks;j++) {
        if (j!=myrank)
          MPI_Send(&b[myrow*matrix_size],block_size*matrix_size,MPI_DOUBLE,
                   j,i,MPI_COMM_WORLD);
      }
      workbuf=&b[myrow*matrix_size];
    }
    else { /* other process */
      /* receive rows i .. i+block_size-1 from process pid.  The receive
         target is the allocated buffer itself, not the address of the
         pointer variable. */
      MPI_Recv(buffer,block_size*matrix_size,MPI_DOUBLE,pid,i,
               MPI_COMM_WORLD,&status);
      workbuf=buffer;
    }

    if (myrank>pid)
      nextrow=myrow;
    else
      nextrow=myrow+block_size;

    /* NOTE(review): j runs up to matrix_size in the two loops below, but b
       holds only rownum*block_size rows -- confirm the intended bound. */

    /* modify the "row" using diagonal */
    for (j=nextrow;j<matrix_size;j+=block_size)
      bmodd(&b[i+j*matrix_size],&workbuf[i],
            block_size,matrix_size);

    /* modify the internal rows and columns */
    for (j=nextrow;j<matrix_size;j+=block_size)
      for (k=i+block_size;k<matrix_size;k+=block_size)
        bmod(&b[k+j*matrix_size],&workbuf[k],&b[i+j*matrix_size],
             block_size,matrix_size);
  }

  MPI_Barrier(MPI_COMM_WORLD);

  /* send b back to process 0 */
  for (i=0;i<rownum;i++)
    MPI_Send(&b[i*block_size*matrix_size],block_size*matrix_size,MPI_DOUBLE,
             0,i*block_size*ntasks+myrank*block_size,MPI_COMM_WORLD);

  /* the blocking sends have returned, so both buffers can be released */
  free(buffer);
  free(b);
}

Example #3

0

Show file

File: lu_nb_put.c Project: dmlb2000/nwchem-cml

/*
 * Blocked LU factorization over the global block table `a`, pushing updated
 * panel blocks to their consumers with ARMCI_NbPut().
 *
 * For every block step K the routine
 *   1. runs lu0() on the diagonal block (K,K) on the process that owns it,
 *   2. runs bdiv() on the locally owned blocks (I,K), I > K, and puts each
 *      result into bufc[destp*nblocks + I] of every other process destp that
 *      owns a block in block row I (columns K+1 .. nblocks-1),
 *   3. runs bmodd() on the locally owned blocks (K,J), J > K, and puts each
 *      result into bufr[destp*nblocks + J] of every other process destp that
 *      owns a block in block column J (rows K+1 .. nblocks-1),
 *   4. after ARMCI_WaitAll(), ARMCI_AllFence() and MP_BARRIER(), runs bmod()
 *      on every locally owned block (I,J) with I,J > K, reading non-local
 *      operands from bufc / bufr.
 *
 * n  - upper bound of the element index range walked by the loops
 * bs - index step between consecutive blocks; the last block is clipped to n
 * me - id compared against block_owner() to pick the blocks handled here
 *
 * NOTE(review): saved[] is indexed by the value block_owner() returns, so
 * that value is assumed to be below MAXPROC; the ARMCI_Malloc_local() result
 * is not checked -- confirm both.
 */
void lu(int n, int bs, int me)
{
  int i, j, k, kl;
  int I, J, K;
  double *A, *B, *C, *D;
  int strI, strJ, strK;
  int diagowner, destp, m;
  double *dbuf;
  int saved[MAXPROC];

  /* scratch space for a remote copy of the diagonal block */
  dbuf = (double *)ARMCI_Malloc_local((armci_size_t) block_size*block_size*sizeof(double));

  for (k=0, K=0; k<n; k+=bs, K++) {
    /* kl: first element index past block K, clipped to n */
    kl = k + bs;
    if (kl > n) {
      kl = n;
    }
    strK = kl - k;

    /* factor diagonal block */
    diagowner = block_owner(K, K);
    if (diagowner == me) {
      A = a[K+K*nblocks];
      lu0(A, strK, strK); /* impl algo on this diag block */
    }
    MP_BARRIER();

    /* divide column k by diagonal block */
    if (block_owner(K, K) == me) {
      D = a[K+K*nblocks];
    } else {
      D = dbuf;
      get_remote(D, K, K);
    }

    for (i=kl, I=K+1; i<n; i+=bs, I++) {
      if (block_owner(I, K) == me) {  /* parcel out blocks */
        strI = (i + bs > n) ? n - i : bs;
        A = a[I+K*nblocks];
        bdiv(A, D, strI, strK, strI, strK);

        /* Pre-put this block to the owners of all blocks on the I-th row
           with a non-blocking put; saved[] ensures one put per process. */
        memset (saved, 0, sizeof(saved));
        for (m = K+1; m < nblocks; m++) {
          destp = block_owner (I, m);
          if (destp != me && !saved[destp]) {
            ARMCI_NbPut(A, bufc[destp*nblocks + I], strI*strK*sizeof(double), destp, NULL);
            saved[destp] = 1;
          }
        }
      }
    } /* end of for (i=kl, I=K+1...) */

    /* modify row k by diagonal block */
    for (j=kl, J=K+1; j<n; j+=bs, J++) {
      if (block_owner(K, J) == me) {  /* parcel out blocks */
        strJ = (j + bs > n) ? n - j : bs;
        A = a[K+J*nblocks];
        bmodd(D, A, strK, strJ, strK, strK);

        /* Pre-put this block to the owners of all blocks on the J-th column
           with a non-blocking put; saved[] ensures one put per process. */
        memset (saved, 0, sizeof(saved));
        for (m = K+1; m < nblocks; m++) {
          destp = block_owner (m, J);
          if (destp != me && !saved[destp]) {
            ARMCI_NbPut(A, bufr[destp*nblocks + J], strK*strJ*sizeof(double), destp, NULL);
            saved[destp] = 1;
          }
        }
      }
    }

    /* wait for the puts issued above before anybody reads bufc / bufr */
    ARMCI_WaitAll();
    ARMCI_AllFence();
    MP_BARRIER();

    /* modify subsequent block columns */
    for (i=kl, I=K+1; i<n; i+=bs, I++) {
      strI = (i + bs > n) ? n - i : bs;

      for (j=kl, J=K+1; j<n; j+=bs, J++) {
        strJ = (j + bs > n) ? n - j : bs;
        if (block_owner(I, J) == me) {  /* parcel out blocks */
          if (block_owner(I, K) == me) {
            A = a[I+K*nblocks];
          } else {
            A = bufc[me*nblocks + I];
          }

          if (block_owner(K, J) == me) {
            B = a[K+J*nblocks];
          } else {
            B = bufr[me*nblocks + J];
          }

          C = a[I+J*nblocks];
          bmod(A, B, C, strI, strJ, strK, strI, strK, strI);
        }
      }
    }
  }
  ARMCI_Free_local(dbuf);
}

Example #4

0

Show file

File: luf.c Project: ameena3/Anubhav_projects

/*
 * Process-0 side of the row-block distributed LU factorization.
 *
 * Allocates and initializes the full matrix and right-hand side, sends every
 * row block to the process (i/block_size)%ntasks that owns it, takes part in
 * the factorization for the row blocks it owns itself, collects the other
 * row blocks back and passes the result to checkResult().
 *
 * Reads the file-level values matrix_size and block_size.
 *
 * NOTE(review): the allocations are not checked for failure.
 */
void master() {

  double *a;
  double *rhs;
  int ntasks,pid;
  double *buffer;
  double *workbuf;
  int i,j,k;
  MPI_Status status;
  time_t t0,t1;
  int  ct;

  /* get the number of processes in the application */
  MPI_Comm_size(MPI_COMM_WORLD,&ntasks);

  /* allocate matrix, rhs vector */
  a = (double *) malloc( matrix_size*matrix_size*sizeof(double) ) ;
  rhs = (double *) malloc( matrix_size*sizeof(double) ) ;

  /* initialize the matrix */
  /* Do we need to allocate the whole matrix a, or only one row at a time
     that is initialized and then sent to the corresponding process? */
  initializeMatrix( matrix_size, a, rhs );

  /* Send each row block to the corresponding process.  The loop starts at
     block_size because the first row block belongs to process 0. */
  for (i=block_size;i<matrix_size;i+=block_size) {

    /* rows i .. i+block_size-1 go to process (i/block_size)%ntasks */
    pid=(i/block_size)%ntasks;

    if (pid!=0)
      MPI_Send(&a[i*matrix_size],matrix_size*block_size,MPI_DOUBLE,
               pid,i,MPI_COMM_WORLD);
  }

  MPI_Barrier(MPI_COMM_WORLD);

  /* allocate buffer space, big enough to contain a whole row of blocks */
  buffer=(double *)malloc(matrix_size*block_size*sizeof(double));

  time(&t0);

  /* do the computation work of process 0 */
  for (i=0;i<matrix_size;i+=block_size) {

    /* compute the id of the process that owns rows i .. i+block_size-1 */
    pid=(i/block_size)%ntasks;

    if (pid==0) { /* master process. Me! */

      /* factor diagonal */
      lu0(&a[i+i*matrix_size],block_size,matrix_size);

      /* modify "column" by diagonal */
      for (j=i+block_size;j<matrix_size;j+=block_size)
        bdiv(&a[j+i*matrix_size],&a[i+i*matrix_size],
             block_size,matrix_size);

      /* send this row block to the other processes (only the columns after
         the diagonal may be needed) */
      for (j=1;j<ntasks;j++) {
        MPI_Send(&a[i*matrix_size],block_size*matrix_size,MPI_DOUBLE,
                 j,i,MPI_COMM_WORLD);
      }
      workbuf=&a[i*matrix_size];
    }
    else { /* other process */
      /* receive rows i .. i+block_size-1 from process pid.  The receive
         target is the allocated buffer itself, not the address of the
         pointer variable. */
      MPI_Recv(buffer,block_size*matrix_size,MPI_DOUBLE,pid,i,
               MPI_COMM_WORLD,&status);
      workbuf=buffer;
    }

    /* modify the "row" using diagonal; j starts at the next row block owned
       by process 0 and advances by one full round of processes */
    for (j=i+(ntasks-pid)*block_size;j<matrix_size;j+=block_size*ntasks)
      bmodd(&a[i+j*matrix_size],&workbuf[i],
            block_size,matrix_size);

    /* modify the internal rows and columns */
    for (j=i+(ntasks-pid)*block_size;j<matrix_size;j+=block_size*ntasks)
      for (k=i+block_size;k<matrix_size;k+=block_size)
        bmod(&a[k+j*matrix_size],&workbuf[k],&a[i+j*matrix_size],
             block_size,matrix_size);

  }

  MPI_Barrier(MPI_COMM_WORLD);

  time(&t1);
  /* time() has one-second resolution, so the difference is in seconds */
  ct=(int)(t1-t0);
  printf("LU decomposition took %d secs\n", ct);

  /* receive the modified matrix from all other processes */
  for (i=0;i<matrix_size;i+=block_size) {

    /* compute the id of the process that owns rows i .. i+block_size-1 */
    pid=(i/block_size)%ntasks;

    if (pid!=0)
      MPI_Recv(&a[i*matrix_size],block_size*matrix_size,MPI_DOUBLE,pid,i,MPI_COMM_WORLD,&status);
  }

  /* test the resulting decomposition */
  checkResult(matrix_size,a,rhs);

  /* NOTE(review): assumes checkResult() does not keep a or rhs for later */
  free(buffer);
  free(rhs);
  free(a);
}

Example #5

0

Show file

File: lu-block-th.c Project: bcernohous/ga

/*
 * Thread entry point running the blocked LU factorization over the global
 * block table `a`.
 *
 * lu_arg is read as an array of three ints: [0] the upper bound n of the
 * element index range, [1] the block step bs, [2] the index th_idx of this
 * thread in me_th[].  The value me_th[th_idx] is the id compared against
 * block_owner() to pick the blocks handled by this thread.
 *
 * For every block step K the routine
 *   1. runs lu0() on the diagonal block (K,K) on its owner,
 *   2. runs bdiv() on the owned blocks (I,K), I > K, and bmodd() on the
 *      owned blocks (K,J), J > K, both against the diagonal block, and
 *   3. runs bmod() on every owned block (I,J) with I,J > K.
 * A block with a different owner is copied into a scratch buffer with
 * get_remote() before it is used as an operand.
 *
 * Returns lu_arg unchanged.
 *
 * NOTE(review): the scratch buffers are sized from the global block_size,
 * not from bs, and the malloc() results are not checked -- confirm that
 * bs <= block_size always holds and decide on an out-of-memory policy.
 */
void *lu(void *lu_arg)
{
    int n, bs, th_idx;
    int i, j, k, kl;
    int I, J, K;
    double *A, *B, *C, *D;
    int strI, strJ, strK;
    int diagowner;
    double *buf1, *buf2;

    n = ((int *)lu_arg)[0];
    bs = ((int *)lu_arg)[1];
    th_idx = ((int *)lu_arg)[2];

#ifdef DEBUG
    printf("DBG: starting thread %d(idx=%d) on node %d\n", me_th[th_idx], th_idx, me); fflush(stdout);
#endif

    /* scratch space, one block each, for operands fetched with get_remote() */
    buf1 = (double *)malloc(block_size*block_size*sizeof(double));
    buf2 = (double *)malloc(block_size*block_size*sizeof(double));

    for (k=0, K=0; k<n; k+=bs, K++) {
        /* kl: first element index past block K, clipped to n */
        kl = k + bs;
        if (kl > n) {
            kl = n;
        }
        strK = kl - k;

        /* factor diagonal block */
        diagowner = block_owner(K, K);
        if (diagowner == me_th[th_idx]) {
            A = a[K+K*nblocks];
            print_block_dbg(A, "th=%d, idx=%d: before lu0 a[%d]:\n", me_th[th_idx], th_idx, K+K*nblocks);
            lu0(A, strK, strK);
        }
        MT_BARRIER();

        /* divide column k by diagonal block */
        if (block_owner(K, K) == me_th[th_idx]) {
            D = a[K+K*nblocks];
        } else {
            D = buf1;
            get_remote(D, K, K);
        }
        for (i=kl, I=K+1; i<n; i+=bs, I++) {
            if (block_owner(I, K) == me_th[th_idx]) {  /* parcel out blocks */
                strI = (i + bs > n) ? n - i : bs;
                A = a[I+K*nblocks];
                bdiv(A, D, strI, strK, strI, strK);
            }
        }

        /* modify row k by diagonal block */
        for (j=kl, J=K+1; j<n; j+=bs, J++) {
            if (block_owner(K, J) == me_th[th_idx]) {  /* parcel out blocks */
                strJ = (j + bs > n) ? n - j : bs;
                A = a[K+J*nblocks];
                bmodd(D, A, strK, strJ, strK, strK);
            }
        }
        MT_BARRIER();

        /* modify subsequent block columns */
        for (i=kl, I=K+1; i<n; i+=bs, I++) {
            strI = (i + bs > n) ? n - i : bs;

            /* buf1 is reused here; D is not read again in this step */
            if (block_owner(I, K) == me_th[th_idx]) {
                A = a[I+K*nblocks];
            } else {
                A = buf1;
                get_remote(A, I, K);
            }
            for (j=kl, J=K+1; j<n; j+=bs, J++) {
                strJ = (j + bs > n) ? n - j : bs;
                if (block_owner(I, J) == me_th[th_idx]) {  /* parcel out blocks */
                    if (block_owner(K, J) == me_th[th_idx]) {
                        B = a[K+J*nblocks];
                    } else {
                        B = buf2;
                        get_remote(B, K, J);
                    }
                    C = a[I+J*nblocks];
                    bmod(A, B, C, strI, strJ, strK, strI, strK, strI);
                }
            }
        }
    }

    free(buf1);
    free(buf2);

    return lu_arg;
}