Esempio n. 1
0
/**
 * Accumulate operation over a patch, carried out one subpatch at a time.
 *
 * The patch is split with DDI_Subpatch() into per-rank subpatches. Each
 * subpatch is handed, together with its rank and the matching slice of the
 * caller's buffer, to the SMP-domain or per-process accumulate routine
 * (selected at compile time by DDI_ARMCI_SMP).
 *
 * @param patch  patch to operate on; its `handle` field is forwarded to
 *               DDI_Subpatch().
 * @param scale  passed through unchanged to the per-subpatch routine.
 * @param buf    caller's data; consumed front to back, advancing by each
 *               subpatch's `size` field (byte-wise, via a char pointer).
 */
void DDI_ARMCI_Acc(DDI_Patch *patch, void *scale, void *buf) {
    DDI_Patch pieces[MAX_NODES];
    int owners[MAX_NODES];
    int piece_count;
    int op_total = 0;   /* sum of the helpers' return values; not read in this function */
    char *cursor = (char *) buf;
    int idx;

    DDI_Subpatch(patch->handle, patch, &piece_count, owners, pieces);

    for (idx = 0; idx < piece_count; ++idx) {
        DDI_Patch *piece = &pieces[idx];

#if defined(DDI_ARMCI_SMP)
        op_total += DDI_ARMCI_Acc_domain_SMP(piece, scale, cursor, owners[idx]);
#else
        op_total += DDI_ARMCI_Acc_proc(piece, scale, cursor, owners[idx]);
#endif

        /* the next subpatch's data follows immediately in the caller's buffer */
        cursor += piece->size;
    }

#if defined(DDI_ARMCI_IMPLICIT_NBACC) && defined(DDI_ARMCI_IMPLICIT_WAIT)
    /* wait for implicit non-blocking operations */
    ARMCI_WaitAll();
#endif
}
Esempio n. 2
0
/*
 * Blocked LU factorization driver using ARMCI one-sided communication.
 *
 * Walks the diagonal blocks K = 0, 1, ... (element index k advancing by bs
 * until it reaches n). Each step has three phases:
 *   1. The owner of diagonal block (K,K) factors it with lu0(); every
 *      process then obtains that block (locally, or via get_remote()).
 *   2. Blocks this process owns in block column K are processed with bdiv(),
 *      and blocks it owns in block row K with bmodd(). Each processed block
 *      is pushed with ARMCI_NbPut() into the bufc / bufr slot of every other
 *      process that owns a block in the same row / column.
 *   3. After ARMCI_WaitAll(), ARMCI_AllFence() and a barrier, every trailing
 *      block (I,J) owned by this process is updated with bmod(), using local
 *      or buffered copies of blocks (I,K) and (K,J).
 *
 * Parameters:
 *   n   - upper bound of the element-index loops (presumably the matrix
 *         dimension - TODO confirm against caller).
 *   bs  - step of the element-index loops, i.e. the nominal block extent;
 *         the last block in each direction is clipped to n.
 *   me  - id of the calling process, compared against block_owner() results.
 *
 * Uses file-level state not visible here: a, bufc, bufr, nblocks, block_size.
 *
 * NOTE(review): dimI/dimJ/dimK, t1..t22, hc and handle[] are declared but
 * never referenced in this function.
 */
void lu(int n, int bs, int me)
{
  int i, il, j, jl, k, kl;
  int I, J, K;
  double *A, *B, *C, *D;
  int dimI, dimJ, dimK;
  int strI, strJ, strK;
  unsigned int t1, t2, t3, t4, t11, t22;
  int diagowner, destp, hc, m;
  double *dbuf;
  armci_hdl_t handle[2*MAXPROC];
  /* per-destination flags: set once a block has been put to that process */
  int saved[MAXPROC];  
  
  /* scratch space for one block, used to hold a remotely owned diagonal block */
  /* NOTE(review): the allocation result is not checked before use - confirm
     how ARMCI_Malloc_local reports failure. */
  dbuf = (double *)ARMCI_Malloc_local((armci_size_t) block_size*block_size*sizeof(double));

  for (k=0, K=0; k<n; k+=bs, K++) {
    /* kl = one past the last element index of block K; strK = its extent
       (bs, or shorter for the final, clipped block) */
    kl = k + bs; 
    if (kl > n) {
      kl = n;
      strK = kl - k;
    } else {
      strK = bs;
    }
    
    /* factor diagonal block */
    diagowner = block_owner(K, K);
    if (diagowner == me) {
      A = a[K+K*nblocks]; 
      lu0(A, strK, strK); /* impl algo on this diag block */
    }
    /* all processes wait here before the diagonal block is read below */
    MP_BARRIER(); 
    
    /* divide column k by diagonal block */
    /* D points at the diagonal block: the local copy if this process owns it,
       otherwise a copy fetched into dbuf.
       NOTE(review): block_owner(K, K) is evaluated again although diagowner
       already holds it. */
    if(block_owner(K, K) == me)
      D = a[K+K*nblocks];
    else {
      D = dbuf;
      get_remote(D, K, K);
    }
    
    for (i=kl, I=K+1; i<n; i+=bs, I++) {
      if (block_owner(I, K) == me) {  /* parcel out blocks */
	/* strI = extent of block row I, clipped at n */
	il = i + bs; 
	if (il > n) {
	  il = n;
	  strI = il - i;
	} else {
	  strI = bs;
	}
	A = a[I+K*nblocks]; 
	bdiv(A, D, strI, strK, strI, strK);
	
	/* Pre-put this block to the owners of all blocks on the I-th row with a
	   non-blocking put. saved[] ensures each remote process receives the
	   block only once; NULL is passed as the handle argument, and the
	   ARMCI_WaitAll() after both put loops is the only wait in this
	   function (presumably it covers these puts - confirm against the
	   ARMCI docs). */
	memset (saved, 0, sizeof(saved));
	for (m = K+1; m < nblocks; m++) {
	    destp = block_owner (I, m);
	    if (destp != me && !saved[destp]) {
	      ARMCI_NbPut(A, bufc[destp*nblocks + I], strI*strK*sizeof(double), destp, NULL);
	      saved[destp] = 1;
	    }
	}
      }
    } /* end of for (i=kl, I=K+1...) */
    
    /* modify row k by diagonal block */
    for (j=kl, J=K+1; j<n; j+=bs, J++) {
      if (block_owner(K, J) == me) {  /* parcel out blocks */
	/* strJ = extent of block column J, clipped at n */
	jl = j+bs; 
	if (jl > n) {
	  jl = n;
	  strJ = jl - j;
	} else {
	  strJ = bs;
	}
	A = a[K+J*nblocks];
	bmodd(D, A, strK, strJ, strK, strK);
     
	/* Pre-put this block to the owners of all blocks on the J-th column with
	   a non-blocking put; same once-per-destination scheme as above, but
	   targeting the bufr slot indexed by J. */
        memset (saved, 0, sizeof(saved));
        for (m = K+1; m < nblocks; m++) {
	  destp = block_owner (m, J);
	  if (destp != me  && !saved[destp]) {
	    ARMCI_NbPut(A, bufr[destp*nblocks + J], strK*strJ*sizeof(double), destp, NULL);
	    saved[destp] = 1;
	  }
	}
      }      
    }
        
    /* Wait, fence and barrier before any process reads bufc / bufr in the
       update phase below. */
    ARMCI_WaitAll();
    ARMCI_AllFence();
    MP_BARRIER();
    /* modify subsequent block columns */
    
    for (i=kl, I=K+1; i<n; i+=bs, I++) {
      /* strI = extent of block row I, clipped at n */
      il = i+bs; 
      if (il > n) {
	il = n;
	strI = il - i;
      } else {
	strI = bs;
      }

      for (j=kl, J=K+1; j<n; j+=bs, J++) {
	/* strJ = extent of block column J, clipped at n */
	jl = j + bs; 
	if (jl > n) {
	  jl = n;
	  strJ= jl - j;
	} else {
	  strJ = bs;
	  }
	if (block_owner(I, J) == me) {  /* parcel out blocks */
	  /* A = block (I,K): local copy if owned here, otherwise the copy
	     stored in this process's bufc slot for row I */
	  if(block_owner(I,K) == me)
	    A = a[I+K*nblocks];
	  else {
	    A = bufc[me*nblocks+I];
          }
	  
	  /* B = block (K,J): local copy if owned here, otherwise the copy
	     stored in this process's bufr slot for column J */
	  if(block_owner(K,J) == me)
	    B = a[K+J*nblocks];
	  else
	    B = bufr[me*nblocks + J];
	    
	  /* C = the trailing block (I,J) being updated from A and B */
	  C = a[I+J*nblocks];
	  bmod(A, B, C, strI, strJ, strK, strI, strK, strI);
	}
      }
    }
  }
  /* release the diagonal-block scratch buffer */
  ARMCI_Free_local(dbuf);
}
Esempio n. 3
0
/**
 * Barrier over the processes of `comm`.
 *
 * ARMCI_WaitAll() is called first, and only afterwards does the caller
 * enter MPI_Barrier(comm); the order of the two calls is significant.
 *
 * @param comm  communicator handed to MPI_Barrier().
 * @see ddi_armci.h
 */
void DDI_ARMCI_Barrier(MPI_Comm comm)
{
    ARMCI_WaitAll();
    MPI_Barrier(comm);
}