/* -------------------------------------------------------------- *\
   DDI_ARR_select_local(dAPatch,element)
   =====================================
   [IN]  dAPatch - patch descriptor; handle, oper, ilo, ihi, jlo
                   and jhi are read.
   [OUT] element - alpha and index[0..1] receive the result.

   Runs mmin (oper == DDI_ARR_MIN) or mmax (oper == DDI_ARR_MAX)
   over the part of the locally acquired segment covered by the
   patch, then shifts the reported index pair by the segment's
   ilo/jlo.  For any other oper no search is run, but the index
   shift is still applied.
\* -------------------------------------------------------------- */
void DDI_ARR_select_local(DDI_Patch *dAPatch, DDI_ARR_Element *element)
{
    DDA_Index *Index = gv(dda_index);
    int handle = dAPatch->handle;
    int oper = dAPatch->oper;
    double *segment;

    /* Segment row count (presumably the leading dimension expected by
       mmin/mmax -- confirm against their definitions). */
    int seg_rows = Index[handle].ihi - Index[handle].ilo + 1;

    /* Patch origin expressed relative to the segment origin. */
    int row_off = dAPatch->ilo - Index[handle].ilo;
    int col_off = dAPatch->jlo - Index[handle].jlo;

    /* Patch extents. */
    int patch_rows = dAPatch->ihi - dAPatch->ilo + 1;
    int patch_cols = dAPatch->jhi - dAPatch->jlo + 1;

# if defined USE_SYSV
    if(USING_DATA_SERVERS()) {
        DDI_Fence_check(handle);
    }
# endif

    DDI_Acquire(Index, handle, DDI_READ_ACCESS, (void**)&segment);

    if(oper == DDI_ARR_MIN) {
        mmin(segment, seg_rows, row_off, col_off, patch_rows, patch_cols,
             &(element->alpha), element->index);
    } else if(oper == DDI_ARR_MAX) {
        mmax(segment, seg_rows, row_off, col_off, patch_rows, patch_cols,
             &(element->alpha), element->index);
    }

    /* Shift the reported indices by the segment origin to obtain
       global values. */
    element->index[0] += Index[handle].ilo;
    element->index[1] += Index[handle].jlo;

    DDI_Release(Index, handle, DDI_READ_ACCESS);
}
/* -------------------------------------------------------------- *\ DDI_GetAcc_local(patch,buff) ========================= [IN] patch - structure containing ilo, ihi, jlo, jhi, etc. [IN] buff - Data segment to be operated on. GetAccumulates the subpatch specified by patch and stored in buff into the share-memory segment(s) of the local node. \* -------------------------------------------------------------- */ void DDI_GetAcc_local(const DDI_Patch* patch,void *buff) { /* --------------- *\ Local Variables \* --------------- */ DDA_Index *Index = gv(dda_index); int i,j,nrows,ncols,start_row,start_col; size_t dda_offset,size; double tmp,*dda,*dloc = (double *) buff; int handle = patch->handle; int ilo = patch->ilo; int ihi = patch->ihi; int jlo = patch->jlo; int jhi = patch->jhi; int trows = Index[handle].nrows; # if FULL_SMP int icpu,smpme,smpnp; DDI_SMP_NProc(&smpnp,&smpme); # endif MAX_DEBUG((stdout,"%s: Entering DDI_GetAcc_local.\n",DDI_Id())) /* ------------------------------------------------------------------ *\ For FULL SMP implementations, loop on the number of SMP processors \* ------------------------------------------------------------------ */ # if FULL_SMP for(icpu=0; icpu<smpnp; icpu++) { Index = gv(smp_index)[icpu]; jlo = Index[handle].jlo; jhi = Index[handle].jhi; if(jlo > patch->jhi || jhi < patch->jlo) continue; if(patch->jlo > jlo) jlo = patch->jlo; if(jhi > patch->jhi) jhi = patch->jhi; # endif nrows = ihi - ilo + 1; ncols = jhi - jlo + 1; size = nrows*ncols; start_row = ilo - Index[handle].ilo; start_col = jlo - Index[handle].jlo; /* ---------------------------------------------------------- *\ If the patch and the DD array have the same row dimensions \* ---------------------------------------------------------- */ if(nrows == trows) { dda_offset = start_col*nrows; DDI_Acquire(Index,handle,DDI_WRITE_ACCESS,(void **) &dda); dda += dda_offset; for(i=0; i<size; i++) { tmp = dda[i]; dda[i] += dloc[i]; dloc[i] = tmp; } 
DDI_Release(Index,handle,DDI_WRITE_ACCESS); dloc += size; } else { /* ----------------------------------------------- *\ Otherwise, pack the local patch into the buffer \* ----------------------------------------------- */ DDI_Acquire(Index,handle,DDI_WRITE_ACCESS,(void **) &dda); dda_offset = start_col*trows; dda += dda_offset; dda += start_row; size = nrows*sizeof(double); for(i=0; i<ncols; i++) { for(j=0; j<nrows; j++) { tmp = dda[j]; dda[j] += dloc[j]; dloc[j] = tmp; } dloc += nrows; dda += trows; } DDI_Release(Index,handle,DDI_WRITE_ACCESS); } # if FULL_SMP } /* end for-loop on local cpus */ # endif /* --------------------- *\ Shared-memory counter \* --------------------- */ # if defined DDI_COUNTERS gv(acc_profile).ncalls_shmem++; gv(acc_profile).nbytes_shmem += patch->size; # endif MAX_DEBUG((stdout,"%s: Leaving DDI_GetAcc_local.\n",DDI_Id())) }