/*
 * Return the index of the smallest element of a[low..high-1].
 *
 * Fixes relative to the previous revision:
 *  - loop bound used an undefined identifier `test`; the scan must run to `high`
 *  - removed a nonsensical self-call `minloc(a[4], a[3])` (wrong arity, and it
 *    passed int values where an int[] is expected)
 *  - removed dead code `if(t==4) i=3; else i=4;` that read an undefined `t`
 *    and whose result was discarded before the return
 *
 * Preconditions: low < high and a[low..high-1] is readable.
 *
 * @param a     array to scan
 * @param low   first index of the range (inclusive)
 * @param high  one past the last index of the range (exclusive)
 * @return index k in [low, high) such that a[k] is minimal; on ties the
 *         first (lowest-index) minimum is returned
 */
int minloc(int a[], int low, int high) {
    int k = low;     /* index of the smallest element seen so far */
    int x = a[low];  /* value of the smallest element seen so far */
    int i = low + 1;

    while (i < high) {
        if (a[i] < x) {
            x = a[i];
            k = i;
        }
        i = i + 1;
    }
    return k;
}
/*
 * Subdivide the hyper-interval *curr along its longest dimensions
 * (DIRECT-style trisection).  For every coordinate i whose extent equals the
 * interval's maximum extent, the objective is sampled at the two points
 * obtained by stepping +/- dimen[i]/3 from the centre; the sampled
 * directions are then trisected, best (lowest objective value) first,
 * via triplica().
 *
 * NOTE(review): relies on the file-scope workspace `mod_suddividi`
 * (ysud/xsud/vetf1/vetf2/mask) and on a 3-argument minloc(n, vet, mask)
 * declared elsewhere in the project — this is NOT the minloc(int[],int,int)
 * defined in this file; confirm which declaration is in scope here.
 *
 * @param[in,out] curr   interval to subdivide; centre/extents are read,
 *                       and triplica() presumably updates it — TODO confirm
 * @param[in]     n      problem dimension
 * @param[in,out] nf     running count of objective-function evaluations
 * @param[in,out] nint   running count of intervals (grows by 2 per trisection)
 * @param[in,out] xdir   best point found so far (unscaled coordinates) — assumed; verify against triplica
 * @param[in,out] fdir   best objective value found so far — assumed; verify against triplica
 * @param[in]     funct  objective function, called as funct(n, x)
 */
void suddividi(intervallo *curr, int n, int *nf, int *nint, double *xdir, double *fdir, double funct(int, double*)){
    int i, j;
    int numtrue, ind1, ind2;
    numtrue = 0;
    /* Phase 1: sample f at centre +/- dimen[i]/3 along every maximal dimension. */
    for(i=0;i<n;i++){
        //printf("id: %d maxdim %f dimen[%d] %f\n",curr->id,curr->maxdim,i,curr->dimen[i]);
        if(curr->maxdim == curr->dimen[i]){
            /* Build the "plus" sample point in scaled coordinates (ysud),
             * map it to original coordinates (xsud) and evaluate. */
            for(j=0;j<n;j++) mod_suddividi.ysud[j] = curr->cent[j];
            mod_suddividi.ysud[i] = curr->cent[i] + 1.0*curr->dimen[i]/3.0;
            unscalevars(n,mod_suddividi.ysud,mod_suddividi.xsud);
            mod_suddividi.vetf1[i] = funct(n,mod_suddividi.xsud);
            /* Same for the "minus" sample point (only coordinate i differs). */
            mod_suddividi.ysud[i] = curr->cent[i] - 1.0*curr->dimen[i]/3.0;
            unscalevars(n,mod_suddividi.ysud,mod_suddividi.xsud);
            mod_suddividi.vetf2[i] = funct(n,mod_suddividi.xsud);
            mod_suddividi.mask[i] = 1;   /* mark dimension i as a candidate */
            numtrue = numtrue + 1;
            *nf = *nf+2;                 /* two evaluations per maximal dimension */
        }
        else{
            /* Non-maximal dimension: exclude it from the selection below
             * with huge sentinel values and a cleared mask. */
            mod_suddividi.vetf1[i] = 1.e+30;
            mod_suddividi.vetf2[i] = 1.e+30;
            mod_suddividi.mask[i] = 0;
        }
    }
    //printf("numtrue = %d\n",numtrue);
    /* Phase 2: trisect the candidate dimensions in order of increasing
     * best sampled value; each trisection consumes one mask entry. */
    for(i=1;i<=numtrue;i++){
        ind1 = minloc(n,mod_suddividi.vetf1,mod_suddividi.mask);
        ind2 = minloc(n,mod_suddividi.vetf2,mod_suddividi.mask);
        if( mod_suddividi.vetf1[ind1] < mod_suddividi.vetf2[ind2] ){
            mod_suddividi.mask[ind1] = 0;
            triplica(curr,n,ind1,mod_suddividi.vetf1[ind1],
                     mod_suddividi.vetf2[ind1],nint,xdir,fdir);
        }
        else{
            mod_suddividi.mask[ind2] = 0;
            triplica(curr,n,ind2,mod_suddividi.vetf1[ind2],
                     mod_suddividi.vetf2[ind2],nint,xdir,fdir);
        }
        *nint = *nint + 2;   /* each trisection adds two new intervals */
    }
}
/*
 * Selection sort: orders a[low..high-1] ascending, in place.
 * Repeatedly locates the smallest element of the unsorted tail with
 * minloc() and swaps it to the front of that tail.
 */
void sort(int a[], int low, int high) {
    for (int front = low; front < high - 1; front = front + 1) {
        /* index of the minimum of a[front..high-1] */
        int best = minloc(a, front, high);
        /* swap the minimum into position `front` */
        int saved = a[best];
        a[best] = a[front];
        a[front] = saved;
    }
}
/** ****************************************************************************
 *
 * @ingroup InPlaceTransformation
 *
 * plasma_dgetmi2 Implementation of inplace transposition
 * based on the GKK algorithm by Gustavson, Karlsson, Kagstrom.
 * This algorithm shift some cycles to transpose the matrix.
 *
 *******************************************************************************
 *
 * @param[in] m
 *         Number of rows of matrix A
 *
 * @param[in] n
 *         Number of columns of matrix A
 *
 * @param[in,out] A
 *         Matrix of size L*m*n
 *
 * @param[in] nprob
 *         Number of parallel and independant problems
 *
 * @param[in] me
 *         Number of rows of the problem
 *
 * @param[in] ne
 *         Number of columns in the problem
 *
 * @param[in] L
 *         Size of chunk to use for transformation
 *
 ******************************************************************************/
/* NOTE(review): only the opening portion of this function is visible in this
 * chunk; the static-scheduling branch continues past the end of this view
 * (the scheduling `if`, Tp's release, and the function's closing brace are
 * not shown). Comments below cover only the visible code. */
int plasma_dshift(plasma_context_t *plasma, int m, int n, double *A,
                  int nprob, int me, int ne, int L,
                  PLASMA_sequence *sequence, PLASMA_request *request)
{
    int *leaders = NULL;
    int ngrp, thrdbypb, thrdtot, nleaders;

    /* Check Plasma context */
    thrdtot  = PLASMA_SIZE;     /* total worker threads */
    thrdbypb = PLASMA_GRPSIZE;  /* threads per problem (group size) */
    ngrp     = thrdtot/thrdbypb; /* number of thread groups */

    /* check input: the nprob sub-problems of me*ne chunks of length L must
     * tile the whole m*n matrix exactly */
    if( (nprob * me * ne * L) != (m * n) ) {
        plasma_error(__func__, "problem size does not match matrix size");
        /*printf("m=%d, n=%d, nprob=%d, me=%d, ne=%d, L=%d\n", m, n, nprob, me, ne, L);*/
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if( thrdbypb > thrdtot ) {
        plasma_error(__func__, "number of thread per problem must be less or equal to total number of threads");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if( (thrdtot % thrdbypb) != 0 ) {
        plasma_error(__func__, "number of thread per problem must divide the total number of thread");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* quick return: a 1xK or Kx1 problem needs no cycle shifting */
    if( (me < 2) || (ne < 2) || (nprob < 1) ) {
        return PLASMA_SUCCESS;
    }

    /* leaders[] is allocated by the callee; 3 ints per leader
     * (apparently: start, cycle length, owner slot — confirm against
     * GKK_getLeaderNbr's contract) */
    GKK_getLeaderNbr(me, ne, &nleaders, &leaders);
    nleaders *= 3;

    if
    (PLASMA_SCHEDULING == PLASMA_STATIC_SCHEDULING) {
        int *Tp = NULL;   /* per-thread accumulated work, for load balancing */
        int i, ipb;
        int q, owner;

        /* q = last element index of one me*ne problem; presumably used by the
         * code past this view — TODO confirm */
        q = me*ne - 1;
        Tp = (int *)plasma_shared_alloc(plasma, thrdtot, PlasmaInteger);
        for (i=0; i<thrdtot; i++)
            Tp[i] = 0;
        ipb = 0;

        /* First part with coarse parallelism */
        if (nprob > ngrp) {
            ipb = (nprob / ngrp)*ngrp;  /* problems handled in whole-group rounds */

            /* loop over leader */
            if (thrdbypb > 1) {
                for (i=0; i<nleaders; i+=3) {
                    /* assign this cycle to a thread: 2-argument minloc()
                     * (declared elsewhere) picks the least-loaded slot */
                    owner = minloc(thrdbypb, Tp);
                    /* assign it to owner: charge cycle length * chunk size */
                    Tp[owner] = Tp[owner] + leaders[i+1] * L;
                    leaders[i+2] = owner;
                }
                GKK_BalanceLoad(thrdbypb, Tp, leaders, nleaders, L);
            }
            else {
                /* single thread per problem: everything goes to slot 0 */
                for (i=0; i<nleaders; i+=3) {
                    Tp[0] = Tp[0] + leaders[i+1] * L;
                    leaders[i+2] = 0;
                }
            }

            /* shift in parallel: one static call per round of ngrp problems */
            for (i=0; i< (nprob/ngrp); i++) {
                plasma_static_call_9(plasma_pdshift,
                    int,              me,
                    int,              ne,
                    int,              L,
                    double*,          &(A[i*ngrp*me*ne*L]),
                    int *,            leaders,
                    int,              nleaders,
                    int,              thrdbypb,
                    PLASMA_sequence*, sequence,
                    PLASMA_request*,  request);
            }
        }