/**
 * Default constructor.
 *
 * Default-constructs the base part and the iteration individual, then sizes
 * the per-city work arrays to the problem dimension reported by getNumDim():
 * mv_tabu is filled with 0, mv_accPro is value-initialised. The tour cursor
 * m_loc starts at 0 and m_flag starts cleared.
 */
Ant::Ant() : Individual(), m_iteIndivl()
{
    const auto numDim = getNumDim();

    m_flag = false;
    m_loc = 0;

    mv_accPro.resize(numDim);
    mv_tabu.resize(numDim, 0);
}
void Ant::modifiedAnt(Individual<CodeVInt> &parent) { if (m_iteIndivl.data().m_x[m_loc] != m_x[m_loc]) { double obj = m_iteIndivl.data().m_obj[0]; TravellingSalesman *ptr = dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get()); int pos = -1; for (int i = m_loc + 1; i < getNumDim(); i++) { if (m_iteIndivl.data().m_x[i] == m_x[m_loc]) { pos = i; break; } } obj = obj - ptr->getCost()[m_iteIndivl.data().m_x[pos]][m_iteIndivl.data().m_x[pos - 1]] - ptr->getCost()[m_iteIndivl.data().m_x[pos]][m_iteIndivl.data().m_x[(pos + 1) % getNumDim()]]\ - ptr->getCost()[m_iteIndivl.data().m_x[m_loc]][m_iteIndivl.data().m_x[m_loc - 1]]; obj = obj + ptr->getCost()[m_iteIndivl.data().m_x[pos]][m_iteIndivl.data().m_x[m_loc - 1]] + ptr->getCost()[m_iteIndivl.data().m_x[pos]][m_iteIndivl.data().m_x[m_loc]]\ + ptr->getCost()[m_iteIndivl.data().m_x[pos - 1]][m_iteIndivl.data().m_x[(pos + 1) % getNumDim()]]; m_iteIndivl.data().m_obj[0] = obj; for (int i = pos - 1; i >= m_loc; i--) { m_iteIndivl.data().m_x[i + 1] = m_iteIndivl.data().m_x[i]; } m_iteIndivl.data().m_x[m_loc] = m_x[m_loc]; if ((ptr->getOptType() == MIN_OPT && obj < parent.data().m_obj[0]) || (ptr->getOptType() == MAX_OPT && obj > parent.data().m_obj[0])) { parent.data().m_obj[0] = obj; for (int i = 0; i < getNumDim(); i++) parent.data().m_x[i] = m_iteIndivl.data().m_x[i]; parent.setFlag(true); } } }
/*
 * Store the lattice extents and derive the site counts from them.
 *
 * Copies the first getNumDim() entries of size[] into tot_size_3d, sets
 * total_vol_3d to the product of those entries (starting from size[0]) and
 * total_vol_3d_cb to half of that product.
 */
static void setLattSize(const int size[])
{
  const int nd = getNumDim();
  int d;

  for(d = 0; d < nd; d++) {
    tot_size_3d[d] = size[d];
  }

  /* Volume = size[0] * size[1] * ... * size[nd-1] */
  total_vol_3d = size[0];
  d = 1;
  while(d < nd) {
    total_vol_3d *= size[d];
    d++;
  }

  total_vol_3d_cb = total_vol_3d / 2;
}
void Ant::selectNextCity_Pro(const vector<vector<double> > &phero, const vector<set<int>> &candidate, double beta, double alpha) { int i, curNode = m_x[m_loc], numDim = getNumDim(), firstZero = -1; for (i = 0; i<numDim; i++){ if (mv_tabu[i] == 0 && candidate[curNode].find(i) != candidate[curNode].end()){ mv_accPro[i] = pow(phero[curNode][i], alpha)*pow(1. / dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get())->getCost()[curNode][i], beta); if (firstZero == -1) firstZero = i; } else{ mv_accPro[i] = 0; } if (i>0) mv_accPro[i] += mv_accPro[i - 1]; } if (mv_accPro[numDim - 1]>0){ ++m_loc; double p = Global::msp_global->mp_uniformAlg->Next()*mv_accPro[numDim - 1]; //vector<double>::iterator it= find_if(mv_accPro.begin(),mv_accPro.end(),[&](const double &i){return p<=i;}); vector<double>::iterator it = lower_bound(mv_accPro.begin(), mv_accPro.end(), p); m_x[m_loc] = int(it - mv_accPro.begin()); mv_tabu[m_x[m_loc]] = 1; } else{ if (firstZero != -1) { ++m_loc; m_x[m_loc] = firstZero; mv_tabu[m_x[m_loc]] = 1; } else selectNextCity_Greedy(phero,beta,alpha); } }
void Ant::initializeIteIndivl(const Solution<CodeVInt> &parent) { for (int i = 0; i < getNumDim(); i++) { m_iteIndivl.data().m_x[i] = parent.data().m_x[i]; } m_iteIndivl.data().m_obj = parent.data().m_obj; }
void Ant::resetData(bool flag) { for(int i=0;i<getNumDim();i++) mv_tabu[i]=0; m_loc=0; if (flag) mv_tabu[(m_x[m_loc])]=1; m_flag=false; // to be evaluated }
/*
 * Compute the coordinate of a neighbouring site.
 *
 * temp  receives a copy of the first getNumDim() entries of coord, with
 *       component mu displaced by isign and wrapped periodically into
 *       [0, getLattSize()[mu]).
 * coord source coordinate (not modified).
 * mu    direction to step in.
 * isign displacement; twice the lattice extent is added before the modulo
 *       so that small negative steps still give a non-negative result.
 */
static void offs(int temp[], const int coord[], int mu, int isign)
{
  const int nd = getNumDim();
  int d = 0;

  while(d < nd) {
    temp[d] = coord[d];
    ++d;
  }

  /* translate address to neighbour */
  {
    const int extent = getLattSize()[mu];
    temp[mu] = (temp[mu] + isign + 2*extent) % extent;
  }
}
/*
 * Subgrid lattice size.
 *
 * Returns a pointer to a function-static array of 4 ints. On the first call
 * the first getNumDim() entries are filled from QMP_get_subgrid_dimensions()
 * and entry 0 is then doubled; later calls return the cached array.
 * NOTE(review): the reason for doubling dimension 0 is not visible here -
 * presumably related to checkerboarding; confirm.
 */
static int* getSubgridSize()
{
  static int cached = 0;
  static int subgrid_size[4];
  int d;

  if (cached) {
    return subgrid_size;
  }

  for(d = 0; d < getNumDim(); d++) {
    subgrid_size[d] = QMP_get_subgrid_dimensions()[d];
  }
  subgrid_size[0] *= 2;

  cached = 1;
  return subgrid_size;
}
/*
 * Total problem size.
 *
 * Returns a pointer to a function-static array of 4 ints. On the first call
 * each of the first getNumDim() entries is set to the subgrid extent
 * (getSubgridSize()) multiplied by the matching entry of
 * QMP_get_logical_dimensions(); later calls return the cached array.
 */
static int* getLattSize()
{
  static int cached = 0;
  static int tot_size[4];

  if (!cached) {
    const int* phys_size = QMP_get_logical_dimensions();
    int d = 0;

    while(d < getNumDim()) {
      tot_size[d] = getSubgridSize()[d]*phys_size[d];
      ++d;
    }
    cached = 1;
  }

  return tot_size;
}
/**
 * Chooses the entry for the next position, optionally copying it from
 * @p parent.
 *
 * With probability @p xp the value stored by @p parent at position m_loc + 1
 * is reused, provided it is still unvisited. Otherwise a value is drawn by
 * roulette-wheel selection with weights phero[m_loc + 1][i] over the
 * unvisited entries; if all those weights are 0 the first unvisited entry is
 * taken.
 *
 * In every case m_loc is advanced, m_x[m_loc] is set and the chosen entry is
 * marked in mv_tabu. m_flag is raised when the chosen entry differs from
 * parent's entry at the same position.
 *
 * Precondition: m_loc + 1 < getNumDim() (position m_loc + 1 is read from
 * @p parent and @p phero without a bounds check).
 *
 * @param parent individual whose assignment may be followed.
 * @param phero  pheromone matrix indexed [position][entry].
 * @param xp     probability of trying the parent's entry first.
 * @throws myException if no unvisited entry is left (previously this wrote
 *         -1 into m_x and indexed mv_tabu[-1]).
 */
void Ant::selectNextCity_GLMemory_QAP(Individual<CodeVInt> &parent, const vector<vector<double> > &phero, double xp)
{
    const double follow = Global::msp_global->mp_uniformAlg->Next();
    if (follow < xp) {
        const int node = parent.data().m_x[m_loc + 1];
        if (mv_tabu[node] == 0) {
            ++m_loc;
            m_x[m_loc] = node;
            mv_tabu[node] = 1;
            // node was read from parent's slot m_loc, so the ant cannot differ
            // from parent here and m_flag is left untouched.
            return;
        }
    }

    const int numDim = getNumDim();
    int firstZero = -1; // first unvisited entry, used when all weights are 0
    for (int i = 0; i < numDim; i++) {
        if (mv_tabu[i] == 0) {
            mv_accPro[i] = phero[m_loc + 1][i];
            if (firstZero == -1) firstZero = i;
        } else {
            mv_accPro[i] = 0;
        }
        if (i > 0) mv_accPro[i] += mv_accPro[i - 1];
    }
    if (firstZero == -1)
        throw myException("selectNextCity_GLMemory_QAP() error in Ant.cpp");

    ++m_loc;
    const double total = mv_accPro[numDim - 1];
    if (total > 0) {
        const double p = Global::msp_global->mp_uniformAlg->Next() * total;
        // lower_bound(p) with p == 0 would stop at index 0 even when that entry
        // is already visited; pick the first strictly positive cumulative
        // weight instead.
        // NOTE(review): only matters if Next() can return exactly 0 - confirm.
        vector<double>::iterator it = (p > 0)
            ? lower_bound(mv_accPro.begin(), mv_accPro.end(), p)
            : upper_bound(mv_accPro.begin(), mv_accPro.end(), 0.);
        m_x[m_loc] = int(it - mv_accPro.begin());
    } else {
        m_x[m_loc] = firstZero;
    }

    if (!m_flag && m_x[m_loc] != parent.data().m_x[m_loc]) m_flag = true;
    mv_tabu[m_x[m_loc]] = 1;
}
void Ant::selectNextCity_Greedy(const vector<vector<double> > &phero,double beta,double alpha) { double max=-0xfffffff; double temp; int i,pos=-1; for(i=0;i<getNumDim();i++) { if(mv_tabu[i]==0) { temp=pow(phero[(m_x[m_loc])][i],alpha)*pow((1./dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get())->getCost()[(m_x[m_loc])][i]),beta); if(max<temp) { max=temp; pos=i; } } } if(pos==-1) throw myException("selectNextCity_Greedy() error in Ant.cpp"); ++m_loc; m_x[m_loc]=pos; mv_tabu[pos]=1; }
void Ant::selectNextCity_GLMemory_TSP(Individual<CodeVInt> &parent, const vector<vector<double> > &phero, double xp){ double p=Global::msp_global->mp_uniformAlg->Next(); if(p<xp){ pair<int,int> pa=dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get())->getNextCity(parent,m_x[m_loc]); int node=mv_tabu[pa.first]<mv_tabu[pa.second]?pa.first:pa.second; if(mv_tabu[node]==0){ ++m_loc; m_x[m_loc]=node; mv_tabu[node]=1; if(!m_flag&&parent.data().m_x[m_loc]!=node) m_flag=true; modifiedAnt(parent); return; } } int i,curNode=m_x[m_loc],numDim=getNumDim(),firstZero=-1; /*for(i=0;i<getNumDim();i++){ if(i!=curNode&&dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get())->getCost()[curNode][i]==0){ ++m_loc; m_x[m_loc]=i; if(m_x[m_loc]!=parent.m_x[m_loc]) m_flag=true; return; } } for(i=0;i<getNumDim();i++){ mv_accPro[i]=0; if(mv_tabu[i]==0){ sum+=phero[curNode][i]; } } if(sum>0){ for(i=0;i<getNumDim();i++){ if(mv_tabu[i]==0){ mv_accPro[i]=phero[curNode][i]; mv_accPro[i]=mv_accPro[i]/sum; } if(i>0) mv_accPro[i]+=mv_accPro[i-1]; } double p=Global::msp_global->mp_uniformAlg->Next(); int pos; for(i=0;i<getNumDim();i++){ if(p<=mv_accPro[i]){ pos=i; break; } } ++m_loc; m_x[m_loc]=pos; }else{ for(i=0;i<getNumDim();i++){ if(mv_tabu[i]==0){ ++m_loc; m_x[m_loc]=i; break; } } } if(m_x[m_loc]!=parent.m_x[m_loc]) m_flag=true; mv_tabu[m_x[m_loc]]=1;*/ for(i=0;i<numDim;i++){ if(mv_tabu[i]==0){ mv_accPro[i]=phero[curNode][i]; if(firstZero==-1) firstZero=i; }else{ mv_accPro[i]=0; } if(i>0) mv_accPro[i]+=mv_accPro[i-1]; } ++m_loc; if(mv_accPro[numDim-1]>0){ double p=Global::msp_global->mp_uniformAlg->Next()*mv_accPro[numDim-1]; //vector<double>::iterator it= find_if(mv_accPro.begin(),mv_accPro.end(),[&](const double &i){return p<=i;}); vector<double>::iterator it= lower_bound(mv_accPro.begin(),mv_accPro.end(),p); m_x[m_loc]=int (it-mv_accPro.begin()); }else{ m_x[m_loc]=firstZero; } if(!m_flag&&m_x[m_loc]!=parent.data().m_x[m_loc]) m_flag=true; modifiedAnt(parent); 
mv_tabu[m_x[m_loc]]=1; }
void Ant::initialize(int initNode) { if(initNode!=-1) m_x[m_loc]=initNode; else m_x[m_loc]=int((getNumDim()-1)*Global::msp_global->mp_uniformAlg->Next()); mv_tabu[int(m_x[m_loc])]=1; }
/*
 * make_shift_tables: build the site table and the pointer-valued offset table
 * used to address half-spinor temporaries and communication buffers.
 *
 * Parameters
 *   bound      output boundary counters, indexed [cb][table type][dir]; they
 *              are zeroed here and incremented once per off-node neighbour.
 *   chi1, chi2 base pointers used for on-node entries (chi1 for
 *              DECOMP_SCATTER and RECONS_MVV_GATHER, chi2 for
 *              DECOMP_HVV_SCATTER and RECONS_GATHER).
 *   recv_bufs, send_bufs
 *              buffer pointers indexed [0 or 1][num]; off-node entries of the
 *              offset table point into these.
 *   QDP_getSiteCoords, QDP_getLinearSiteIndex, QDP_getNodeNumber
 *              caller-supplied layout callbacks.
 *
 * Steps, in the order they appear below
 *   1. Set the file-level subgrid_vol and subgrid_vol_cb (= subgrid_vol / 2).
 *   2. Allocate xsite_table and xinvtab with 63 bytes of slack and derive the
 *      64-byte aligned site_table / invtab from them.
 *   3. Run consistency checks on the index <-> coordinate mappings; failures
 *      are only reported with printf.
 *   4. Fill site_table[my_index] = qdp_index and the reverse lookup
 *      invtab[qdp_index] = (cb, linearcb).
 *   5. Allocate four temporary shift tables of 4*subgrid_vol ints each and
 *      fill them per (cb, site, dir), counting off-node neighbours in bound.
 *   6. Abort via QMP_abort(1) if the counters of the two checkerboards differ.
 *   7. Allocate xoffset_table (aligned view: offset_table) and translate each
 *      shift entry into a pointer into chi1/chi2 or send_bufs/recv_bufs.
 *   8. Free the shift tables and xinvtab. xsite_table and xoffset_table are
 *      not freed in this function.
 *
 * Every allocation failure is reported with QMP_error and ends in
 * QMP_abort(1).
 */
void make_shift_tables(int bound[2][4][4], halfspinor_array* chi1, halfspinor_array* chi2, halfspinor_array* recv_bufs[2][4], halfspinor_array* send_bufs[2][4], void (*QDP_getSiteCoords)(int coord[], int node, int linearsite), int (*QDP_getLinearSiteIndex)(const int coord[]), int (*QDP_getNodeNumber)(const int coord[])) { volatile int dir,i; const int my_node = QMP_get_node_number(); int coord[4]; int gcoord[4]; int gcoord2[4]; int linear; int **shift_table; int x,y,z,t; int *subgrid_size = getSubgridSize(); int mu; int offset; int cb; const int *node_coord = QMP_get_logical_coordinates(); int p; int site, index; InvTab4 *xinvtab; InvTab4 *invtab; int qdp_index; int my_index; int num; int offsite_found; /* Setup the subgrid volume for ever after */ subgrid_vol = 1; for(i=0; i < getNumDim(); ++i) { subgrid_vol *= getSubgridSize()[i]; } /* Get the checkerboard size for ever after */ subgrid_vol_cb = subgrid_vol / 2; /* Now I want to build the site table */ /* I want it cache line aligned? 
*/ xsite_table = (int *)malloc(sizeof(int)*subgrid_vol+63L); if(xsite_table == 0x0 ) { QMP_error("Couldnt allocate site table"); QMP_abort(1); } site_table = (int *)((((ptrdiff_t)(xsite_table))+63L)&(-64L)); xinvtab = (InvTab4 *)malloc(sizeof(InvTab4)*subgrid_vol + 63L); if(xinvtab == 0x0 ) { QMP_error("Couldnt allocate site table"); QMP_abort(1); } invtab = (InvTab4 *)((((ptrdiff_t)(xinvtab))+63L)&(-64L)); /* Invertibility check: Check that myLinearSiteIndex4D is in fact the inverse of mySiteCoords4D, and that QDP_getSiteCoords is the inverse of QDP_getLinearSiteIndex() */ for(p=0; p < 2; p++) { for(site=0; site < subgrid_vol_cb; site++) { /* Linear site index */ my_index = site + subgrid_vol_cb*p; QDP_getSiteCoords(gcoord, my_node, my_index); linear=QDP_getLinearSiteIndex(gcoord); if( linear != my_index ) { printf("P%d cb=%d site=%d : QDP_getSiteCoords not inverse of QDP_getLinearSiteIndex(): my_index=%d linear=%d\n", my_node, p,site, my_index,linear); } mySiteCoords4D(gcoord, my_node, my_index); linear=myLinearSiteIndex4D(gcoord); if( linear != my_index ) { printf("P%d cb=%d site=%d : mySiteCoords3D not inverse of myLinearSiteIndex3D(): my_index=%d linear=%d\n", my_node, p,site, my_index,linear); } } } /* Loop through sites - you can choose your path below */ /* This is a checkerboarded order which is identical hopefully to QDP++'s rb2 subset when QDP++ is in a CB2 layout */ for(p=0; p < 2; p++) { for(t=0; t < subgrid_size[3]; t++) { for(z=0; z < subgrid_size[2]; z++) { for(y=0; y < subgrid_size[1]; y++) { for(x=0; x < subgrid_size[0]/2; x++) { coord[0] = 2*x + p; coord[1] = y; coord[2] = z; coord[3] = t; /* Make global */ for(i=0; i < 4; i++) { coord[i] += subgrid_size[i]*node_coord[i]; } /* Index of coordinate -- NB this is not lexicographic but takes into account checkerboarding in QDP++ */ qdp_index = QDP_getLinearSiteIndex(coord); /* Index of coordinate in my layout. 
-- NB this is not lexicographic but takes into account my own checkerboarding (myLinearSiteIndex4D) */ my_index = myLinearSiteIndex4D(coord); site_table[my_index] = qdp_index; cb=parity(coord); linear = my_index%subgrid_vol_cb; invtab[qdp_index].cb=cb; invtab[qdp_index].linearcb=linear; } } } } } /* Site table transitivity check: for each site, convert to my local index, convert to qdp index, convert qdp_index to coordinate, convert coordinate back to my local index. Check that the local index at the end is the same as you started with */ for(p=0; p < 2; p++) { for(site=0; site < subgrid_vol_cb; site++) { /* My local index */ my_index = site + subgrid_vol_cb*p; /* Convert to QDP index */ qdp_index = site_table[ my_index ]; /* Switch QDP index to coordinates */ QDP_getSiteCoords(gcoord, my_node,qdp_index); /* Convert back to my local index */ linear = myLinearSiteIndex4D(gcoord); /* Check new cb,cbsite index matches the old cb index */ if (linear != my_index) { printf("P%d The Circle is broken. My index=%d qdp_index=%d coords=%d,%d,%d,%d linear(=my_index?)=%d\n", my_node, my_index, qdp_index, gcoord[0],gcoord[1],gcoord[2],gcoord[3],linear); } } } /* Consistency check 2: Test mySiteCoords4D for all cb, site: convert to my linear index (my_index), convert to qdp_index (lookup in site table). Now convert qdp_index and my_index both to coordinates. 
They should produce the same coordinates */ for(p=0; p < 2; p++) { for(site=0; site < subgrid_vol_cb; site++) { /* My local index */ my_index = site + subgrid_vol_cb*p; mySiteCoords4D(gcoord, my_node, my_index); qdp_index = site_table[ my_index ]; QDP_getSiteCoords(gcoord2, my_node,qdp_index); for(mu=0 ; mu < 4; mu++) { if( gcoord2[mu] != gcoord[mu] ) { printf("P%d: my_index=%d qdp_index=%d mySiteCoords=(%d,%d,%d,%d) QDPsiteCoords=(%d,%d,%d,%d)\n", my_node, my_index, qdp_index, gcoord[0], gcoord[1], gcoord[2], gcoord[3], gcoord2[0], gcoord2[1], gcoord2[2], gcoord2[3]); /* NOTE(review): this 'continue' is the last statement of the mu loop body, so it has no effect; a site that differs in several components is reported once per component */ continue; } } } } /* Allocate the shift table */ /* The structure is as follows: There are 4 shift tables in order: [ Table 1 | Table 2 | Table 3 | Table 4 ] Table 1: decomp_scatter_index[mu][site] Table 2: decomp_hvv_scatter_index[mu][site] Table 3: recons_mvv_gather_index[mu][site] Table 4: recons_gather_index[mu][site] */ /* This 4 is for the 4 tables: Table 1-4*/ if ((shift_table = (int **)malloc(4*sizeof(int*))) == 0 ) { QMP_error("init_wnxtsu3dslash: could not initialize shift_table"); QMP_abort(1); } for(i=0; i < 4; i++) { /* This 4 is for the 4 comms directions: */ if ((shift_table[i] = (int *)malloc(4*subgrid_vol*sizeof(int))) == 0) { QMP_error("init_wnxtsu3dslash: could not initialize shift_table"); QMP_abort(1); } } /* Initialize the boundary counters */ for(cb=0; cb < 2; cb++) { for(dir=0; dir < 4; dir++) { bound[cb][0][dir] = 0; bound[cb][1][dir] = 0; bound[cb][2][dir] = 0; bound[cb][3][dir] = 0; } } for(cb=0; cb < 2; cb++) { for(site=0; site < subgrid_vol_cb; ++site) { index = cb*subgrid_vol_cb + site; /* Fetch site from site table */ qdp_index = site_table[index]; /* Get its coords */ QDP_getSiteCoords(coord, my_node, qdp_index); /* Loop over directions building up shift tables */ for(dir=0; dir < 4; dir++) { int fcoord[4], bcoord[4]; int fnode, bnode; int blinear, flinear; /* Backwards displacement*/ offs(bcoord, coord, dir, -1); bnode = QDP_getNodeNumber(bcoord); blinear = 
QDP_getLinearSiteIndex(bcoord); /* Forward displacement */ offs(fcoord, coord, dir, +1); fnode = QDP_getNodeNumber(fcoord); flinear = QDP_getLinearSiteIndex(fcoord); /* Scatter: decomp_{plus,minus} */ /* Operation: a^F(shift(x,type=0),dir) <- decomp(psi(x),dir) */ /* Send backwards - also called a receive from forward */ if (bnode != my_node) { /* Offnode */ /* Append to Tail 1, increase boundary count */ /* This is the correct code */ shift_table[DECOMP_SCATTER][dir+4*index] = subgrid_vol_cb + bound[1-cb][DECOMP_SCATTER][dir]; bound[1-cb][DECOMP_SCATTER][dir]++; } else { /* On node. Note the linear part of its (cb3, linear) bit, using a reverse lookup */ shift_table[DECOMP_SCATTER][dir+4*index] = invtab[blinear].linearcb; } /* Scatter: decomp_hvv_{plus,minus} */ /* Operation: a^B(shift(x,type=1),dir) <- U^dag(x,dir)*decomp(psi(x),dir) */ /* Send forwards - also called a receive from backward */ if (fnode != my_node) { /* Offnode */ /* Append to Tail 1, increase boundary count */ shift_table[DECOMP_HVV_SCATTER][dir+4*index] = subgrid_vol_cb + bound[1-cb][DECOMP_HVV_SCATTER][dir]; bound[1-cb][DECOMP_HVV_SCATTER][dir]++; } else { /* On node. Note the linear part of its (cb3, linear) bit, using a reverse lookup */ shift_table[DECOMP_HVV_SCATTER][dir+4*index] /* Onnode */ = invtab[flinear].linearcb ; } /* Gather: mvv_recons_{plus,minus} */ /* Operation: chi(x) <- \sum_dir U(x,dir)*a^F(shift(x,type=2),dir) */ /* Receive from forward */ if (fnode != my_node) { /* Offnode */ /* Append to Tail 2, increase boundary count */ shift_table[RECONS_MVV_GATHER][dir+4*index] = 2*subgrid_vol_cb + (bound[cb][RECONS_MVV_GATHER][dir]); bound[cb][RECONS_MVV_GATHER][dir]++; } else { /* On node. Note the linear part of its (cb3, linear) bit, using a reverse lookup. 
Note this is a recons post shift, so the linear coordinate to invert is mine rather than the neighbours */ shift_table[RECONS_MVV_GATHER][dir+4*index] = invtab[qdp_index].linearcb ; } /* Gather: recons_{plus,minus} */ /* Operation: chi(x) += \sum_dir recons(a^B(shift(x,type=3),dir),dir) */ /* Receive from backward */ if (bnode != my_node) { shift_table[RECONS_GATHER][dir+4*index] = 2*subgrid_vol_cb + bound[cb][RECONS_GATHER][dir]; bound[cb][RECONS_GATHER][dir]++; } else { /* On node. Note the linear part of its (cb3, linear) bit, using a reverse lookup. Note this is a recons post shift, so the linear coordinate to invert is mine rather than the neighbours */ shift_table[RECONS_GATHER][dir+4*index] = invtab[qdp_index].linearcb ; } } } } /* Sanity check - make sure the sending and receiving counters match */ for(cb=0; cb < 2; cb++) { for(dir=0; dir < 4; dir++) { /* Sanity 1: Must have same number of boundary sites on each cb for a given operation */ for(i = 0; i < 4; i++) { if (bound[1-cb][i][dir] != bound[cb][i][dir]) { QMP_error("SSE Wilson dslash - make_shift_tables: type 0 diff. cb send/recv counts do not match: %d %d", bound[1-cb][i][dir],bound[cb][i][dir]); QMP_abort(1); } } } } /* Now I want to make the offset table into the half spinor temporaries */ /* The half spinor temporaries will look like this: dir=0 [ Body Half Spinors ][ Tail 1 Half Spinors ][ Tail 2 Half Spinors ] dir=1 [ Body Half Spinors ][ Tail 1 Half Spinors ][ Tail 2 Half Spinors ] ... And each of these blocks of half spinors will be sized to vol_cb sites (ie half volume only). The shift_table() for a given site and direction indexes into one of these lines. So the offset table essentially delineates which line one picks, by adding an offset of 3*subgrid_vol_cb*dir To the shift. The result from offset table, can be used directly as a pointer displacement on the temporaries. Perhaps the best way to consider this is to consider a value of shift_table[type][dir/site] that lands in the body. 
The shift table merely gives me a site index. But the data needs to be different for each direction for that site index. Hence we need to replicate the body, for each dir. The 3xsubgrid_vol_cb is just there to take care of the buffers. Or another way to think of it is that there is a 'body element' index specified by the shift table lookup, and that dir is just the slowest varying index. */ /* 4 dims, 4 types, rest of the magic is to align the thingie */ xoffset_table = (halfspinor_array **)malloc(4*4*subgrid_vol*sizeof(halfspinor_array*)+63L); if( xoffset_table == 0 ) { QMP_error("init_wnxtsu3dslash: could not initialize offset_table[i]"); QMP_abort(1); } /* This is the bit what aligns straight from AMD Manual */ offset_table = (halfspinor_array**)((((ptrdiff_t)(xoffset_table)) + 63L) & (-64L)); /* Walk through the shift_table and remap the offsets into actual pointers */ /* DECOMP_SCATTER */ num=0; for(dir =0; dir < Nd; dir++) { /* Loop through all the sites. Remap the offsets either to local arrays or pointers */ offsite_found=0; for(site=0; site < subgrid_vol; site++) { offset = shift_table[DECOMP_SCATTER][dir+4*site]; if( offset >= subgrid_vol_cb ) { /* Found an offsite guy. 
Its address must be to the send back buffer */ /* send to back index = recv from forward index = 0 */ offsite_found++; offset_table[ dir + 4*(site + subgrid_vol*DECOMP_SCATTER) ] = send_bufs[0][num]+(offset - subgrid_vol_cb); } else { /* Guy is onsite: This is DECOMP_SCATTER so offset to chi1 */ offset_table[ dir + 4*(site + subgrid_vol*DECOMP_SCATTER) ] = chi1+shift_table[DECOMP_SCATTER][dir+4*site]+subgrid_vol_cb*dir; } } if( offsite_found > 0 ) { /* If we found an offsite guy, next direction has to go into the next dir part of the send bufs */ num++; } } /* DECOMP_HVV_SCATTER */ /* Restart num-s */ num=0; for(dir =0; dir <Nd; dir++) { offsite_found=0; for(site=0; site < subgrid_vol; site++) { offset = shift_table[DECOMP_HVV_SCATTER][dir+4*site]; if( offset >= subgrid_vol_cb ) { /* Found an offsite guy. Its address must be to the send forw buffer */ /* send to forward / receive from backward index = 1 */ offsite_found++; offset_table[ dir + 4*(site + subgrid_vol*DECOMP_HVV_SCATTER) ] = send_bufs[1][num]+(offset - subgrid_vol_cb); } else { /* Guy is onsite. This is DECOMP_HVV_SCATTER so offset to chi2 */ offset_table[ dir + 4*(site + subgrid_vol*DECOMP_HVV_SCATTER) ] = chi2+shift_table[DECOMP_HVV_SCATTER][dir+4*site ]+subgrid_vol_cb*dir; } } if( offsite_found > 0 ) { num++; } } /* RECONS_MVV_GATHER */ num=0; for(dir =0; dir <Nd; dir++) { offsite_found=0; for(site=0; site < subgrid_vol; site++) { offset = shift_table[RECONS_MVV_GATHER][dir+4*site]; if( offset >= 2*subgrid_vol_cb ) { /* Found an offsite guy. 
Its address must be to the recv from front buffer */ /* recv_from front index = send to back index = 0 */ offsite_found++; offset_table[ dir + 4*(site + subgrid_vol*RECONS_MVV_GATHER) ] = recv_bufs[0][num]+(offset - 2*subgrid_vol_cb); } else { /* Guy is onsite */ /* This is RECONS_MVV_GATHER so offset with respect to chi1 */ offset_table[ dir + 4*(site + subgrid_vol*RECONS_MVV_GATHER) ] = chi1+shift_table[RECONS_MVV_GATHER][dir+4*site ]+subgrid_vol_cb*dir; } } if( offsite_found > 0 ) { num++; } } /* RECONS_GATHER */ num=0; for(dir =0; dir <Nd; dir++) { offsite_found=0; for(site=0; site < subgrid_vol; site++) { offset = shift_table[RECONS_GATHER][dir+4*site]; if( offset >= 2*subgrid_vol_cb ) { /* Found an offsite guy. Its address must be to the recv from back buffer */ /* receive from back = send to forward index = 1*/ offsite_found++; offset_table[ dir + 4*(site + subgrid_vol*RECONS_GATHER) ] = recv_bufs[1][num]+(offset - 2*subgrid_vol_cb); } else { /* Guy is onsite */ /* This is RECONS_GATHER so offset with respect to chi2 */ offset_table[ dir + 4*(site + subgrid_vol*RECONS_GATHER ) ] = chi2+shift_table[RECONS_GATHER][dir+4*site ]+subgrid_vol_cb*dir; } } if( offsite_found > 0 ) { num++; } } /* Free shift table - it is no longer needed. We deal solely with offsets */ for(i=0; i < 4; i++) { free( (shift_table)[i] ); } free( shift_table ); free( xinvtab ); }