Пример #1
0
/**
 * Default constructor.
 *
 * Sizes the tabu list (all entries 0) and the accumulated-probability
 * buffer to getNumDim() entries, and starts the ant at tour position 0
 * with m_flag cleared.
 */
Ant::Ant() : Individual(), m_iteIndivl()
{
	const int dimCount = getNumDim();

	m_flag = false;
	m_loc = 0;

	mv_accPro.resize(dimCount);
	mv_tabu.resize(dimCount, 0);
}
Пример #2
0
/**
 * Brings m_iteIndivl in line with this ant at tour position m_loc.
 *
 * If m_iteIndivl holds a different city at m_loc than this ant does, the
 * ant's city is located further along m_iteIndivl's tour, removed from
 * there and re-inserted at m_loc (everything in between shifts one slot
 * to the right). The objective of m_iteIndivl is updated incrementally:
 * three edge costs are subtracted and three are added, instead of
 * re-evaluating the whole tour.
 *
 * When the updated objective is better than parent's (according to the
 * problem's getOptType()), the tour and objective are copied into parent
 * and parent.setFlag(true) is called.
 *
 * Indexes position m_loc - 1, so the ant must already have advanced past
 * position 0.
 *
 * @param parent individual that receives the modified tour if it improves.
 */
void Ant::modifiedAnt(Individual<CodeVInt> &parent)
{
	auto &tour = m_iteIndivl.data().m_x;

	// Nothing to do when both tours already agree at this position.
	if (tour[m_loc] == m_x[m_loc])
		return;

	const int numDim = getNumDim();

	// Find where the ant's city currently sits in m_iteIndivl's tour.
	int pos = -1;
	for (int i = m_loc + 1; i < numDim; i++)
	{
		if (tour[i] == m_x[m_loc])
		{
			pos = i;
			break;
		}
	}

	// The city does not occur after m_loc, so there is nothing to move.
	// Without this guard pos == -1 would be used as an index below.
	if (pos == -1)
		return;

	TravellingSalesman *ptr = dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get());
	const auto &cost = ptr->getCost();
	const int next = (pos + 1) % numDim;   // successor of pos, wrapping at the tour end

	double obj = m_iteIndivl.data().m_obj[0];
	// Edges that disappear: both edges around pos and the edge entering m_loc.
	obj = obj - cost[tour[pos]][tour[pos - 1]] - cost[tour[pos]][tour[next]]
		- cost[tour[m_loc]][tour[m_loc - 1]];
	// Edges that appear: the moved city's two new edges and the edge closing the gap at pos.
	obj = obj + cost[tour[pos]][tour[m_loc - 1]] + cost[tour[pos]][tour[m_loc]]
		+ cost[tour[pos - 1]][tour[next]];
	m_iteIndivl.data().m_obj[0] = obj;

	// Shift [m_loc, pos - 1] one slot right, then drop the city into m_loc.
	for (int i = pos - 1; i >= m_loc; i--)
	{
		tour[i + 1] = tour[i];
	}
	tour[m_loc] = m_x[m_loc];

	const bool improved =
		(ptr->getOptType() == MIN_OPT && obj < parent.data().m_obj[0]) ||
		(ptr->getOptType() == MAX_OPT && obj > parent.data().m_obj[0]);
	if (improved)
	{
		parent.data().m_obj[0] = obj;
		for (int i = 0; i < numDim; i++)
			parent.data().m_x[i] = tour[i];
		parent.setFlag(true);
	}
}
  /* Store the extent of every dimension in tot_size_3d, set total_vol_3d
     to the product of all extents, and set total_vol_3d_cb to half of it. */
  static void setLattSize(const int size[])
  {
    const int ndim = getNumDim();
    int mu = 0;

    /* Copy the per-dimension extents. */
    while (mu < ndim) {
      tot_size_3d[mu] = size[mu];
      ++mu;
    }

    /* The volume is seeded with dimension 0 and multiplied by the rest. */
    total_vol_3d = size[0];
    for (mu = 1; mu < ndim; ++mu) {
      total_vol_3d *= size[mu];
    }

    total_vol_3d_cb = total_vol_3d / 2;
  }
Пример #4
0
void Ant::selectNextCity_Pro(const vector<vector<double> > &phero, const vector<set<int>> &candidate, double beta, double alpha)
{
	int i, curNode = m_x[m_loc], numDim = getNumDim(), firstZero = -1;

	for (i = 0; i<numDim; i++){
		if (mv_tabu[i] == 0 && candidate[curNode].find(i) != candidate[curNode].end()){
			mv_accPro[i] = pow(phero[curNode][i], alpha)*pow(1. / dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get())->getCost()[curNode][i], beta);
			if (firstZero == -1) firstZero = i;
		}
		else{
			mv_accPro[i] = 0;
		}
		if (i>0) mv_accPro[i] += mv_accPro[i - 1];
	}
	if (mv_accPro[numDim - 1]>0){
		++m_loc;
		double p = Global::msp_global->mp_uniformAlg->Next()*mv_accPro[numDim - 1];
		//vector<double>::iterator it= find_if(mv_accPro.begin(),mv_accPro.end(),[&](const double &i){return p<=i;});
		vector<double>::iterator it = lower_bound(mv_accPro.begin(), mv_accPro.end(), p);
		m_x[m_loc] = int(it - mv_accPro.begin());
		mv_tabu[m_x[m_loc]] = 1;
	}
	else{
		if (firstZero != -1)
		{
			++m_loc;
			m_x[m_loc] = firstZero;
			mv_tabu[m_x[m_loc]] = 1;
		}
		else
			selectNextCity_Greedy(phero,beta,alpha);
	}
}
Пример #5
0
void Ant::initializeIteIndivl(const Solution<CodeVInt> &parent)
{
	for (int i = 0; i < getNumDim(); i++)
	{
		m_iteIndivl.data().m_x[i] = parent.data().m_x[i];
	}
	m_iteIndivl.data().m_obj = parent.data().m_obj;
}
Пример #6
0
void Ant::resetData(bool flag)
{
	for(int i=0;i<getNumDim();i++)
		mv_tabu[i]=0;
	m_loc=0;
	if (flag)
		mv_tabu[(m_x[m_loc])]=1;
	m_flag=false;  // to be evaluated
}
 /* Copy coord into temp, then displace component mu by isign,
    wrapping around at the lattice extent returned by getLattSize(). */
 static void offs(int temp[], const int coord[], int mu, int isign)
 {
   const int ndim = getNumDim();
   const int extent = getLattSize()[mu];
   int d;

   for (d = 0; d < ndim; ++d) {
     temp[d] = coord[d];
   }

   /* The 2*extent bias keeps the dividend non-negative for a backward
      step from coordinate 0 before the modulo is taken. */
   temp[mu] = (temp[mu] + isign + 2*extent) % extent;
 }
/* Subgrid lattice size.
   The extents are read from QMP_get_subgrid_dimensions() on the first call,
   dimension 0 is doubled, and the cached array is returned from then on.
   NOTE(review): the doubling presumably undoes a halved (checkerboarded)
   extent reported by QMP in dimension 0 -- confirm against the caller. */
static int* getSubgridSize()
{
    static int subgrid_size[4];
    static int initialised = 0;

    if (!initialised) {
        const int ndim = getNumDim();
        int mu;

        for (mu = 0; mu < ndim; ++mu) {
            subgrid_size[mu] = QMP_get_subgrid_dimensions()[mu];
        }

        subgrid_size[0] *= 2;

        initialised = 1;
    }

    return subgrid_size;
}
/* Total problem size.
   On the first call each extent is computed as the subgrid extent times the
   matching entry of QMP_get_logical_dimensions(); later calls return the
   cached array. */
static int* getLattSize()
{
    static int tot_size[4];
    static int computed = 0;

    if (!computed) {
        const int* machine_dims = QMP_get_logical_dimensions();
        const int ndim = getNumDim();
        int mu;

        for (mu = 0; mu < ndim; ++mu) {
            tot_size[mu] = getSubgridSize()[mu] * machine_dims[mu];
        }

        computed = 1;
    }

    return tot_size;
}
Пример #10
0
/**
 * Chooses the entry for the next position (m_loc + 1) and appends it.
 *
 * With probability xp the entry that parent holds at the next position is
 * copied, provided it is not tabu yet. Otherwise the entry is drawn by
 * roulette-wheel selection over all non-tabu entries, weighted by
 * phero[next position][entry]. If all those weights are zero, the first
 * non-tabu entry is taken.
 *
 * m_flag is raised as soon as the chosen entry differs from parent's entry
 * at the same position.
 *
 * If every entry of mv_tabu is already set, no entry can be selected:
 * firstZero stays -1 and is used as an index. The caller must not call
 * this once the assignment is complete.
 *
 * @param parent individual whose assignment is followed with probability xp.
 * @param phero  pheromone matrix, indexed [position][entry].
 * @param xp     probability of trying to copy parent's entry.
 */
void Ant::selectNextCity_GLMemory_QAP(Individual<CodeVInt> &parent, const vector<vector<double> > &phero, double xp) {
	const double coin = Global::msp_global->mp_uniformAlg->Next();
	if (coin<xp) {
		const int node = parent.data().m_x[m_loc + 1];

		if (mv_tabu[node] == 0) {
			++m_loc;
			m_x[m_loc] = node;
			mv_tabu[node] = 1;
			// node was read from parent at exactly this position, so it
			// cannot differ from parent here and m_flag stays as it is.
			return;
		}
	}

	const int numDim = getNumDim();
	int firstZero = -1;   // first non-tabu entry, -1 while none has been seen

	for (int i = 0; i<numDim; i++) {
		if (mv_tabu[i] == 0) {
			mv_accPro[i] = phero[m_loc + 1][i];
			if (firstZero == -1) firstZero = i;
		}
		else {
			mv_accPro[i] = 0;
		}
		if (i>0) mv_accPro[i] += mv_accPro[i - 1];
	}
	++m_loc;

	const double total = mv_accPro[numDim - 1];
	if (total>0) {
		const double r = Global::msp_global->mp_uniformAlg->Next()*total;
		// lower_bound(r) lands on an entry with positive weight only when r > 0.
		// For a draw of exactly 0 it would return index 0 even if that entry is
		// tabu, so skip the zero-weight prefix explicitly in that case.
		vector<double>::iterator it = (r > 0)
			? lower_bound(mv_accPro.begin(), mv_accPro.end(), r)
			: upper_bound(mv_accPro.begin(), mv_accPro.end(), 0.0);
		m_x[m_loc] = int(it - mv_accPro.begin());
	}
	else {
		m_x[m_loc] = firstZero;
	}
	if (!m_flag&&m_x[m_loc] != parent.data().m_x[m_loc]) m_flag = true;
	mv_tabu[m_x[m_loc]] = 1;
}
Пример #11
0
void Ant::selectNextCity_Greedy(const vector<vector<double> > &phero,double beta,double alpha)
{
	double max=-0xfffffff;
	double temp;
	int i,pos=-1;
	for(i=0;i<getNumDim();i++)
	{
		if(mv_tabu[i]==0)
		{
			temp=pow(phero[(m_x[m_loc])][i],alpha)*pow((1./dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get())->getCost()[(m_x[m_loc])][i]),beta);   
			if(max<temp) 
			{
				max=temp;
				pos=i;
			}
		}
	}
	if(pos==-1)
		throw myException("selectNextCity_Greedy() error in Ant.cpp");
	++m_loc;
	m_x[m_loc]=pos;
	mv_tabu[pos]=1;
}
Пример #12
0
/**
 * Chooses the next city of the tour and appends it.
 *
 * With probability xp one of the two cities returned by
 * TravellingSalesman::getNextCity(parent, current city) is taken, provided
 * at least one of them is not tabu. Otherwise the city is drawn by
 * roulette-wheel selection over all non-tabu cities, weighted by
 * phero[current city][city]. If all those weights are zero, the first
 * non-tabu city is taken.
 *
 * After every choice m_flag is raised if the city differs from parent's
 * city at the same position, and modifiedAnt(parent) is called.
 *
 * If every entry of mv_tabu is already set, no city can be selected:
 * firstZero stays -1 and is used as an index. The caller must not call
 * this once the tour is complete.
 *
 * @param parent individual whose tour is followed with probability xp.
 * @param phero  pheromone matrix, indexed [current city][next city].
 * @param xp     probability of trying to follow parent's tour.
 */
void Ant::selectNextCity_GLMemory_TSP(Individual<CodeVInt> &parent, const vector<vector<double> > &phero, double xp){

	const double coin=Global::msp_global->mp_uniformAlg->Next();
	if(coin<xp){
		pair<int,int> pa=dynamic_cast<TravellingSalesman*>(Global::msp_global->mp_problem.get())->getNextCity(parent,m_x[m_loc]);
		// Prefer whichever of the two cities is not tabu; first wins a tie.
		const int node=mv_tabu[pa.first]<mv_tabu[pa.second]?pa.first:pa.second;

		if(mv_tabu[node]==0){
			++m_loc;
			m_x[m_loc]=node;
			mv_tabu[node]=1;
			if(!m_flag&&parent.data().m_x[m_loc]!=node) m_flag=true;
			modifiedAnt(parent);
			return;
		}
	}

	const int curNode=m_x[m_loc],numDim=getNumDim();
	int firstZero=-1;   // first non-tabu city, -1 while none has been seen

	for(int i=0;i<numDim;i++){
		if(mv_tabu[i]==0){
			mv_accPro[i]=phero[curNode][i];
			if(firstZero==-1) firstZero=i;
		}else{
			mv_accPro[i]=0;
		}
		if(i>0) mv_accPro[i]+=mv_accPro[i-1];
	}
	++m_loc;

	const double total=mv_accPro[numDim-1];
	if(total>0){
		const double r=Global::msp_global->mp_uniformAlg->Next()*total;
		// lower_bound(r) lands on a city with positive weight only when r > 0.
		// For a draw of exactly 0 it would return index 0 even if that city is
		// tabu, so skip the zero-weight prefix explicitly in that case.
		vector<double>::iterator it=(r>0)
			? lower_bound(mv_accPro.begin(),mv_accPro.end(),r)
			: upper_bound(mv_accPro.begin(),mv_accPro.end(),0.0);
		m_x[m_loc]=int (it-mv_accPro.begin());
	}else{
		m_x[m_loc]=firstZero;
	}
	if(!m_flag&&m_x[m_loc]!=parent.data().m_x[m_loc]) m_flag=true;
	modifiedAnt(parent);
	mv_tabu[m_x[m_loc]]=1;
}
Пример #13
0
/**
 * Places the ant on its start city and marks that city tabu.
 *
 * @param initNode start city; -1 requests a random one, computed as
 *                 int((getNumDim() - 1) * Next()).
 *
 * NOTE(review): if Next() draws from [0,1), the truncation never yields
 * getNumDim() - 1 -- confirm the generator's range.
 */
void Ant::initialize(int initNode)
{
	m_x[m_loc] = (initNode != -1)
		? initNode
		: int((getNumDim() - 1) * Global::msp_global->mp_uniformAlg->Next());

	mv_tabu[int(m_x[m_loc])] = 1;
}
/* Build the site table and the offset table for the 4D checkerboarded layout.
 *
 * Steps, in order:
 *   1. compute subgrid_vol / subgrid_vol_cb from getSubgridSize();
 *   2. allocate site_table (my index -> QDP index) and a temporary inverse
 *      table (QDP index -> cb, linear index within cb);
 *   3. run three consistency checks that only printf on mismatch;
 *   4. build four temporary shift tables (DECOMP_SCATTER,
 *      DECOMP_HVV_SCATTER, RECONS_MVV_GATHER, RECONS_GATHER) and count
 *      off-node neighbours in bound;
 *   5. turn the shift tables into pointers stored in offset_table;
 *   6. free the shift tables and the inverse table.
 *
 * bound      [out] zeroed here, then incremented once per off-node
 *            neighbour; indexed [cb][table type][dir].
 * chi1, chi2 base pointers used for on-node entries (chi1 for
 *            DECOMP_SCATTER and RECONS_MVV_GATHER, chi2 for
 *            DECOMP_HVV_SCATTER and RECONS_GATHER).
 * recv_bufs, send_bufs
 *            buffers that off-node entries point into; first index 0 is
 *            used for DECOMP_SCATTER / RECONS_MVV_GATHER, 1 for
 *            DECOMP_HVV_SCATTER / RECONS_GATHER.
 * QDP_getSiteCoords, QDP_getLinearSiteIndex, QDP_getNodeNumber
 *            callbacks mapping between site index, coordinates and node.
 *
 * Writes the variables subgrid_vol, subgrid_vol_cb, xsite_table,
 * site_table, xoffset_table and offset_table, which are declared outside
 * this function. xsite_table and xoffset_table are not freed here.
 * Calls QMP_abort(1) when an allocation fails or when the boundary counts
 * of the two checkerboards differ.
 */
void make_shift_tables(int bound[2][4][4], halfspinor_array* chi1,
                       halfspinor_array* chi2,

                       halfspinor_array* recv_bufs[2][4],
                       halfspinor_array* send_bufs[2][4],

                       void (*QDP_getSiteCoords)(int coord[], int node, int linearsite),
                       int (*QDP_getLinearSiteIndex)(const int coord[]),

                       int (*QDP_getNodeNumber)(const int coord[]))
{
    /* NOTE(review): the reason for volatile on these loop counters is not
       visible here -- confirm before removing it. */
    volatile int dir,i;
    const int my_node = QMP_get_node_number();
    int coord[4];
    int gcoord[4];
    int gcoord2[4];

    int linear;
    int **shift_table;
    int x,y,z,t;
    int *subgrid_size = getSubgridSize();
    int mu;
    int offset;

    int cb;
    const int *node_coord  = QMP_get_logical_coordinates();
    int p;
    int site, index;

    /* xinvtab is the raw allocation, invtab the 64-byte aligned view of it */
    InvTab4 *xinvtab;
    InvTab4 *invtab;

    int qdp_index;
    int my_index;
    int num;
    int offsite_found;

    /* Setup the subgrid volume for ever after */
    subgrid_vol = 1;
    for(i=0; i < getNumDim(); ++i) {
        subgrid_vol *= getSubgridSize()[i];
    }

    /* Get the checkerboard size for ever after */
    subgrid_vol_cb = subgrid_vol / 2;

    /* Now I want to build the site table */
    /* I want it cache line aligned? */
    /* 63 spare bytes are requested so the pointer can be rounded up to a
       multiple of 64 below */
    xsite_table = (int *)malloc(sizeof(int)*subgrid_vol+63L);
    if(xsite_table == 0x0 ) {
        QMP_error("Couldnt allocate site table");
        QMP_abort(1);
    }

    /* Round the raw pointer up to the next 64-byte boundary */
    site_table = (int *)((((ptrdiff_t)(xsite_table))+63L)&(-64L));

    /* Inverse table, same over-allocate-and-align scheme.
       NOTE(review): the error text below repeats "site table" although
       this allocation is the inverse table. */
    xinvtab = (InvTab4 *)malloc(sizeof(InvTab4)*subgrid_vol + 63L);
    if(xinvtab == 0x0 ) {
        QMP_error("Couldnt allocate site table");
        QMP_abort(1);
    }
    invtab = (InvTab4 *)((((ptrdiff_t)(xinvtab))+63L)&(-64L));

    /* Inversity of functions check:
       Check that myLinearSiteIndex3D is in fact the inverse
       of mySiteCoords3D, and that QDP_getSiteCoords is the
       inverse of QDP_linearSiteIndex()
    */
    /* NOTE(review): the comment above and the second message below name
       the 3D functions, while the code calls mySiteCoords4D and
       myLinearSiteIndex4D. A mismatch is only printed, not fatal. */
    for(p=0; p < 2; p++) {
        for(site=0; site < subgrid_vol_cb; site++) {

            /* Linear site index */
            my_index = site + subgrid_vol_cb*p;
            QDP_getSiteCoords(gcoord, my_node, my_index);
            linear=QDP_getLinearSiteIndex(gcoord);

            if( linear != my_index ) {
                printf("P%d cb=%d site=%d : QDP_getSiteCoords not inverse of QDP_getLinearSiteIndex(): my_index=%d linear=%d\n", my_node, p,site, my_index,linear);
            }

            mySiteCoords4D(gcoord, my_node, my_index);
            linear=myLinearSiteIndex4D(gcoord);

            if( linear != my_index ) {
                printf("P%d cb=%d site=%d : mySiteCoords3D not inverse of myLinearSiteIndex3D(): my_index=%d linear=%d\n", my_node, p,site, my_index,linear);
            }
        }
    }


    /* Loop through sites - you can choose your path below */
    /* This is a checkerboarded order which is identical hopefully
       to QDP++'s rb2 subset when QDP++ is in a CB2 layout */
    for(p=0; p < 2; p++) {
        for(t=0; t < subgrid_size[3]; t++) {
            for(z=0; z < subgrid_size[2]; z++) {
                for(y=0; y < subgrid_size[1]; y++) {
                    for(x=0; x < subgrid_size[0]/2; x++) {

                        coord[0] = 2*x + p;
                        coord[1] = y;
                        coord[2] = z;
                        coord[3] = t;

                        /* Make global */
                        for(i=0; i < 4; i++) {
                            coord[i] += subgrid_size[i]*node_coord[i];
                        }

                        /* Index of coordinate -- NB this is not lexicographic
                           but takes into account checkerboarding in QDP++ */
                        qdp_index = QDP_getLinearSiteIndex(coord);

                        /* Index of coordinate in my layout. -- NB this is not lexicographic
                           but takes into account my 3D checkerboarding */
                        my_index = myLinearSiteIndex4D(coord);
                        site_table[my_index] = qdp_index;

                        cb=parity(coord);
                        linear = my_index%subgrid_vol_cb;

                        /* Reverse lookup: QDP index -> (cb, index within cb) */
                        invtab[qdp_index].cb=cb;
                        invtab[qdp_index].linearcb=linear;
                    }
                }
            }
        }
    }

    /* Site table transitivity check:
       for each site, convert to index in cb3d, convert to qdp index
       convert qdp_index to coordinate
       convert coordinate to back index in cb3d
       Check that your cb3d at the end is the same as you
       started with */
    for(p=0; p < 2; p++) {
        for(site=0; site < subgrid_vol_cb; site++) {

            /* My local index */
            my_index = site + subgrid_vol_cb*p;

            /* Convert to QDP index */
            qdp_index = site_table[ my_index ];

            /* Switch QDP index to coordinates */
            QDP_getSiteCoords(gcoord, my_node,qdp_index);

            /* Convert back to cb3d index */
            linear = myLinearSiteIndex4D(gcoord);

            /* Check new cb,cbsite index matches the old cb index */
            if (linear != my_index) {
                printf("P%d The Circle is broken. My index=%d qdp_index=%d coords=%d,%d,%d,%d linear(=my_index?)=%d\n", my_node, my_index, qdp_index, gcoord[0],gcoord[1],gcoord[2],gcoord[3],linear);
            }
        }
    }


    /* Consistency check 2: Test mySiteCoords 3D
       for all 3d cb,cb3index convert to
       cb3d linear index (my_index)
       convert to qdp_index (lookup in site table)

       Now convert qdp_index and my_index both to
       coordinates. They should produce the same coordinates
    */
    for(p=0; p < 2; p++) {
        for(site=0; site < subgrid_vol_cb; site++) {

            /* My local index */
            my_index = site + subgrid_vol_cb*p;
            mySiteCoords4D(gcoord, my_node, my_index);

            qdp_index = site_table[ my_index ];
            QDP_getSiteCoords(gcoord2, my_node,qdp_index);

            for(mu=0 ; mu < 4; mu++) {
                if( gcoord2[mu] != gcoord[mu] ) {
                    printf("P%d: my_index=%d qdp_index=%d mySiteCoords=(%d,%d,%d,%d) QDPsiteCoords=(%d,%d,%d,%d)\n", my_node, my_index, qdp_index, gcoord[0], gcoord[1], gcoord[2], gcoord[3], gcoord2[0], gcoord2[1], gcoord2[2], gcoord2[3]);
                    /* NOTE(review): this continue only advances the mu
                       loop, so one site can be reported up to four
                       times; a break may have been intended. */
                    continue;
                }
            }

        }
    }

    /* Allocate the shift table */
    /* The structure is as follows: There are 4 shift tables in order:

      [ Table 1 | Table 2 | Table 3 | Table 4 ]
      Table 1: decomp_scatter_index[mu][site]
      Table 2: decomp_hvv_scatter_index[mu][site]
      Table 3: recons_mvv_gather_index[mu][site]
      Table 4: recons_gather_index[mu][site]

    */

    /* This 4 is for the 4 tables: Table 1-4*/
    if ((shift_table = (int **)malloc(4*sizeof(int*))) == 0 ) {
        QMP_error("init_wnxtsu3dslash: could not initialize shift_table");
        QMP_abort(1);

    }

    for(i=0; i < 4; i++) {
        /* This 4 is for the 4 comms directions: */
        if ((shift_table[i] = (int *)malloc(4*subgrid_vol*sizeof(int))) == 0) {
            QMP_error("init_wnxtsu3dslash: could not initialize shift_table");
            QMP_abort(1);
        }
    }


    /* Initialize the boundary counters */
    for(cb=0; cb < 2; cb++) {
        for(dir=0; dir < 4; dir++) {
            bound[cb][0][dir] = 0;
            bound[cb][1][dir] = 0;
            bound[cb][2][dir] = 0;
            bound[cb][3][dir] = 0;
        }
    }


    /* Fill the four shift tables. Each entry lives at [dir + 4*index];
       on-node entries hold a linear index within a checkerboard, off-node
       entries hold a tail offset (subgrid_vol_cb or 2*subgrid_vol_cb plus
       a running boundary count). */
    for(cb=0; cb < 2; cb++) {
        for(site=0; site < subgrid_vol_cb; ++site) {

            index = cb*subgrid_vol_cb + site;

            /* Fetch site from site table */
            qdp_index = site_table[index];

            /* Get its coords */
            QDP_getSiteCoords(coord, my_node, qdp_index);

            /* Loop over directions building up shift tables */
            for(dir=0; dir < 4; dir++) {

                int fcoord[4], bcoord[4];
                int fnode, bnode;
                int blinear, flinear;

                /* Backwards displacement*/
                offs(bcoord, coord, dir, -1);
                bnode   = QDP_getNodeNumber(bcoord);
                blinear = QDP_getLinearSiteIndex(bcoord);

                /* Forward displacement */
                offs(fcoord, coord, dir, +1);
                fnode   = QDP_getNodeNumber(fcoord);
                flinear = QDP_getLinearSiteIndex(fcoord);

                /* Scatter:  decomp_{plus,minus} */
                /* Operation: a^F(shift(x,type=0),dir) <- decomp(psi(x),dir) */
                /* Send backwards - also called a receive from forward */
                if (bnode != my_node) {
                    /* Offnode */
                    /* Append to Tail 1, increase boundary count */
                    /* This is the correct code */
                    /* The scatter counters are kept on the opposite
                       checkerboard (1-cb) */
                    shift_table[DECOMP_SCATTER][dir+4*index]
                        = subgrid_vol_cb + bound[1-cb][DECOMP_SCATTER][dir];

                    bound[1-cb][DECOMP_SCATTER][dir]++;

                }
                else {
                    /* On node. Note the linear part of its (cb3, linear) bit,
                       using a reverse lookup */
                    shift_table[DECOMP_SCATTER][dir+4*index] =
                        invtab[blinear].linearcb;
                }


                /* Scatter:  decomp_hvv_{plus,minus} */
                /* Operation:  a^B(shift(x,type=1),dir) <- U^dag(x,dir)*decomp(psi(x),dir) */
                /* Send forwards - also called a receive from backward */
                if (fnode != my_node) {
                    /* Offnode */
                    /* Append to Tail 1, increase boundary count */
                    shift_table[DECOMP_HVV_SCATTER][dir+4*index]
                        = subgrid_vol_cb + bound[1-cb][DECOMP_HVV_SCATTER][dir];

                    bound[1-cb][DECOMP_HVV_SCATTER][dir]++;

                }
                else {
                    /* On node. Note the linear part of its (cb3, linear) bit,
                       using a reverse lookup */
                    shift_table[DECOMP_HVV_SCATTER][dir+4*index]           /* Onnode */
                        = invtab[flinear].linearcb ;
                }


                /* Gather:  mvv_recons_{plus,minus} */
                /* Operation:  chi(x) <-  \sum_dir U(x,dir)*a^F(shift(x,type=2),dir) */
                /* Receive from forward */
                if (fnode != my_node) {
                    /* Offnode */
                    /* Append to Tail 2, increase boundary count */

                    shift_table[RECONS_MVV_GATHER][dir+4*index] =
                        2*subgrid_vol_cb + (bound[cb][RECONS_MVV_GATHER][dir]);

                    bound[cb][RECONS_MVV_GATHER][dir]++;

                }
                else {
                    /* On node. Note the linear part of its (cb3, linear) bit,
                       using a reverse lookup. Note this is a recons post shift,
                       so the linear coordinate to invert is mine rather than the neighbours */
                    shift_table[RECONS_MVV_GATHER][dir+4*index] =
                        invtab[qdp_index].linearcb ;
                }

                /* Gather:  recons_{plus,minus} */
                /* Operation:  chi(x) +=  \sum_dir recons(a^B(shift(x,type=3),dir),dir) */
                /* Receive from backward */
                if (bnode != my_node) {

                    shift_table[RECONS_GATHER][dir+4*index] =
                        2*subgrid_vol_cb + bound[cb][RECONS_GATHER][dir];

                    bound[cb][RECONS_GATHER][dir]++;

                }
                else {
                    /* On node. Note the linear part of its (cb3, linear) bit,
                       using a reverse lookup. Note this is a recons post shift,
                       so the linear coordinate to invert is mine rather than the neighbours */

                    shift_table[RECONS_GATHER][dir+4*index] =
                        invtab[qdp_index].linearcb ;
                }
            }
        }
    }

    /* Sanity check - make sure the sending and receiving counters match */
    for(cb=0; cb < 2; cb++) {
        for(dir=0; dir < 4; dir++) {

            /* Sanity 1: Must have same number of boundary sites on each cb for
            a given operation */
            /* NOTE(review): the message says "type 0" for every table
               type i checked here. */
            for(i = 0; i < 4; i++) {
                if (bound[1-cb][i][dir] != bound[cb][i][dir]) {

                    QMP_error("SSE Wilson dslash - make_shift_tables: type 0 diff. cb send/recv counts do not match: %d %d",
                              bound[1-cb][i][dir],bound[cb][i][dir]);
                    QMP_abort(1);
                }
            }


        }
    }

    /* Now I want to make the offset table into the half spinor temporaries */
    /* The half spinor temporaries will look like this:

       dir=0 [ Body Half Spinors ][ Tail 1 Half Spinors ][ Tail 2 Half Spinors ]
       dir=1 [ Body Half Spinors ][ Tail 1 Half Spinors ][ Tail 2 Half Spinors ]
       ...

       And each of these blocks of half spinors will be sized to vol_cb
       sites (ie half volume only).  The shift_table() for a given site and
       direction indexes into one of these lines. So the offset table essentially
       delineates which line one picks, by adding an offset of
       3*subgrid_vol_cb*dir
       To the shift. The result from offset table, can be used directly as a
       pointer displacement on the temporaries.

       Perhaps the best way to consider this is to consider a value
       of shift_table[type][dir/site] that lands in the body. The
       shift table merely gives me a site index. But the data needs
       to be different for each direction for that site index. Hence
       we need to replicate the body, for each dir. The 3xsubgrid_vol_cb
       is just there to take care of the buffers.

       Or another way to think of it is that there is a 'body element' index
       specified by the shift table lookup, and that dir is just the slowest
       varying index.

    */
    /* NOTE(review): the text above describes a stride of
       3*subgrid_vol_cb*dir, but the on-node branches below add
       subgrid_vol_cb*dir and the off-node branches point into
       send_bufs/recv_bufs instead of a tail -- confirm which is current. */

    /* 4 dims, 4 types, rest of the magic is to align the thingie */
    xoffset_table = (halfspinor_array **)malloc(4*4*subgrid_vol*sizeof(halfspinor_array*)+63L);
    if( xoffset_table == 0 ) {
        QMP_error("init_wnxtsu3dslash: could not initialize offset_table[i]");
        QMP_abort(1);
    }
    /* This is the bit what aligns straight from AMD Manual */
    offset_table = (halfspinor_array**)((((ptrdiff_t)(xoffset_table)) + 63L) & (-64L));

    /* Walk through the shift_table and remap the offsets into actual
       pointers */
    /* In each of the four passes below, num selects the buffer slot; it
       advances only after a direction that had at least one off-node site.
       NOTE(review): these loops run to Nd while the table strides use a
       literal 4 -- presumably Nd == 4; confirm. */

    /* DECOMP_SCATTER */
    num=0;
    for(dir =0; dir < Nd; dir++) {

        /* Loop through all the sites. Remap the offsets either to local
           arrays or pointers */
        offsite_found=0;
        for(site=0; site < subgrid_vol; site++) {
            offset = shift_table[DECOMP_SCATTER][dir+4*site];
            if( offset >= subgrid_vol_cb ) {
                /* Found an offsite guy. Its address must be to the send back buffer */
                /* send to back index = recv from forward index = 0  */
                offsite_found++;
                offset_table[ dir + 4*(site + subgrid_vol*DECOMP_SCATTER) ] =
                    send_bufs[0][num]+(offset - subgrid_vol_cb);
            }
            else {
                /* Guy is onsite: This is DECOMP_SCATTER so offset to chi1 */
                offset_table[ dir + 4*(site + subgrid_vol*DECOMP_SCATTER) ] =
                    chi1+shift_table[DECOMP_SCATTER][dir+4*site]+subgrid_vol_cb*dir;
            }
        }

        if( offsite_found > 0 ) {
            /* If we found an offsite guy, next direction has to
            go into the next dir part of the send bufs */
            num++;
        }
    }

    /* DECOMP_HVV_SCATTER */
    /* Restart num-s */
    num=0;
    for(dir =0; dir <Nd; dir++) {
        offsite_found=0;
        for(site=0; site < subgrid_vol; site++) {
            offset = shift_table[DECOMP_HVV_SCATTER][dir+4*site];
            if( offset >= subgrid_vol_cb ) {
                /* Found an offsite guy. Its address must be to the send forw buffer */
                /* send to forward / receive from backward index = 1 */
                offsite_found++;

                offset_table[ dir + 4*(site + subgrid_vol*DECOMP_HVV_SCATTER) ] =
                    send_bufs[1][num]+(offset - subgrid_vol_cb);
            }
            else {
                /* Guy is onsite. This is DECOMP_HVV_SCATTER so offset to chi2 */
                offset_table[ dir + 4*(site + subgrid_vol*DECOMP_HVV_SCATTER) ] =
                    chi2+shift_table[DECOMP_HVV_SCATTER][dir+4*site ]+subgrid_vol_cb*dir;
            }
        }
        if( offsite_found > 0 ) {
            num++;
        }
    }

    /* RECONS_MVV_GATHER */
    num=0;
    for(dir =0; dir <Nd; dir++) {
        offsite_found=0;
        for(site=0; site < subgrid_vol; site++) {
            offset = shift_table[RECONS_MVV_GATHER][dir+4*site];
            if( offset >= 2*subgrid_vol_cb ) {
                /* Found an offsite guy. Its address must be to the recv from front buffer */
                /* recv_from front index = send to back index = 0 */
                offsite_found++;
                offset_table[ dir + 4*(site + subgrid_vol*RECONS_MVV_GATHER) ] =
                    recv_bufs[0][num]+(offset - 2*subgrid_vol_cb);
            }
            else {
                /* Guy is onsite */
                /* This is RECONS_MVV_GATHER so offset with respect to chi1 */
                offset_table[ dir + 4*(site + subgrid_vol*RECONS_MVV_GATHER) ] =
                    chi1+shift_table[RECONS_MVV_GATHER][dir+4*site ]+subgrid_vol_cb*dir;
            }
        }
        if( offsite_found > 0 ) {
            num++;
        }
    }

    /* RECONS_GATHER */
    num=0;
    for(dir =0; dir <Nd; dir++) {
        offsite_found=0;
        for(site=0; site < subgrid_vol; site++) {
            offset = shift_table[RECONS_GATHER][dir+4*site];
            if( offset >= 2*subgrid_vol_cb ) {
                /* Found an offsite guy. Its address must be to the recv from back buffer */
                /* receive from back = send to forward index =  1*/
                offsite_found++;
                offset_table[ dir + 4*(site + subgrid_vol*RECONS_GATHER) ] =
                    recv_bufs[1][num]+(offset - 2*subgrid_vol_cb);
            }
            else {
                /* Guy is onsite */
                /* This is RECONS_GATHER so offset with respect to chi2 */
                offset_table[ dir + 4*(site + subgrid_vol*RECONS_GATHER ) ] =
                    chi2+shift_table[RECONS_GATHER][dir+4*site ]+subgrid_vol_cb*dir;
            }
        }
        if( offsite_found > 0 ) {
            num++;
        }
    }



    /* Free shift table - it is no longer needed. We deal solely with offsets */
    for(i=0; i < 4; i++) {
        free( (shift_table)[i] );
    }
    free( shift_table );

    /* The inverse table was only needed while building the shift tables */
    free( xinvtab );

}