/*
 * Unique numeric SMP-node identifier.
 *
 * With BGL_GROUP_ON_NODEBOARD defined, the id is the 13-bit field found
 * at bits 6..18 of mybgl.location.
 *
 * Otherwise the id is
 *     psetNum * numNodesInPset + rankInPset
 * and, when the personality reports virtual node mode, that value is
 * doubled and the processor id returned by rts_get_processor_id() is
 * added to it.
 */
long vt_pform_node_id()
{
#ifdef BGL_GROUP_ON_NODEBOARD
  return ((mybgl.location >> 6) & 0x1fff);
#else
  /* Not in virtual node mode: the plain per-node index is the id. */
  if ( !BGLPersonality_virtualNodeMode(&mybgl) )
    return BGLPersonality_psetNum(&mybgl) *
           BGLPersonality_numNodesInPset(&mybgl) +
           BGLPersonality_rankInPset(&mybgl);

  /* Virtual node mode: double the node index and add the processor id. */
  return ( BGLPersonality_psetNum(&mybgl) *
           BGLPersonality_numNodesInPset(&mybgl) +
           BGLPersonality_rankInPset(&mybgl)) * 2
         + rts_get_processor_id();
#endif
}
/*
 * Unique string SMP-node identifier.
 *
 * Returns a pointer to a function-local static buffer, so the result is
 * overwritten by the next call and must not be freed by the caller.
 *
 * With BGL_GROUP_ON_NODEBOARD defined, the string is whatever
 * bgl_getNodeidString() writes for mybgl. Otherwise the string has the
 * form "node-XXX-YYY-ZZZ-P", built from the x/y/z coordinates of the
 * personality (zero-padded to at least three digits) and the processor id.
 */
char* vt_pform_node_name()
{
#ifdef BGL_GROUP_ON_NODEBOARD
  static char buf[BGLPERSONALITY_MAX_LOCATION];
  bgl_getNodeidString(&mybgl, buf);
  return buf;
#else
  static char node[128];
  unsigned x = BGLPersonality_xCoord(&mybgl);
  unsigned y = BGLPersonality_yCoord(&mybgl);
  unsigned z = BGLPersonality_zCoord(&mybgl);

  /* The coordinates are unsigned, so they are formatted with %u; the
     write is bounded by the size of the static buffer. */
  snprintf(node, sizeof(node), "node-%03u-%03u-%03u-%d",
           x, y, z, rts_get_processor_id());

  /* -- BGL internal location string (alternative, not used)
  static char buf[BGLPERSONALITY_MAX_LOCATION];
  BGLPersonality_getLocationString(&mybgl, buf);
  -- */
  return node;
#endif
}
//------------------------------------------------------------------ // It initialize the inter-core ic_locks of the 4 memory-fifos // to 0 indicating that the fifos are empty. //------------------------------------------------------------------ void BGLCPSTorusMFifo_Init (void) { char ic_lock[16] __attribute__((aligned(BGL_QUAD_ALIGNSIZE))); // Get core id int pir = rts_get_processor_id(); // Initialize the inter-core ic_locks to 0 ic_lock[0] = 0; if(pir = 0){ QuadMove(&ic_lock,MFIFO_A,0); QuadMove(&ic_lock,MFIFO_D,0); } else { QuadMove(&ic_lock,MFIFO_B,0); QuadMove(&ic_lock,MFIFO_C,0); } // Local barrier BGL_Barrier_Pass(BGL_AppBarriers); }
void init_qmp(int * argc, char ***argv) { #if 0 printf("init_qmp(%d %p)\n",*argc,*argv); for(int i = 0; i<*argc;i++){ printf("argv[%d](before)=%s\n",i,(*argv)[i]); } #endif #if 0 spi_init(); #endif QMP_thread_level_t prv; #ifndef UNIFORM_SEED_NO_COMMS QMP_status_t init_status = QMP_init_msg_passing(argc, argv, QMP_THREAD_SINGLE, &prv); if (init_status) printf("QMP_init_msg_passing returned %d\n",init_status); peRank = QMP_get_node_number(); peNum = QMP_get_number_of_nodes(); if(!peRank)printf("QMP_init_msg_passing returned %d\n",init_status); if (init_status != QMP_SUCCESS) { QMP_error("%s\n",QMP_error_string(init_status)); } // check QMP thread level // Added by Hantao if(peRank == 0) { switch(prv) { case QMP_THREAD_SINGLE: printf("QMP thread level = QMP_THREAD_SINGLE\n"); break; case QMP_THREAD_FUNNELED: printf("QMP thread level = QMP_THREAD_FUNNELED\n"); break; case QMP_THREAD_SERIALIZED: printf("QMP thread level = QMP_THREAD_SERIALIZED\n"); break; case QMP_THREAD_MULTIPLE: printf("QMP thread level = QMP_THREAD_MULTIPLE\n"); break; default: printf("QMP thread level = no idea what this is, boom!\n"); } } //Check to make sure that this machine is a GRID machine //Exit if not GRID machine QMP_ictype qmp_type = QMP_get_msg_passing_type(); //Get information about the allocated machine peNum = QMP_get_number_of_nodes(); NDIM = QMP_get_allocated_number_of_dimensions(); peGrid = QMP_get_allocated_dimensions(); pePos = QMP_get_allocated_coordinates(); if(peRank==0){ for(int i = 0; i<*argc;i++){ printf("argv[%d])(after)=%s\n",i,(*argv)[i]); } } #else QMP_status_t init_status = QMP_SUCCESS; peRank=0; peNum=1; NDIM=4; #endif //#if (TARGET == BGL) || (TARGET == BGP) if (NDIM>5){ peNum = 1; for(int i = 0;i<5;i++) peNum *= peGrid[i]; peRank = peRank % peNum; } int if_print=1; for(int i = 0;i<NDIM;i++) if (pePos[i]>=2) if_print=0; if (if_print){ printf("Rank=%d Num=%d NDIM=%d\n",peRank,peNum,NDIM); printf("dim:"); for(int i = 0;i<NDIM;i++) printf(" %d",peGrid[i]); printf("\n"); 
printf("pos:"); for(int i = 0;i<NDIM;i++) printf(" %d",pePos[i]); printf("\n"); #if 0 int rc; BGLPersonality pers; rts_get_personality(&pers, sizeof(pers)); printf("from personality: %d %d %d %d\n",pers.xCoord,pers.yCoord,pers.zCoord,rts_get_processor_id()); #endif } // printf("from personality:\n"); #if 0 if ( (qmp_type!= QMP_GRID) && (qmp_type !=QMP_MESH) ) { QMP_error("CPS on QMP only implemented for GRID or MESH, not (%d) machines\n",qmp_type); } #endif // printf("QMP_declare_logical_topology(peGrid, NDIM)\n"); #ifndef UNIFORM_SEED_NO_COMMS //Declare the logical topology (Redundant for GRID machines) if (QMP_declare_logical_topology(peGrid, NDIM) != QMP_SUCCESS) { QMP_error("Node %d: Failed to declare logical topology\n",peRank); exit(-4); } #endif initialized = true; printf("Rank=%d init_qmp() done\n",peRank); }
//------------------------------------------------------------------ // Various initializations // Here it is assumed that the TLBs for cores 0, 1 are set // so that the addresses below translate appropriately. //------------------------------------------------------------------ void BGLCPSVarious_Init (void) { int pir; //---------------------------------------------------------------- // get the core id //---------------------------------------------------------------- pir = rts_get_processor_id(); //---------------------------------------------------------------- // Set the addresses where the sender status are stored //---------------------------------------------------------------- if(pir == 0){ stat_se_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[6] = MFIFO_C0_SEND_P; stat_se_ptr[7] = MFIFO_C0_SEND_M; } else { stat_se_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_se_ptr[6] = MFIFO_C1_SEND_P; stat_se_ptr[7] = MFIFO_C1_SEND_M; } //---------------------------------------------------------------- // Set the addresses where the receiver status are stored 
//---------------------------------------------------------------- if(pir == 0){ stat_re_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[6] = MFIFO_C0_RECV_P; stat_re_ptr[7] = MFIFO_C0_RECV_M; } else { stat_re_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_STATUS0_OFFSET); stat_re_ptr[6] = MFIFO_C1_RECV_P; stat_re_ptr[7] = MFIFO_C1_RECV_M; } //---------------------------------------------------------------- // Set the offset within the status quad-word value, // addressed in Bytes, where the 1 Byte status for a given // sender is located. The possible offsets are 0, 1, ...15. 
//---------------------------------------------------------------- stat_se[0] = 7; // status of recv fifo 0 stat_se[1] = 7; // status of recv fifo 1 stat_se[2] = 8; // status of recv fifo 2 stat_se[3] = 8; // status of recv fifo 3 stat_se[4] = 9; // status of recv fifo 4 stat_se[5] = 9; // status of recv fifo 5 stat_se[6] = 0; // status of memory recv fifo 6 stat_se[7] = 0; // status of memory recv fifo 7 //---------------------------------------------------------------- // Set the offset within the status quad-word value, // addressed in Bytes, where the 1 Byte status for a given // receiver is located. The possible offsets are 0, 1, ...15. //---------------------------------------------------------------- stat_re[0] = 0; // status of recv fifo 0 stat_re[1] = 1; // status of recv fifo 1 stat_re[2] = 2; // status of recv fifo 2 stat_re[3] = 3; // status of recv fifo 3 stat_re[4] = 4; // status of recv fifo 4 stat_re[5] = 5; // status of recv fifo 5 stat_re[6] = 0; // status of memory recv fifo 6 stat_re[7] = 0; // status of memory recv fifo 7 //---------------------------------------------------------------- // Set the sender fifo addresses. 
//---------------------------------------------------------------- if(pir == 0){ fifo_se_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_0_OFFSET); fifo_se_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_0_OFFSET); fifo_se_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_1_OFFSET); fifo_se_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_1_OFFSET); fifo_se_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_2_OFFSET); fifo_se_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_2_OFFSET); fifo_se_ptr[6] = MFIFO_C0_SEND_P + 1; fifo_se_ptr[7] = MFIFO_C0_SEND_M + 1; } else { fifo_se_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_0_OFFSET); fifo_se_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_0_OFFSET); fifo_se_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_1_OFFSET); fifo_se_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_1_OFFSET); fifo_se_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_2_OFFSET); fifo_se_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAIN_2_OFFSET); fifo_se_ptr[6] = MFIFO_C1_SEND_P + 1; fifo_se_ptr[7] = MFIFO_C1_SEND_M + 1; } //---------------------------------------------------------------- // Set the receiver fifo addresses. 
//---------------------------------------------------------------- if(pir == 0){ fifo_re_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_0_OFFSET); fifo_re_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_1_OFFSET); fifo_re_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_2_OFFSET); fifo_re_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_3_OFFSET); fifo_re_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_4_OFFSET); fifo_re_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_5_OFFSET); fifo_re_ptr[6] = MFIFO_C0_RECV_P + 1; fifo_re_ptr[7] = MFIFO_C0_RECV_M + 1; } else { fifo_re_ptr[0] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_0_OFFSET); fifo_re_ptr[1] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_1_OFFSET); fifo_re_ptr[2] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_2_OFFSET); fifo_re_ptr[3] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_3_OFFSET); fifo_re_ptr[4] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_4_OFFSET); fifo_re_ptr[5] = (BGLQuad *) (BGL_MEM_TORUS_G0_BASE+BGL_MEM_TORUS_DATAOUT_5_OFFSET); fifo_re_ptr[6] = MFIFO_C1_RECV_P + 1; fifo_re_ptr[7] = MFIFO_C1_RECV_M + 1; } for(int i=0; i<8; i++){ send_poll[i] = 0; recv_poll[i] = 0; } }
//------------------------------------------------------------------ // It fills in the buffer hdr_send_buf allocated in this file with // 8x3=24 headers : one for each of x+, x-, y+, y-, z+, z-, t_, t- // and for each of those for sizes 32B, 128B, 256B. The headers // for t+, t- are set to 0, since they are not used by the memory // communications. // // It also sets the hint bits for nearest neighbor communication. // // Should be called once before any other routines in this file are // used. //------------------------------------------------------------------ void BGLCPSTorusPacketHeader_InitFill (void) { int nn; int x, y, z; int Lx, Ly, Lz; int pir; BGLPersonality pers; // Get the core id pir = rts_get_processor_id(); // Get personality info rts_get_personality(&pers, sizeof(pers)); // Set the sizes of each direction // (size starts from 1) Lx = pers.xSize; Ly = pers.ySize; Lz = pers.zSize; // Set the coordinates of this node // (coordinate ranges fro 0 to size-1) x = pers.xCoord; y = pers.yCoord; z = pers.zCoord; // Fill the header for a packet destined to go to the // nearest neighbor along x+ if(x == Lx-1){ nn = 0; } else { nn = x+1; } BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[0][0]), 1, 0, 0, 0, 0, 0, nn, y, z, pir, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[0][1]), 1, 0, 0, 0, 0, 0, nn, y, z, pir, 3); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[0][2]), 1, 0, 0, 0, 0, 0, nn, y, z, pir, 7); // Fill the header for a packet destined to go to the // nearest neighbor along x- if(x == 0){ nn = Lx-1; } else { nn = x-1; } BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[1][0]), 0, 1, 0, 0, 0, 0, nn, y, z, pir, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[1][1]), 0, 1, 0, 0, 0, 0, nn, y, z, pir, 3); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[1][2]), 0, 1, 0, 0, 0, 0, nn, y, z, pir, 7); // Fill the header for a packet destined to go to the // nearest neighbor along y+ if(y == Ly-1){ nn = 0; } else { nn = y+1; } 
BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[2][0]), 0, 0, 1, 0, 0, 0, x, nn, z, pir, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[2][1]), 0, 0, 1, 0, 0, 0, x, nn, z, pir, 3); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[2][2]), 0, 0, 1, 0, 0, 0, x, nn, z, pir, 7); // Fill the header for a packet destined to go to the // nearest neighbor along y- if(y == 0){ nn = Ly-1; } else { nn = y-1; } BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[3][0]), 0, 0, 0, 1, 0, 0, x, nn, z, pir, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[3][1]), 0, 0, 0, 1, 0, 0, x, nn, z, pir, 3); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[3][2]), 0, 0, 0, 1, 0, 0, x, nn, z, pir, 7); // Fill the header for a packet destined to go to the // nearest neighbor along z+ if(z == Lz-1){ nn = 0; } else { nn = z+1; } BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[4][0]), 0, 0, 0, 0, 1, 0, x, y, nn, pir, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[4][1]), 0, 0, 0, 0, 1, 0, x, y, nn, pir, 3); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[4][2]), 0, 0, 0, 0, 1, 0, x, y, nn, pir, 7); // Fill the header for a packet destined to go to the // nearest neighbor along z- if(z == 0){ nn = Lz-1; } else { nn = z-1; } BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[5][0]), 0, 0, 0, 0, 0, 1, x, y, nn, pir, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[5][1]), 0, 0, 0, 0, 0, 1, x, y, nn, pir, 3); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[5][2]), 0, 0, 0, 0, 0, 1, x, y, nn, pir, 7); // Fill a dummy header for t+ with 0 BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[6][0]), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[6][1]), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[6][2]), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); // Fill a dummy header for t- with 0 BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[7][0]), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[7][1]), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 
BGLCPSTorusPacketHeader_Init(&(hdr_send_buf[7][2]), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); }
//------------------------------------------------------------------ // The Wilson fermion communication routine. //------------------------------------------------------------------ void wfm_comm() { int i, k, dir, ig, ic, group, d; int mu_se[8]; int mu_re[8]; int mu_nc[8]; BGLQuad *fifo; BGLQuad *qdata; char stat[16] __attribute__((aligned(BGL_QUAD_ALIGNSIZE))); int pir; int wfm_dir; // register int u asm("r9") = 16; register int u = 16; // Get the core id pir = rts_get_processor_id(); mu_se[0] = 0; mu_se[1] = 4; mu_se[2] = 1; mu_se[3] = 5; mu_se[4] = 2; mu_se[5] = 6; mu_se[6] = 3; mu_se[7] = 7; mu_re[0] = 4; mu_re[1] = 0; mu_re[2] = 5; mu_re[3] = 1; mu_re[4] = 6; mu_re[5] = 2; mu_re[6] = 7; mu_re[7] = 3; mu_nc[0] = 0; mu_nc[1] = 4; mu_nc[2] = 1; mu_nc[3] = 5; mu_nc[4] = 2; mu_nc[5] = 6; mu_nc[6] = 3; mu_nc[7] = 7; group = 4; //------------------------------------------------------------------------ // Send plus receive minus x y z //------------------------------------------------------------------------ for(ig=0; ig<wfm_max_numchunk/group; ig++){ //Send x y z //---------------------------------------------------------------------- for(ic=0;ic<group;ic++){ i = group*ig + ic; QuadMove(stat_se_ptr[0], &stat, 30); for(d=0; d<3; d++){ dir = 2*d + pir; wfm_dir = bgl_cps_dir[dir]; if( (i < wfm_numchunk[mu_nc[wfm_dir]]) && (grid_end[dir] != 1) ){ QuadMove(stat_se_ptr[dir], &stat, 30); while (1) { if (stat[stat_se[dir]] < SEND_FIFO_LEVEL) { break; } send_poll[dir]++; QuadMove(stat_se_ptr[dir], &stat, 30); } fifo = fifo_se_ptr[dir]; qdata = (BGLQuad *) wfm_send_ad[mu_se[wfm_dir]+8*i]; TORUS_SEND_SPINOR(dir, fifo, qdata); } } } // printf("send xyz 0\n"); //Send / Receive t //---------------------------------------------------------------------- for(ic=0;ic<group;ic++){ i = group*ig + ic; // Send t dir = 6; wfm_dir = bgl_cps_dir[dir]; { if( (i < wfm_numchunk[mu_nc[wfm_dir]]) && (grid_end[dir] != 1) ){ while (1) { QuadMove(stat_se_ptr[dir], &stat, 30); if 
(stat[stat_se[dir]] < SEND_FIFO_LEVEL) { break; } send_poll[dir]++; } fifo = fifo_se_ptr[dir]; qdata = (BGLQuad *) wfm_send_ad[mu_se[wfm_dir]+8*i]; MEM_SEND_SPINOR(dir, fifo, qdata); } } // Receive t dir = 7; wfm_dir = bgl_cps_dir[dir]; { if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] != 1) ){ while (1) { QuadMove(stat_re_ptr[dir], &stat, 30); if (stat[stat_re[dir]] > RECV_FIFO_LEVEL) { break; } recv_poll[dir]++; } fifo = fifo_re_ptr[dir]; qdata = (BGLQuad *) wfm_recv_ad[mu_re[wfm_dir]+8*i]; MEM_RECV_SPINOR(dir, fifo, qdata); } if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] == 1) ){ IFloat *data = wfm_recv_ad[mu_re[wfm_dir]+8*i]; for(k=0; k<12; k++){ data[k] = 0; } } } } // printf("send receive t 0\n"); // Recv x y z //---------------------------------------------------------------------- for(ic=0;ic<group;ic++){ i = group*ig + ic; QuadMove(stat_re_ptr[0], &stat, 30); for(d=0; d<3; d++){ dir = 2*d + (1+pir)%2; wfm_dir = bgl_cps_dir[dir]; if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] != 1) ){ QuadMove(stat_re_ptr[dir], &stat, 30); while (1) { if (stat[stat_re[dir]] > RECV_FIFO_LEVEL) { break; } recv_poll[dir]++; QuadMove(stat_re_ptr[dir], &stat, 30); } fifo = fifo_re_ptr[dir]; qdata = (BGLQuad *) wfm_recv_ad[mu_re[wfm_dir]+8*i]; TORUS_RECV_SPINOR(dir, fifo, qdata); } if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] == 1) ){ IFloat *data = wfm_recv_ad[mu_re[wfm_dir]+8*i]; for(k=0; k<12; k++){ data[k] = 0; } } } } // printf("recv xyz 0\n"); } //------------------------------------------------------------------------ // Send minus receive plus x,y,z //------------------------------------------------------------------------ for(ig=0; ig<wfm_max_numchunk/group; ig++){ // time_4[ig] = BGLTimebase(); // poll_count_4[ig] = poll_count; //Send x y z //---------------------------------------------------------------------- for(ic=0;ic<group;ic++){ i = group*ig + ic; QuadMove(stat_se_ptr[0], &stat, 30); for(d=0; d<3; d++){ dir = 2*d + (1+pir)%2; 
wfm_dir = bgl_cps_dir[dir]; if( (i < wfm_numchunk[mu_nc[wfm_dir]]) && (grid_end[dir] != 1) ){ QuadMove(stat_se_ptr[dir], &stat, 30); while (1) { if (stat[stat_se[dir]] < SEND_FIFO_LEVEL) { break; } send_poll[dir]++; QuadMove(stat_se_ptr[dir], &stat, 30); } fifo = fifo_se_ptr[dir]; qdata = (BGLQuad *) wfm_send_ad[mu_se[wfm_dir]+8*i]; TORUS_SEND_SPINOR(dir, fifo, qdata); } } } // printf("send xyz 1\n"); //Send / Receive t //---------------------------------------------------------------------- for(ic=0;ic<group;ic++){ i = group*ig + ic; // Send t dir = 7; wfm_dir = bgl_cps_dir[dir]; { if( (i < wfm_numchunk[mu_nc[wfm_dir]]) && (grid_end[dir] != 1) ){ while (1) { QuadMove(stat_se_ptr[dir], &stat, 30); if (stat[stat_se[dir]] < SEND_FIFO_LEVEL) { break; } send_poll[dir]++; } fifo = fifo_se_ptr[dir]; qdata = (BGLQuad *) wfm_send_ad[mu_se[wfm_dir]+8*i]; MEM_SEND_SPINOR(dir, fifo, qdata); } } // Receive t dir = 6; wfm_dir = bgl_cps_dir[dir]; { if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] != 1) ){ while (1) { QuadMove(stat_re_ptr[dir], &stat, 30); if (stat[stat_re[dir]] > RECV_FIFO_LEVEL) { break; } recv_poll[dir]++; } fifo = fifo_re_ptr[dir]; qdata = (BGLQuad *) wfm_recv_ad[mu_re[wfm_dir]+8*i]; MEM_RECV_SPINOR(dir, fifo, qdata); } if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] == 1) ){ IFloat *data = wfm_recv_ad[mu_re[wfm_dir]+8*i]; for(k=0; k<12; k++){ data[k] = 0; } } } } // printf("send recv t 1\n"); // Recv x y z //---------------------------------------------------------------------- for(ic=0;ic<group;ic++){ i = group*ig + ic; QuadMove(stat_re_ptr[0], &stat, 30); for(d=0; d<3; d++){ dir = 2*d + pir; wfm_dir = bgl_cps_dir[dir]; if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] != 1) ){ QuadMove(stat_re_ptr[dir], &stat, 30); while (1) { if (stat[stat_re[dir]] > RECV_FIFO_LEVEL) { break; } recv_poll[dir]++; QuadMove(stat_re_ptr[dir], &stat, 30); } fifo = fifo_re_ptr[dir]; qdata = (BGLQuad *) wfm_recv_ad[mu_re[wfm_dir]+8*i]; TORUS_RECV_SPINOR(dir, fifo, 
qdata); } if(i < wfm_numchunk[mu_nc[wfm_dir]] && (grid_end[dir] == 1) ){ IFloat *data = wfm_recv_ad[mu_re[wfm_dir]+8*i]; for(k=0; k<12; k++){ data[k] = 0; } } } } // printf("recv xyz 1\n"); } }