//-------------------------------------------------------------------------------------- // FUNCTION: iRCCE_init //-------------------------------------------------------------------------------------- // initialize the library //-------------------------------------------------------------------------------------- int iRCCE_init(void) { int i; #ifdef AIR #ifndef _OPENMP int * air_base = (int *) MallocConfigReg(FPGA_BASE + 0xE000); #endif #endif for(i=0; i<RCCE_MAXNP; i++) { iRCCE_irecv_queue[i] = NULL; } iRCCE_isend_queue = NULL; iRCCE_irecv_any_source_queue = NULL; #ifdef AIR #ifndef _OPENMP // Assign and Initialize First Set of Atomic Increment Registers for (i = 0; i < RCCE_MAXNP; i++) { iRCCE_atomic_inc_regs[i].counter = air_base + 2*i; iRCCE_atomic_inc_regs[i].init = air_base + 2*i + 1; if(RCCE_IAM == 0) *iRCCE_atomic_inc_regs[i].init = 0; } // Assign and Initialize Second Set of Atomic Increment Registers air_base = (int *) MallocConfigReg(FPGA_BASE + 0xF000); for (i = 0; i < RCCE_MAXNP; i++) { iRCCE_atomic_inc_regs[RCCE_MAXNP+i].counter = air_base + 2*i; iRCCE_atomic_inc_regs[RCCE_MAXNP+i].init = air_base + 2*i + 1; if(RCCE_IAM == 0) *iRCCE_atomic_inc_regs[RCCE_MAXNP+i].init = 0; } #endif // We need two AIRs for iRCCE_barrier(); iRCCE_atomic_alloc(&iRCCE_atomic_barrier[0]); iRCCE_atomic_alloc(&iRCCE_atomic_barrier[1]); #endif RCCE_barrier(&RCCE_COMM_WORLD); return (iRCCE_SUCCESS); }
//-------------------------------------------------------------------------------------- // FUNCTION: RCCE_init //-------------------------------------------------------------------------------------- // initialize the library and sanitize parameter list //-------------------------------------------------------------------------------------- int RCCE_init( int *argc, // pointer to argc, passed in from main program char ***argv // pointer to argv, passed in from main program ) { int i, ue, dummy_offset, loc, error; #ifdef SCC int x, y, z; unsigned int physical_lockaddress; #endif #ifdef SHMADD unsigned int RCCE_SHM_BUFFER_offset ,result, rd_slot_nbr, wr_slot_nbr; #endif void *nothing = NULL; #ifdef SCC // Copperridge specific initialization... InitAPI(0);fflush(0); #endif // save pointer to executable name for later insertion into the argument list char *executable_name = (*argv)[0]; RCCE_NP = atoi(*(++(*argv))); RC_REFCLOCKGHZ = atof(*(++(*argv))); // put the participating core ids (unsorted) into an array for (ue=0; ue<RCCE_NP; ue++) { RC_COREID[ue] = atoi(*(++(*argv))); } #ifndef SCC // if using the functional emulator, must make sure to have read all command line // parameters up to now before overwriting (shifted) first one with executable // name; even though argv is made firstprivate, that applies only the pointer to // the arguments, not the actual data #pragma omp barrier #endif // make sure executable name is as expected (*argv)[0] = executable_name; RC_MY_COREID = MYCOREID(); // adjust apparent number of command line arguments, so it will appear to main // program that number of UEs, clock frequency, and core ID list were not on // command line *argc -= RCCE_NP+2; // sort array of participating phyical core IDs to determine their ranks RCCE_qsort((char *)RC_COREID, RCCE_NP, sizeof(int), id_compare); // determine rank of calling core for (ue=0; ue<RCCE_NP; ue++) { if (RC_COREID[ue] == RC_MY_COREID) RCCE_IAM = ue; } #ifdef SHMADD // printf("Using SHMADD\n"); RCCE_SHM_BUFFER_offset = 0x00; // RCCE_SHM_BUFFER_offset = 0x3FFFF80; // RCCE_SHM_BUFFER_offset = 0x4000000; // RCCE_SHM_BUFFER_offset = 0x181000; rd_slot_nbr=0x80; for(i=0; i<60; i++) { result = readLUT(rd_slot_nbr); result -= 1; wr_slot_nbr = rd_slot_nbr + 4; writeLUT(wr_slot_nbr,result); rd_slot_nbr++; } #endif // leave in one reassuring debug print if (DEBUG){ printf("My rank is %d, physical core ID is %d\n", RCCE_IAM, RC_MY_COREID); fflush(0); } if (RCCE_IAM<0) return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_CORE_NOT_IN_HOSTFILE)); #ifdef SCC // compute and memory map addresses of test&set registers for all participating cores for (ue=0; ue<RCCE_NP; ue++) { z = Z_PID(RC_COREID[ue]); x = X_PID(RC_COREID[ue]); y = Y_PID(RC_COREID[ue]); physical_lockaddress = CRB_ADDR(x,y) + (z==0 ? LOCK0 : LOCK1); virtual_lockaddress[ue] = (t_vcharp) MallocConfigReg(physical_lockaddress); } #endif // initialize MPB starting addresses for all participating cores; allow one // dummy cache line at front of MPB for fooling write combine buffer in case // of single-byte MPB access RCCE_fool_write_combine_buffer = RC_COMM_BUFFER_START(RCCE_IAM); for (ue=0; ue<RCCE_NP; ue++) RCCE_comm_buffer[ue] = RC_COMM_BUFFER_START(ue) + RCCE_LINE_SIZE; // gross MPB size is set equal to maximum RCCE_BUFF_SIZE = RCCE_BUFF_SIZE_MAX - RCCE_LINE_SIZE; #ifdef RC_POWER_MANAGEMENT #ifndef SCC // always store RPC queue data structure at beginning of usable MPB, so allocatable // storage needs to skip it. Only need to do this for functional emulator for (ue=0; ue<RCCE_NP; ue++) RCCE_comm_buffer[ue] += REGULATOR_LENGTH; RCCE_BUFF_SIZE -= REGULATOR_LENGTH; #endif #endif // initialize RCCE_malloc RCCE_malloc_init(RCCE_comm_buffer[RCCE_IAM],RCCE_BUFF_SIZE); #ifdef SHMADD RCCE_shmalloc_init(RC_SHM_BUFFER_START()+RCCE_SHM_BUFFER_offset ,RCCE_SHM_SIZE_MAX); #ifdef SHMDBG printf("\n%d:%s:%d: RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX: % x %x\n", RCCE_IAM, __FILE__,__LINE__,RCCE_SHM_BUFFER_offset ,RCCE_SHM_SIZE_MAX); #endif #else RCCE_shmalloc_init(RC_SHM_BUFFER_START(),RCCE_SHM_SIZE_MAX); #endif // initialize the (global) flag bookkeeping data structure for (loc=0; loc<RCCE_FLAGS_PER_LINE; loc++) RCCE_flags.flag[loc] = (char)((unsigned int)0); RCCE_flags.line_address = NULL; RCCE_flags.members=0; RCCE_flags.next=NULL; // create global communicator (equivalent of MPI_COMM_WORLD); this will also allocate // the two synchronization flags associated with the global barrier RCCE_comm_split(RCCE_global_color, nothing, &RCCE_COMM_WORLD); // if power management is enabled, initialize more stuff; this includes two more // communicators (for voltage and frequency domains), plus two synchronization flags // associated with the barrier for each communicator #ifdef RC_POWER_MANAGEMENT if (error=RCCE_init_RPC(RC_COREID, RCCE_IAM, RCCE_NP)) return(RCCE_error_return(RCCE_debug_RPC,error)); #endif #ifndef GORY // if we use the simplified API, we need to define more flags upfront for (ue=0; ue<RCCE_NP; ue++) { if (error=RCCE_flag_alloc(&RCCE_sent_flag[ue])) return(RCCE_error_return(RCCE_debug_synch,error)); if (error=RCCE_flag_alloc(&RCCE_ready_flag[ue])) return(RCCE_error_return(RCCE_debug_synch,error)); } #endif return (RCCE_SUCCESS); }