/*
 * Calls a series of compiler built-ins (__fmadd, __lwsync, __nop,
 * __fctidz, __fctid, __fcfid, __dcbzl) and returns the sum of the
 * resulting doubles converted to int.
 *
 * NOTE(review): this looks like a compile/link probe for PowerPC
 * built-ins rather than a real program -- confirm against the build
 * system that consumes it.
 */
int main( void )
{
    int i, j;            /* not referenced below */
    int value;           /* target object for the __dcbzl calls */
    register int temp;   /* not referenced below */
    double a = 1.;
    double b = 2.;
    double c = 3.;
    double d;

    /* Chain of __fmadd calls, each feeding the next. */
    d = __fmadd(a, b, c);
    a = __fmadd(c, b, d);
    d = __fmadd(a, b, c);

    /* Barrier / no-op built-ins, issued twice each. */
    __lwsync();
    __nop();
    __lwsync();
    __nop();

    /* Conversion built-ins, each applied to a fresh value and then to
     * its own result. */
    d = __fctidz(a);
    d = __fctidz(d);
    b = __fctid(a);
    b = __fctid(b);
    c = __fcfid(a);
    c = __fcfid(c);

    /* NOTE(review): the two calls below pass their arguments in opposite
     * orders; the built-in's prototype is not visible here -- verify
     * which form is intended. */
    __dcbzl(32, &value);
    __dcbzl(&value, 0);

    return a + b + c + d;
}
/*
 * Poll stat() on every tracked file, forever.
 *
 * track   - array of tracked-file records; for each one this function
 *           closes fd_w/fd_r once at start-up, then repeatedly refreshes
 *           s_stat.s (the stat result), s_stat.t (time the stat call
 *           completed) and s_stat.ready (1 if the last stat succeeded,
 *           0 otherwise), all while holding s_stat.m.
 * tracked - number of entries in track.
 * delay   - sleep interval handed to nanosleep() between full passes.
 *
 * The outer loop has no exit condition, so this function does not return.
 */
void a_stat(track_t *track, unsigned int tracked, struct timespec *delay)
{
    unsigned int i;
    struct timespec before, after;

    clock_gettime(CLOCK_REALTIME, &before);
    warn_t(&before, "STARTING A_STAT\n");

    /* Get rid of the read & write file descriptors */
    for (i = 0; i < tracked; i++) {
        close(track[i].fd_w);
        close(track[i].fd_r);
    }

    for ( ; ; nanosleep(delay, NULL)) {
        for (i = 0; i < tracked; i++) {
            int e;
            int saved_errno;
            long long elapsed_ns;

            pthread_mutex_lock(&(track[i].s_stat.m));
            __lwsync();
            track[i].s_stat.ready = 0;

            clock_gettime(CLOCK_REALTIME, &before);
            e = stat(track[i].fn, &(track[i].s_stat.s));
            /* Capture errno before any other library call can overwrite it. */
            saved_errno = errno;
            clock_gettime(CLOCK_REALTIME, &after);
            track[i].s_stat.t = after;

            if (e >= 0) {
                track[i].s_stat.ready = 1;
            } else {
                warn_t(&after, "stat(\"%s\",..) failed: %s, error code %d\n",
                       track[i].fn, strerror(saved_errno), saved_errno);
            }

            /* Widen before multiplying so the nanosecond count cannot
             * overflow where long is 32 bits. */
            elapsed_ns = (long long)(after.tv_sec - before.tv_sec) * 1000000000LL
                       + (after.tv_nsec - before.tv_nsec);

            if (track[i].s_stat.ready && (elapsed_ns > LONGIO)) {
                /* The only conversion in the format is %s, so the file
                 * name must be the sole variadic argument. */
                warn_t(&after, "stat(\"%s\",..) long I/O > " QUOTE(LONGIO) "nsec\n",
                       track[i].fn);
            }

            __lwsync();
            pthread_mutex_unlock(&(track[i].s_stat.m));
            __lwsync();
        }
    }
}
inline int32_t atomic_decrement( int32_t * pw ) { // return --*pw; int32_t originalValue; __lwsync(); originalValue = fetch_and_add( pw, -1 ); __isync(); return (originalValue - 1); }
inline int atomic_conditional_increment( int *pw ) { // if( *pw != 0 ) ++*pw; // return *pw; __lwsync(); int v = *const_cast<volatile int*>(pw); for (;;) // loop until state is known { if (v == 0) return 0; if (__compare_and_swap(pw, &v, v + 1)) { __isync(); return (v + 1); } } }
static void __attribute__((noinline)) __sm_init(void) { char* tmp; size_t total_size; size_t rank_divider; size_t pagesize = (size_t)getpagesize(); int do_init; //Whether to do initialization //Set up a temporary area on the stack for malloc() calls during our // initialization process. uint64_t* temp_space = alloca(TEMP_SIZE); sm_mspace = create_mspace_with_base(temp_space, TEMP_SIZE, 0); //Keep this for use with valgrind. //sm_mspace = create_mspace_with_base(sm_temp, TEMP_SIZE, 0); //sm_region->limit = (intptr_t)sm_region + TEMP_SIZE; //Query environment variables to figure out how much size is available. //The value of SM_SIZE is always expected to be megabytes. tmp = getenv("SM_SIZE"); if(tmp == NULL) { //On BGQ, the size var MUST be set. //If it is not, there probably is only enough shared memory for the // system reservation. Can't assume there's usable SM, so abort. #ifdef __bg__ ERROR("SM_SIZE env var not set (make sure BG_SHAREDMEMSIZE is set too"); #else total_size = DEFAULT_TOTAL_SIZE; #endif } else { total_size = atol(tmp) * 1024L * 1024L; } //SM_RANKS and DEFAULT_RANK_DIVIDER indicate how many regions to break the //SM region into -- one region per rank/process. tmp = getenv("SM_RANKS"); //if (tmp == NULL){ #ifdef __MIC__ tmp = getenv("MIC_PPN"); #endif //} if(tmp == NULL) { rank_divider = DEFAULT_RANK_DIVIDER; } else { rank_divider = atol(tmp); } //offset is the size taken by sm_region at the beginning of the space. size_t offset = ((sizeof(struct sm_region) / pagesize) + 1) * pagesize; #ifdef USE_PROC_MAPS void* map_addr = find_map_address(total_size + offset); #else void* map_addr = NULL; #endif printf("map addr : %lu \n", map_addr); //Set up the SM region using one of mmap/sysv/pshm do_init = __sm_init_region(map_addr, total_size + offset); //Only the process creating the file should initialize. if(do_init) { //Only the initializing process registers the shutdown handler. 
atexit(__sm_destroy); sm_region->limit = (intptr_t)sm_region + total_size + offset; #ifdef __bg__ //Ensure everything above is set before brk below: // setting brk is the synchronization signal. __lwsync(); #endif sm_region->brk = (intptr_t)sm_region + offset; } else { //Wait for another process to finish initialization. void* volatile * brk_ptr = (void**)&sm_region->brk; while(*brk_ptr == NULL); //Ensure none of the following loads occur during/before the spin loop. #ifdef __bg__ __lwsync(); #endif } //Create my own mspace. size_t local_size = total_size / rank_divider; printf("params: map_addr : %lu sm_region: %lu brk : %lu limit : %lu total_size : %ld offset : %lu ranks : %ld local_size : %ld \n", map_addr, sm_region, sm_region->brk ,sm_region->limit, total_size, offset, rank_divider, local_size); //Check that this process' region is mapped to the same address as the //process that initialized the region. if(sm_region->limit != (intptr_t)sm_region + total_size + offset) { printf("ERROR !!! ==> params: map_addr : %lu sm_region: %lu brk : %lu limit : %lu total_size : %ld offset : %lu ranks : %ld local_size : %ld \n", map_addr, sm_region, sm_region->brk ,sm_region->limit, total_size, offset, rank_divider, local_size); ERROR("sm_region limit %lx doesn't match computed limit %lx", sm_region->limit, (intptr_t)sm_region + total_size + offset); } sm_lower = sm_region; sm_upper = (void*)sm_region->limit; //void* base = sm_morecore(local_size); void* base = (void*)__sync_fetch_and_add(&sm_region->brk, local_size); if(base < sm_lower || base >= sm_upper) { printf("ERROR !!! 
==> params:sm_lower : %lu sm_upper : %lu map_addr : %lu sm_region: %lu brk : %lu limit : %lu total_size : %ld offset : %lu ranks : %ld local_size : %ld \n", sm_lower, sm_upper ,map_addr, sm_region, sm_region->brk ,sm_region->limit, total_size, offset, rank_divider, local_size); ERROR("Got local base %p outside of range %p -> %p",base, sm_lower, sm_upper); } //Clearing the memory seems to avoid some bugs and // forces out subtle OOM issues here instead of later. //memset(base, 0, local_size); //WARNING("%d sm_region %p base %p total_size %lx local_size %lx\n", // getpid(), sm_region, base, total_size, local_size); //Careful to subtract off space for the local data. sm_mspace = create_mspace_with_base(base, local_size, 1); }