/* Fix the order of the panels to be taken. */ void Preorder(const int pnum, const int n, const int *etree, int *cur_pan, queue_t *taskq, int *fb_cols, int *bcol, pxgstrf_shared_t *pxgstrf_shared) { register int w, dad, dad_ukids; #undef POSTORDER #ifdef POSTORDER if ( *cur_pan == EMPTY ) { *cur_pan = 0; } else { w = pxgstrf_shared->pan_status[*cur_pan].size; *cur_pan += w; } #else /* Breadth-first bottom up */ if ( *cur_pan != EMPTY ) { dad = DADPANEL (*cur_pan); dad_ukids = --pxgstrf_shared->pan_status[dad].ukids; if ( dad_ukids == 0 ) { taskq->queue[taskq->tail++] = dad; ++taskq->count; } } *cur_pan = taskq->queue[taskq->head++]; --taskq->count; #endif --pxgstrf_shared->tasks_remain; *bcol = *cur_pan; }
void pxgstrf_scheduler(const int pnum, const int n, const int *etree, int *cur_pan, int *bcol, pxgstrf_shared_t *pxgstrf_shared) { /* * -- SuperLU MT routine (version 1.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, * and Lawrence Berkeley National Lab. * August 15, 1997 * * Purpose * ======= * * pxgstrf_scheduler() gets a panel for the processor to work on. * It schedules a panel in decreasing order of priority: * (1) the current panel's parent, if it can be done without pipelining * (2) any other panel in the queue that can be done without pipelining * ("CANGO" status) * (3) any other panel in the queue that can be done with pipelining * ("CANPIPE" status) * * Arguments * ========= * pnum (input) int * Processor number. * * n (input) int * Column dimension of the matrix. * * etree (input) int* * Elimination tree of A'*A, size n. * Note: etree is a vector of parent pointers for a forest whose * vertices are the integers 0 to n-1; etree[root] = n. * * cur_pan (input/output) int* * On entry, the current panel just finished by this processor; * On exit, [0, n-1]: the new panel to work on; * EMPTY: failed to get any work, will try later; * n: all panels are taken; ready to terminate. * * taskq (input/output) queue_t* * Global work queue. * * fb_cols (input/output) int* * The farthest busy descendant of each (leading column of the) panel. * * bcol (output) int* * The most distant busy descendant of cur_pan in the *linear* * pipeline of busy descendants. If all proper descendants of * cur_pan are done, bcol is returned equal to cur_pan. * * Defining terms * ============== * o parent(panel) = etree(last column in the panel) * o the kids of a panel = collective kids of all columns in the panel * kids[REP] = SUM_{j in panel} ( kids[j] ) * o linear pipeline - what does it mean in the panel context? * if ukids[REP] = 0, then the panel becomes a leaf (CANGO) * if ukids[REP] = 1 && ukids[firstcol] = 1, then the panel can * be taken with pipelining (CANPIPE) * * NOTES * ===== * o When a "busy" panel finishes, if its parent has only one remaining * undone child there is no check to see if the parent should change * from "unready" to "canpipe". Thus a few potential pipelinings will * be lost, but checking out this pipeline opportunity may be costly. * */ register int dad, dad_ukids, jcol, w, j; int *fb_cols = pxgstrf_shared->fb_cols; queue_t *taskq = &pxgstrf_shared->taskq; Gstat_t *Gstat = pxgstrf_shared->Gstat; #ifdef PROFILE double t; #endif jcol = *cur_pan; if ( jcol != EMPTY ) { #ifdef DOMAINS if ( in_domain[jcol] == TREE_DOMAIN ) dad = etree[jcol]; else #endif dad = DADPANEL (jcol); } /* w_top = sp_ienv(1)/2; if ( w_top == 0 ) w_top = 1;*/ #ifdef PROFILE TIC(t); #endif #if ( MACH==SUN ) mutex_lock( &pxgstrf_shared->lu_locks[SCHED_LOCK] ); #elif ( MACH==DEC || MACH==PTHREAD ) pthread_mutex_lock( &pxgstrf_shared->lu_locks[SCHED_LOCK] ); #elif ( MACH==SGI || MACH==ORIGIN ) #pragma critical lock(pxgstrf_shared->lu_locks[SCHED_LOCK]) #elif ( MACH==CRAY_PVP ) #pragma _CRI guard SCHED_LOCK #elif ( MACH==OPENMP ) #pragma omp critical ( SCHED_LOCK ) #endif { /* ---- START CRITICAL SECTION ---- */ /* Update the status of the current panel and its parent, so that * the other processors waiting on it can proceed. * If all siblings are done, and dad is not busy, then take dad. */ if ( jcol != EMPTY ) { /* jcol was just finished by this processor */ dad_ukids = --pxgstrf_shared->pan_status[dad].ukids; #ifdef DEBUG printf("(%d) DONE %d in Scheduler(), dad %d, STATE %d, dad_ukids %d\n", pnum, jcol, dad, STATE(dad), dad_ukids); #endif if ( dad_ukids == 0 && STATE( dad ) > BUSY ) { /* dad not started */ jcol = dad; #ifdef DEBUG printf("(%d) Scheduler[1] Got dad %d, STATE %d\n", pnum, jcol, STATE(dad)); #endif #ifdef PROFILE ++(Gstat->panhows[DADPAN]); #endif } else { /* Try to get a panel from the task Q. */ while ( 1 ) { /*>>if ( (j = Dequeue(taskq, &item)) == EMPTY ) {*/ if ( taskq->count <= 0 ) { jcol = EMPTY; break; } else { jcol = taskq->queue[taskq->head++]; --taskq->count; if ( STATE( jcol ) >= CANGO ) { /* CANGO or CANPIPE */ #ifdef DEBUG printf("(%d) Dequeue[1] Got %d, STATE %d, Qcount %d\n", pnum, jcol, STATE(jcol), j); #endif #ifdef PROFILE if (STATE( jcol ) == CANGO) ++(Gstat->panhows[NOPIPE]); else ++(Gstat->panhows[PIPE]); #endif break; } } } /* while */ } } else { /* * jcol was EMPTY; Try to get a panel from the task Q. */ while ( 1 ) { /*>>if ( (j = Dequeue(taskq, &item)) == EMPTY ) {*/ if ( taskq->count <= 0 ) { jcol = EMPTY; break; } else { jcol = taskq->queue[taskq->head++]; --taskq->count; if ( STATE( jcol ) >= CANGO ) { /* CANGO or CANPIPE */ #ifdef DEBUG printf("(%d) Dequeue[2] Got %d, STATE %d, Qcount %d\n", pnum, jcol, STATE(jcol), j); #endif #ifdef PROFILE if (STATE( jcol ) == CANGO) ++(Gstat->panhows[NOPIPE]); else ++(Gstat->panhows[PIPE]); #endif break; } } } /* while */ } /* * Update the status of the new panel "jcol" and its parent "dad". */ if ( jcol != EMPTY ) { --pxgstrf_shared->tasks_remain; #ifdef DOMAINS if ( in_domain[jcol] == TREE_DOMAIN ) { /* Dequeue the first descendant of this domain */ *bcol = taskq->queue[taskq->head++]; --taskq->count; } else #endif { STATE( jcol ) = BUSY; w = pxgstrf_shared->pan_status[jcol].size; for (j = jcol; j < jcol+w; ++j) pxgstrf_shared->spin_locks[j] = 1; dad = DADPANEL (jcol); if ( dad < n && pxgstrf_shared->pan_status[dad].ukids == 1 ) { STATE( dad ) = CANPIPE; /*>> j = Enqueue(taskq, dad);*/ taskq->queue[taskq->tail++] = dad; ++taskq->count; #ifdef DEBUG printf("(%d) Enqueue() %d's dad %d ->CANPIPE, Qcount %d\n", pnum, jcol, dad, j); #endif } #ifdef PROFILE Gstat->procstat[pnum].panels++; #endif /* Find the farthest busy descendant of the new panel and its parent.*/ *bcol = fb_cols[jcol]; #ifdef DEBUG printf("(%d) Scheduler[2] fb_cols[%d]=%d, STATE %d\n", pnum, jcol, *bcol, STATE( *bcol )); #endif while ( STATE( *bcol ) == DONE ) *bcol = DADPANEL (*bcol); fb_cols[dad] = *bcol; } /* else regular_panel */ } /* if jcol != empty */ *cur_pan = jcol; #ifdef DEBUG printf("(%d) Exit C.S. tasks_remain %d, cur_pan %d\n", pnum, pxgstrf_shared->tasks_remain, jcol); #endif } /* ---- END CRITICAL SECTION ---- */ #if ( MACH==SUN ) /* Exit C.S. */ mutex_unlock( &pxgstrf_shared->lu_locks[SCHED_LOCK] ); #elif ( MACH==DEC || MACH==PTHREAD ) pthread_mutex_unlock( &pxgstrf_shared->lu_locks[SCHED_LOCK] ); #elif ( MACH==CRAY_PVP ) #pragma _CRI endguard SCHED_LOCK #endif #ifdef PROFILE Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t; #endif return; }