/* * Executes all tasks which are in the r-queue at the moment of the * call. This routine is called to make sure that all tasks in the * queue are dequeued before continueing with other tasks. */ void PMR_process_r_queue(int tid, proc_t *procinfo, val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct, workQ_t *workQ, counter_t *num_left, double *work, int *iwork) { int thread_support = procinfo->thread_support; int t, num_tasks; int status; task_t *task; num_tasks = PMR_get_num_tasks(workQ->r_queue); for (t=0; t<num_tasks; t++) { task = PMR_remove_task_at_front(workQ->r_queue); if ( task != NULL ) { if (task->flag == CLUSTER_TASK_FLAG) { if (thread_support != MPI_THREAD_FUNNELED || tid == 0) { /* if MPI_THREAD_FUNNELED only tid==0 should process * these tasks, otherwise any thread can do it */ status = PMR_process_c_task((cluster_t *) task->data, tid, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork); if (status == C_TASK_PROCESSED) { free(task); } else { PMR_insert_task_at_back(workQ->r_queue, task); } } else { PMR_insert_task_at_back(workQ->r_queue, task); } } /* end if cluster task */ if (task->flag == REFINE_TASK_FLAG) { PMR_process_r_task((refine_t *) task->data, procinfo, Wstruct, tolstruct, work, iwork); free(task); } } /* end if task removed */ } /* end for t */ } /* end process_entire_r_queue */
/*
 * Drains the r-queue: tasks are taken from the front one at a time
 * until the queue reports no further task. Every task taken is
 * asserted to carry REFINEMENT_TASK_FLAG, is processed with
 * PMR_process_r_task() (asserted to return 0), and its task wrapper
 * is then freed.
 *
 * Always returns 0.
 */
int PMR_process_r_queue(int tid, workQ_t *workQ, val_t *Wstruct,
                        tol_t *tolstruct, double *work, int *iwork)
{
  for (;;) {
    task_t *task = PMR_remove_task_at_front(workQ->r_queue);
    int    info;

    if (task == NULL) {
      break;   /* nothing left to dequeue */
    }

    assert(task->flag == REFINEMENT_TASK_FLAG);

    info = PMR_process_r_task((refine_t *) task->data, tid,
                              Wstruct, tolstruct, work, iwork);
    assert(info == 0);

    free(task);
  }

  return 0;
}
/* * Refine eigenvalues with respect to new rrr */ static inline int refine_eigvals (cluster_t *cl, int rf_begin, int rf_end, int tid, proc_t *procinfo, rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct, counter_t *num_left, workQ_t *workQ, double *work, int *iwork) { int rf_size = rf_end-rf_begin+1; int bl_begin = cl->bl_begin; int bl_end = cl->bl_end; int bl_size = bl_end - bl_begin + 1; double bl_spdiam = cl->bl_spdiam; double *restrict D = RRR->D; double *restrict L = RRR->L; double *restrict DLL = RRR->DLL; double *restrict W = Wstruct->W; double *restrict Werr = Wstruct->Werr; double *restrict Wgap = Wstruct->Wgap; int *restrict Windex = Wstruct->Windex; double *restrict Wshifted = Wstruct->Wshifted; double pivmin = tolstruct->pivmin; double rtol1 = tolstruct->rtol1; double rtol2 = tolstruct->rtol2; /* Determine if refinement should be split into tasks */ int left = PMR_get_counter_value(num_left); int nz = Zstruct->nz; int nthreads = procinfo->nthreads; int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads)); int own_part = (int)fmax(ceil((double)left/nthreads),MIN_REFINE_CHUNK); int offset, i, p, q; double savegap; task_t *task; if (own_part < rf_size) { int others_part = rf_size - own_part; int num_tasks = iceil(rf_size, own_part) - 1; /* >1 */ int chunk = others_part/num_tasks; /* floor */ int ts_begin=rf_begin, ts_end; p = Windex[rf_begin]; for (i=0; i<num_tasks; i++) { ts_end = ts_begin + chunk - 1; q = p + chunk - 1; task = PMR_create_r_task (ts_begin, ts_end, D, DLL, p, q, bl_size, bl_spdiam, tid); if (ts_begin <= ts_end) PMR_insert_task_at_back(workQ->r_queue, task); else PMR_refine_sem_post(task->data); /* case chunk=0 */ ts_begin = ts_end + 1; p = q + 1; } ts_end = rf_end; q = Windex[rf_end]; offset = Windex[ts_begin] - 1; /* Call bisection routine to refine the values */ if (ts_begin <= ts_end) { int info; odrrb (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin], work, iwork, &pivmin, 
&bl_spdiam, &bl_size, &info); assert( info == 0 ); } /* Empty "all" r-queue refine tasks before waiting */ int num_iter = PMR_get_num_tasks(workQ->r_queue); for (i=0; i<num_iter; i++) { task = PMR_remove_task_at_front(workQ->r_queue); if (task != NULL) { if (task->flag == REFINE_TASK_FLAG) { PMR_process_r_task ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork); free(task); } else { PMR_insert_task_at_back(workQ->r_queue, task); } } /* if task */ } /* end for i */ /* Barrier: wait until all created tasks finished */ int count = num_tasks; while (count > 0) { while ( PMR_refine_sem_wait(task->data) != 0 ) { }; count--; } PMR_refine_sem_destroy(task->data); /* Edit right gap at splitting point */ ts_begin = rf_begin; for (i=0; i<num_tasks; i++) { ts_end = ts_begin + chunk - 1; Wgap[ts_end] = fmax(0.0, Wshifted[ts_end + 1] - Werr[ts_end + 1] - Wshifted[ts_end] - Werr[ts_end]); ts_begin = ts_end + 1; } } else { /* Refinement of cluster without creating tasks */ /* 'p' and 'q' are local (within block) indices of * the first/last eigenvalue of the cluster */ p = Windex[rf_begin]; q = Windex[rf_end]; offset = Windex[rf_begin] - 1; /* = p - 1 */ if (p == q) { savegap = Wgap[rf_begin]; Wgap[rf_begin] = 0.0; } /* Bisection routine to refine the values */ int info; odrrb (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); assert( info == 0 ); if (p == q) Wgap[rf_begin] = savegap; } /* end refine with or without creating tasks */ /* refined eigenvalues with all shifts applied in W */ double sigma = L[bl_size-1]; for (i=rf_begin; i<=rf_end; i++) W[i] = Wshifted[i] + sigma; return 0; } /* end refine_eigvals */
/* * Processes all the tasks put in the work queue. */ static void *empty_workQ(void *argin) { /* input arguments */ int tid; int nthreads; counter_t *num_left; workQ_t *workQ; in_t *Dstruct; val_t *Wstruct; vec_t *Zstruct; tol_t *tolstruct; int n; /* others */ task_t *task; double *work; int *iwork; /* retrieve necessary arguments from structures */ retrieve_aux3((aux3_t *) argin, &tid, &nthreads, &num_left, &workQ, &Dstruct, &Wstruct, &Zstruct, &tolstruct); n = Wstruct->n; /* max. needed double precision work space: dlar1v */ work = (double *) malloc( 4*n * sizeof(double) ); assert(work != NULL); /* max. needed double precision work space: dlarrb */ iwork = (int *) malloc( 2*n * sizeof(int) ); assert(iwork != NULL); /* While loop to empty the work queue */ while (PMR_get_counter_value(num_left) > 0) { task = PMR_remove_task_at_front(workQ->r_queue); if (task != NULL) { assert(task->flag == REFINEMENT_TASK_FLAG); PMR_process_r_task((refine_t *) task->data, tid, Wstruct, tolstruct, work, iwork); free(task); continue; } task = PMR_remove_task_at_front(workQ->s_queue); if ( task != NULL ) { assert(task->flag == SINGLETON_TASK_FLAG); PMR_process_s_task((singleton_t *) task->data, tid, num_left, workQ, Wstruct, Zstruct, tolstruct, work, iwork); free(task); continue; } task = PMR_remove_task_at_front(workQ->c_queue); if ( task != NULL ) { assert(task->flag == CLUSTER_TASK_FLAG); PMR_process_c_task((cluster_t *) task->data, tid, nthreads, num_left, workQ, Wstruct, Zstruct, tolstruct, work, iwork); free(task); continue; } } /* end while */ free(work); free(iwork); return(NULL); }