/* * Processes all the tasks put in the work queue. */ static void *empty_workQ(void *argin) { int tid; proc_t *procinfo; val_t *Wstruct; vec_t *Zstruct; tol_t *tolstruct; workQ_t *workQ; counter_t *num_left; retrieve_auxarg3 ((auxarg3_t*)argin, &tid, &procinfo, &Wstruct, &Zstruct, &tolstruct, &workQ, &num_left); int n = Wstruct->n; /* max. needed double precision work space: odr1v */ double *work = (double*)malloc(4*n*sizeof(double)); assert(work != NULL); /* max. needed double precision work space: odrrb */ int *iwork = (int*)malloc(2*n*sizeof(int)); assert(iwork != NULL); /* while loop to empty the work queue */ while (PMR_get_counter_value(num_left) > 0) { /* empty r-queue before processing other tasks */ PMR_process_r_queue (tid, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork); task_t *task = PMR_remove_task_at_front(workQ->s_queue); if ( task != NULL ) { assert(task->flag == SINGLETON_TASK_FLAG); PMR_process_s_task ((singleton_t*)task->data, tid, procinfo, Wstruct, Zstruct, tolstruct, num_left, work, iwork); free(task); continue; } task = PMR_remove_task_at_front(workQ->c_queue); if ( task != NULL ) { assert(task->flag == CLUSTER_TASK_FLAG); PMR_process_c_task ((cluster_t*)task->data, tid, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork); free(task); continue; } } /* end while */ free(work); free(iwork); return NULL; }
/* TODO: Refactor this routine */ static inline int create_subtasks (cluster_t *cl, int tid, proc_t *procinfo, rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, workQ_t *workQ, counter_t *num_left) { /* From inputs */ int cl_begin = cl->begin; int cl_end = cl->end; int depth = cl->depth; int bl_begin = cl->bl_begin; int bl_end = cl->bl_end; int bl_size = bl_end - bl_begin + 1; double bl_spdiam = cl->bl_spdiam; double lgap; int pid = procinfo->pid; int nproc = procinfo->nproc; int nthreads = procinfo->nthreads; bool proc_involved=true; double *restrict Wgap = Wstruct->Wgap; double *restrict Wshifted = Wstruct->Wshifted; int *restrict iproc = Wstruct->iproc; int ldz = Zstruct->ldz; double *restrict Z = Zstruct->Z; int *restrict Zindex = Zstruct->Zindex; /* others */ int i, l, k; int max_size; task_t *task; bool task_inserted; int new_first, new_last, new_size, new_ftt1, new_ftt2; int sn_first, sn_last, sn_size; rrr_t *RRR_parent; int new_lpid, new_rpid; double *restrict D_parent; double *restrict L_parent; int my_first, my_last; bool copy_parent_rrr; max_size = fmax(1, PMR_get_counter_value(num_left) / (fmin(depth+1,4)*nthreads) ); task_inserted = true; new_first = cl_begin; for (i=cl_begin; i<=cl_end; i++) { if ( i == cl_end ) new_last = i; else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) ) new_last = i; else continue; new_size = new_last - new_first + 1; if (new_size == 1) { /* singleton was found */ if (new_first==cl_begin || task_inserted==true) { /* initialize new singleton task */ sn_first = new_first; sn_last = new_first; sn_size = 1; } else { /* extend singleton task by one */ sn_last++; sn_size++; } /* insert task if ... 
*/ if (i==cl_end || sn_size>=max_size || Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) { /* Check if process involved in s-task */ proc_involved = false; for (k=sn_first; k<=sn_last; k++) { if (iproc[k] == pid) { proc_involved = true; break; } } if (proc_involved == false) { task_inserted = true; new_first = i + 1; continue; } /* Insert task as process is involved */ if (sn_first == cl_begin) { lgap = cl->lgap; } else { lgap = Wgap[sn_first-1]; } PMR_increment_rrr_dependencies(RRR); task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin, bl_end, bl_spdiam, lgap, RRR); PMR_insert_task_at_back(workQ->s_queue, task); task_inserted = true; } else { task_inserted = false; } } else { /* cluster was found */ /* check if process involved in processing the new cluster */ new_lpid = nproc-1; new_rpid = -1; for (l=new_first; l<=new_last; l++) { if (iproc[l] != -1) { new_lpid = imin(new_lpid, iproc[l]); new_rpid = imax(new_rpid, iproc[l]); } } if (new_lpid > pid || new_rpid < pid) { task_inserted = true; new_first = i + 1; continue; } /* find gap to the left */ if (new_first == cl_begin) { lgap = cl->lgap; } else { lgap = Wgap[new_first - 1]; } /* determine where to store the parent rrr needed by the * cluster to find its new rrr */ my_first = imax(new_first, cl->proc_W_begin); my_last = imin(new_last, cl->proc_W_end); if ( my_first == my_last ) { /* only one eigenvalue of cluster belongs to process */ copy_parent_rrr = true; } else { /* store parent rrr in Z at column new_ftt */ copy_parent_rrr = false; } new_ftt1 = Zindex[my_first ]; new_ftt2 = Zindex[my_first + 1]; if (copy_parent_rrr == true) { /* Copy parent RRR into alloceted arrays and mark them * for freeing later */ D_parent = (double *) malloc(bl_size * sizeof(double)); assert(D_parent != NULL); L_parent = (double *) malloc(bl_size * sizeof(double)); assert(L_parent != NULL); memcpy(D_parent, RRR->D, bl_size*sizeof(double)); memcpy(L_parent, RRR->L, bl_size*sizeof(double)); RRR_parent = PMR_create_rrr(D_parent, 
L_parent, NULL, NULL, bl_size, depth); PMR_set_copied_parent_rrr_flag(RRR_parent, true); } else { /* copy parent RRR into Z to make cluster task independent */ memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, bl_size*sizeof(double)); memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, bl_size*sizeof(double)); RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin], &Z[new_ftt2*ldz + bl_begin], NULL, NULL, bl_size, depth); } /* Create the task for the cluster and put it in the queue */ task = PMR_create_c_task(new_first, new_last, depth+1, bl_begin, bl_end, bl_spdiam, lgap, cl->proc_W_begin, cl->proc_W_end, new_lpid, new_rpid, RRR_parent); if (new_lpid != new_rpid) PMR_insert_task_at_back(workQ->r_queue, task); else PMR_insert_task_at_back(workQ->c_queue, task); task_inserted = true; } /* if singleton or cluster found */ new_first = i + 1; } /* end i */ /* set flag in RRR that last singleton is created */ PMR_set_parent_processed_flag(RRR); /* clean up */ PMR_try_destroy_rrr(RRR); free(cl); return 0; } /* end create_subtasks */
/*
 * Refine eigenvalues with respect to new rrr
 *
 * Refines the shifted eigenvalues Wshifted[rf_begin..rf_end] (together
 * with Wgap and Werr) by calling the bisection routine odrrb on the
 * representation held in RRR (D and DLL), then stores the unshifted
 * values W[i] = Wshifted[i] + L[bl_size-1] for the same index range.
 *
 * If the range is larger than the share this thread keeps for itself
 * (own_part), the leading part is split into refine tasks that are put
 * on workQ->r_queue; this thread refines the remaining tail, helps to
 * drain the r-queue and then waits until all created tasks reported
 * completion. Otherwise the whole range is refined by one odrrb call.
 *
 * 'work' and 'iwork' are scratch arrays passed through to odrrb and
 * PMR_process_r_task. Always returns 0; a nonzero 'info' from odrrb
 * trips an assert.
 */
static inline int refine_eigvals
(cluster_t *cl, int rf_begin, int rf_end, int tid, proc_t *procinfo,
 rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct,
 counter_t *num_left, workQ_t *workQ, double *work, int *iwork)
{
  int rf_size = rf_end-rf_begin+1;
  int bl_begin = cl->bl_begin;
  int bl_end = cl->bl_end;
  int bl_size = bl_end - bl_begin + 1;
  double bl_spdiam = cl->bl_spdiam;

  double *restrict D = RRR->D;
  double *restrict L = RRR->L;
  double *restrict DLL = RRR->DLL;

  double *restrict W = Wstruct->W;
  double *restrict Werr = Wstruct->Werr;
  double *restrict Wgap = Wstruct->Wgap;
  int *restrict Windex = Wstruct->Windex;
  double *restrict Wshifted = Wstruct->Wshifted;

  double pivmin = tolstruct->pivmin;
  double rtol1 = tolstruct->rtol1;
  double rtol2 = tolstruct->rtol2;

  /* Determine if refinement should be split into tasks */
  int left = PMR_get_counter_value(num_left);
  int nz = Zstruct->nz;
  int nthreads = procinfo->nthreads;
  /* lower bound (at least 2) on the part this thread refines itself */
  int MIN_REFINE_CHUNK = fmax(2,nz/(4*nthreads));
  int own_part = (int)fmax(ceil((double)left/nthreads),MIN_REFINE_CHUNK);

  int offset, i, p, q;
  double savegap; /* only assigned and read when p == q below */
  task_t *task;

  if (own_part < rf_size) {
    /* Refinement split into tasks for the leading part of the range */

    int others_part = rf_size - own_part;
    /* NOTE(review): assuming iceil is ceiling division, this is >= 1
     * (not strictly > 1) because own_part < rf_size -- confirm */
    int num_tasks = iceil(rf_size, own_part) - 1; /* >1 */
    int chunk = others_part/num_tasks; /* floor */

    /* create num_tasks tasks of 'chunk' eigenvalues each; p and q
     * advance in step with ts_begin and ts_end */
    int ts_begin=rf_begin, ts_end;
    p = Windex[rf_begin];
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;
      q = p + chunk - 1;

      task = PMR_create_r_task (ts_begin, ts_end, D, DLL, p, q, bl_size, bl_spdiam, tid);

      /* an empty task is not enqueued; it is signalled as finished
       * right away so the barrier count below still matches */
      if (ts_begin <= ts_end)
        PMR_insert_task_at_back(workQ->r_queue, task);
      else
        PMR_refine_sem_post(task->data); /* case chunk=0 */

      ts_begin = ts_end + 1;
      p = q + 1;
    }
    /* what is left, ts_begin..rf_end, is refined by this thread */
    ts_end = rf_end;
    q = Windex[rf_end];
    offset = Windex[ts_begin] - 1;

    /* Call bisection routine to refine the values */
    if (ts_begin <= ts_end) {
      int info;
      odrrb (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, &Wshifted[ts_begin], &Wgap[ts_begin], &Werr[ts_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
      assert( info == 0 );
    }

    /* Empty "all" r-queue refine tasks before waiting */
    /* Only as many removals as the queue held when counted; tasks
     * with another flag are appended to the queue again. */
    int num_iter = PMR_get_num_tasks(workQ->r_queue);
    for (i=0; i<num_iter; i++) {
      task = PMR_remove_task_at_front(workQ->r_queue);
      if (task != NULL) {
        if (task->flag == REFINE_TASK_FLAG) {
          PMR_process_r_task ((refine_t*)task->data, procinfo, Wstruct, tolstruct, work, iwork);
          free(task);
        } else {
          PMR_insert_task_at_back(workQ->r_queue, task);
        }
      } /* if task */
    } /* end for i */

    /* Barrier: wait until all created tasks finished */
    /* NOTE(review): 'task' was reassigned by the drain loop above, so
     * at this point it may be NULL, point to a task that was already
     * passed to free(), or point to a re-queued task of another kind.
     * task->data is nevertheless dereferenced below -- confirm how
     * PMR_refine_sem_wait/PMR_refine_sem_destroy use their argument. */
    int count = num_tasks;
    while (count > 0) {
      /* retry until the wait reports success (returns 0) */
      while ( PMR_refine_sem_wait(task->data) != 0 ) { };
      count--;
    }
    PMR_refine_sem_destroy(task->data);

    /* Edit right gap at splitting point */
    /* Each task boundary gets its gap recomputed from the refined
     * neighbours, clamped at zero. */
    ts_begin = rf_begin;
    for (i=0; i<num_tasks; i++) {
      ts_end = ts_begin + chunk - 1;

      Wgap[ts_end] = fmax(0.0, Wshifted[ts_end + 1] - Werr[ts_end + 1] - Wshifted[ts_end] - Werr[ts_end]);

      ts_begin = ts_end + 1;
    }

  } else {
    /* Refinement of cluster without creating tasks */

    /* 'p' and 'q' are local (within block) indices of
     * the first/last eigenvalue of the cluster */
    p = Windex[rf_begin];
    q = Windex[rf_end];

    offset = Windex[rf_begin] - 1; /* = p - 1 */

    /* for a single eigenvalue the gap is zeroed for the call and
     * put back afterwards */
    if (p == q) {
      savegap = Wgap[rf_begin];
      Wgap[rf_begin] = 0.0;
    }

    /* Bisection routine to refine the values */
    int info;
    odrrb (&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
    assert( info == 0 );

    if (p == q)
      Wgap[rf_begin] = savegap;

  } /* end refine with or without creating tasks */

  /* refined eigenvalues with all shifts applied in W */
  /* the shift is read from the last entry of L */
  double sigma = L[bl_size-1];
  for (i=rf_begin; i<=rf_end; i++)
    W[i] = Wshifted[i] + sigma;

  return 0;
} /* end refine_eigvals */
/* * Processes all the tasks put in the work queue. */ static void *empty_workQ(void *argin) { /* input arguments */ int tid; int nthreads; counter_t *num_left; workQ_t *workQ; in_t *Dstruct; val_t *Wstruct; vec_t *Zstruct; tol_t *tolstruct; int n; /* others */ task_t *task; double *work; int *iwork; /* retrieve necessary arguments from structures */ retrieve_aux3((aux3_t *) argin, &tid, &nthreads, &num_left, &workQ, &Dstruct, &Wstruct, &Zstruct, &tolstruct); n = Wstruct->n; /* max. needed double precision work space: dlar1v */ work = (double *) malloc( 4*n * sizeof(double) ); assert(work != NULL); /* max. needed double precision work space: dlarrb */ iwork = (int *) malloc( 2*n * sizeof(int) ); assert(iwork != NULL); /* While loop to empty the work queue */ while (PMR_get_counter_value(num_left) > 0) { task = PMR_remove_task_at_front(workQ->r_queue); if (task != NULL) { assert(task->flag == REFINEMENT_TASK_FLAG); PMR_process_r_task((refine_t *) task->data, tid, Wstruct, tolstruct, work, iwork); free(task); continue; } task = PMR_remove_task_at_front(workQ->s_queue); if ( task != NULL ) { assert(task->flag == SINGLETON_TASK_FLAG); PMR_process_s_task((singleton_t *) task->data, tid, num_left, workQ, Wstruct, Zstruct, tolstruct, work, iwork); free(task); continue; } task = PMR_remove_task_at_front(workQ->c_queue); if ( task != NULL ) { assert(task->flag == CLUSTER_TASK_FLAG); PMR_process_c_task((cluster_t *) task->data, tid, nthreads, num_left, workQ, Wstruct, Zstruct, tolstruct, work, iwork); free(task); continue; } } /* end while */ free(work); free(iwork); return(NULL); }