/* Macro-template OCaml stub: sorts the N-element strided vector X in place.
 *
 * This file is included with NAME, NUMBER, OCAML_SORT_LT, QUICKSORT,
 * GET_INT, VEC_PARAMS and MAX_THRESH pre-defined by the including
 * translation unit; identifiers below (N, INCX, X_data, va, vb) are
 * declared by those macro expansions, so names must not be changed.
 *
 * vCMP  - OCaml comparison closure (only used when OCAML_SORT_CALLBACK
 *         is defined; otherwise OCAML_SORT_LT compares directly)
 * vN    - number of elements to sort
 * vOFSX - offset into X (consumed by VEC_PARAMS)
 * vINCX - stride between consecutive elements of X
 * vX    - bigarray holding the data
 *
 * Returns Val_unit; sorts X destructively.
 */
CAMLprim value NAME(value vCMP, value vN, value vOFSX, value vINCX, value vX)
{
  CAMLparam2(vCMP, vX);
#if defined(OCAML_SORT_CALLBACK)
  /* Scratch GC roots referenced by name inside the QUICKSORT expansion
     when the comparison calls back into OCaml. */
  CAMLlocal2(va, vb);
#endif
  const size_t GET_INT(N);   /* declares: const size_t N = ... */
  int GET_INT(INCX);         /* declares: int INCX = ...       */
  VEC_PARAMS(X);             /* declares X_data (pointer to first element) */
  NUMBER *const base_ptr = X_data;
  /* Byte distance below which QUICKSORT falls back to insertion sort.
     NOTE(review): INCX is multiplied into a size_t — if INCX can be
     negative here this wraps; presumably callers guarantee INCX > 0,
     but that cannot be confirmed from this file. */
  const size_t max_thresh = MAX_THRESH * sizeof(NUMBER) * INCX;

  if (N == 0) CAMLreturn(Val_unit);  /* nothing to sort */

#ifndef OCAML_SORT_CALLBACK
  /* Safe to release the runtime lock: no OCaml callback is involved. */
  caml_enter_blocking_section();  /* Allow other threads */
#endif

  /* Element comparison used by the QUICKSORT macro: compare the values
     the index pointers refer to. */
#define QUICKSORT_LT(a, b) OCAML_SORT_LT((*a), (*b))
  QUICKSORT(NUMBER, base_ptr, INCX, max_thresh);
#undef QUICKSORT_LT

#ifndef OCAML_SORT_CALLBACK
  caml_leave_blocking_section();  /* Disallow other threads */
#endif

  CAMLreturn(Val_unit);
}
/* Macro-template OCaml stub: computes the permutation that would sort X,
 * without moving X itself.  P is filled with the (FORTRAN, i.e. OFSX-based)
 * indices of X in sorted order.
 *
 * Same template conventions as NAME above: NUMBER, OCAML_SORT_LT,
 * QUICKSORT, GET_INT, VEC_PARAMS and MAX_THRESH come from the including
 * translation unit, and several identifiers are declared by macro
 * expansion.
 *
 * vCMP           - OCaml comparison closure (callback builds only)
 * vN             - number of elements
 * vOFSP, vINCP   - offset/stride of the permutation vector P
 * vP             - bigarray of intnat receiving the permutation
 * vOFSX, vINCX   - offset/stride of the data vector X
 * vX             - bigarray holding the data (read-only here)
 *
 * Returns Val_unit; writes the sorted index permutation into P.
 */
CAMLprim value NAME_PERM(value vCMP, value vN, value vOFSP, value vINCP,
                         value vP, value vOFSX, value vINCX, value vX)
{
  CAMLparam3(vCMP, vP, vX);
#if defined(OCAML_SORT_CALLBACK)
  /* Scratch GC roots used by name inside the QUICKSORT expansion. */
  CAMLlocal2(va, vb);
#endif
  const size_t GET_INT(N);
  int GET_INT(INCX), GET_INT(INCP);
  VEC_PARAMS(X);  /* declares X_data */
  intnat OFSX = Long_val(vOFSX);
  /* First usable slot of P, adjusted for the caller-supplied offset
     (Caml offsets are 1-based here, hence the -1). */
  intnat *P_data = ((intnat *) Caml_ba_data_val(vP)) + (Long_val(vOFSP) - 1);
  size_t i;
  /* Bias the data pointer so that P's entries can be FORTRAN (OFSX-based)
     indices: X[p] then addresses the right element directly. */
  NUMBER *const X = X_data - OFSX; /* so P values are FORTRAN indices */
  intnat *const base_ptr = P_data;
  /* Insertion-sort threshold in bytes; see NOTE in NAME about the
     size_t/INCP multiplication if INCP could be negative. */
  const size_t max_thresh = MAX_THRESH * sizeof(intnat) * INCP;

  if (N == 0) CAMLreturn(Val_unit);  /* empty: P untouched */

#ifndef OCAML_SORT_CALLBACK
  caml_enter_blocking_section();  /* Allow other threads */
#endif

  /* Initialize the permutation to the "identity". */
  for(i = 0; i < N; i += 1) P_data[i * INCP] = OFSX + i * INCX;

  /* Sort the indices in P by comparing the X elements they refer to;
     X itself is never modified. */
#define QUICKSORT_LT(a, b) OCAML_SORT_LT((X[*a]), (X[*b]))
  QUICKSORT(intnat, base_ptr, INCP, max_thresh);
#undef QUICKSORT_LT

#ifndef OCAML_SORT_CALLBACK
  caml_leave_blocking_section();  /* Disallow other threads */
#endif

  CAMLreturn(Val_unit);
}
/* Pops the next task group off the group stack and dispatches its tasks
 * to the parallel run queue.
 *
 * sync != 0: this processor reserves one task for itself, enqueues the
 *            rest, executes its reserved task, then spins (dequeuing work
 *            from this group) until the group's ntodo counter drains,
 *            and finally destroys the group and propagates any error.
 * sync == 0: tasks are enqueued and the function returns immediately;
 *            completion/cleanup is handled elsewhere (not visible here).
 *
 * Returns OK on success, ERROR if allocation fails or (sync case) if any
 * task in the group recorded an error.
 */
static Error ExProcessTaskGroup (int sync)
{
    Error        ret = ERROR;            /* pessimistic: goto error returns this */
    EXTaskGroup  tg;
    EXTask       task;
    int          i, j;
    int          todo;                   /* number of task slots to enqueue */
#if 0
    volatile int *count;                 /* task group counter */
#endif
    int          totalTodo;              /* sum of repeat counts, tracked in tg->ntodo */

#define NUM_TASKS_ALLOCED 256
    /* Stack-allocated fast path; heap copies are made only when the group
       holds more than NUM_TASKS_ALLOCED tasks. */
    Pointer      _args[NUM_TASKS_ALLOCED];
    PFI          _funcs[NUM_TASKS_ALLOCED];
    int          _repeats[NUM_TASKS_ALLOCED];
    Pointer     *args = _args;
    PFI         *funcs = _funcs;
    int         *repeats = _repeats;
    int          status;
    WorkIndex    _ilist[NUM_TASKS_ALLOCED];
    WorkIndex   *ilist = _ilist;
    ErrorCode    ecode;
    char        *emsg;
    EXTask       myTask = NULL;          /* task reserved for this processor (sync only) */
    int          myIter = 0;             /* iteration index handed to ExProcessTask for myTask */

    if (EMPTY)
        return (OK);                     /* no pending task groups */

    POP (tg);

    if (tg->nused == 0)
    {
        /* Empty group: nothing to run, just reclaim it. */
        ExDestroyTaskGroup (tg);
        return (OK);
    }

    DXMarkTime ("start parallel");

    /* If an error is already pending before the group starts, record it in
       the group (warning #4840 for an error code, #4850 for a bare message)
       so the join phase can re-raise it. */
    ecode = DXGetError ();
    emsg  = DXGetErrorMessage ();

    if (ecode != ERROR_NONE || *emsg != '\0')
    {
        if (ecode != ERROR_NONE)
            DXWarning ("#4840");
        else
            DXWarning ("#4850");

        tg->error = ecode;
        tg->emsg  = _dxf_ExCopyString (emsg);
    }

    /* Remember whether or not this is a synchronous task group. */
    tg->sync = sync;

    todo   = tg->nused;
    status = get_status ();

    if (todo > NUM_TASKS_ALLOCED)
    {
        ilist = (WorkIndex *) DXAllocateLocal (todo * sizeof (WorkIndex));
        if (ilist == NULL)
            goto error;
    }

    /* Build the work-index list and propagate the execution context into
       each task. */
    task = tg->tasks;
    for (i = 0; i < todo; ++i)
    {
        ilist[i].task = task + i;
        ilist[i].work = task[i].work;
        _dxfCopyContext(&(task[i].taskContext), _dxd_exContext);
    }

    /* Only bother to sort if the tasks actually have different cost
       estimates associated with them. */
    if (tg->minwork != tg->maxwork)
        QUICKSORT (ilist, todo);

#ifdef TASK_TIME
    DXMarkTimeLocal ("finish sort");
#endif

    /* Schedule/Execute the tasks appropriately. */
    if (todo > NUM_TASKS_ALLOCED)
    {
        funcs   = (PFI     *) DXAllocateLocal (todo * sizeof (PFI    ));
        args    = (Pointer *) DXAllocateLocal (todo * sizeof (Pointer));
        repeats = (int     *) DXAllocateLocal (todo * sizeof (int    ));
        if (funcs == NULL || args == NULL || repeats == NULL)
            goto error;
    }

    /* Save a task for the executer to execute: in the sync case this
       processor takes one iteration of the first (heaviest-sorted) task
       for itself instead of queueing it. */
    i = 0;
    if (sync)
    {
        myTask = ilist[i].task;
        myIter = ilist[i].task->repeat - 1;
        ilist[i].task->repeat--;
        if (ilist[i].task->repeat == 0)
        {
            /* Task fully consumed locally; skip it when filling the queue. */
            i = 1;
        }
    }

    /* totalTodo starts at 1 to count the iteration reserved above.
       NOTE(review): in the !sync path nothing was reserved, so ntodo is
       seemingly one higher than the queued work — presumably the async
       completion path accounts for this elsewhere; confirm before relying
       on ntodo reaching exactly zero. */
    totalTodo = 1;
    for (j = 0; i < todo; j++, i++)
    {
        funcs[j] = ExProcessTask;
        args [j] = (Pointer) ilist[i].task;
        totalTodo += (repeats[j] = ilist[i].task->repeat);
    }

    tg->ntodo = totalTodo;

    /* If the first task was fully consumed by this processor, one fewer
       queue entry was filled above. */
    if (ilist[0].task->repeat == 0)
    {
        --todo;
    }

#ifdef TASK_TIME
    DXMarkTimeLocal ("queue all tasks");
#endif

    _dxf_ExRQEnqueueMany (todo, funcs, args, repeats, (long) tg, 0, FALSE);

#ifdef TASK_TIME
    DXMarkTimeLocal ("queued all tasks");
#endif

    /* The scratch arrays are no longer needed once enqueued (the queue
       presumably copies them; confirm against _dxf_ExRQEnqueueMany). */
    if (funcs   != _funcs)   DXFree ((Pointer)funcs);
    if (args    != _args)    DXFree ((Pointer)args);
    if (repeats != _repeats) DXFree ((Pointer)repeats);
    if (ilist   != _ilist)   DXFree ((Pointer)ilist);

    if (! sync)
    {
        ret = OK;
    }
    else
    {
        int knt;

        /*
         * This processor is now restricted to processing tasks in this
         * task group.  Once it can no longer get a job in this task group
         * from the run queue then just spin and wait for all of the
         * outstanding tasks in the group to complete.
         */
#ifdef TASK_TIME
        DXMarkTimeLocal ("tasks enqueued");
#endif
        /* Do the task that I saved above as myTask */
        if (myTask != NULL)
            ExProcessTask (myTask, myIter);

#if 0
before the changes made to fix bugs found when debugging SMP linux -- gda
        count = &tg->ntodo;

        while (*count > 0)
        {
            if (! _dxf_ExRQDequeue ((long) tg))
                break;
        }

        DXMarkTimeLocal ("waiting");
        set_status (PS_JOINWAIT);

        /* Every 100 times of checking count, try to see if anyone added
         * on to the queue.
         */
        while (*count > 0)
        {
            _dxf_ExRQDequeue ((long)tg);
            for (i = 0; *count && i < 100; ++i)
                ;
        }
#else
        /* Join loop: read ntodo under the group lock, then try to steal
           more work from this group's queue; exit once ntodo hit zero.
           Note the dequeue happens even after knt was read as zero once
           more before the loop exits — order is deliberate (SMP fix per
           the disabled block above); do not reorder. */
        do
        {
            DXlock(&tg->lock, 0);
            knt = tg->ntodo;
            DXunlock(&tg->lock, 0);
            _dxf_ExRQDequeue ((long) tg);
        } while(knt);
#endif

        DXMarkTimeLocal ("joining");
        set_status (status);

        /* Propagate any error recorded by the tasks (default message #8360). */
        ret = (tg->error == ERROR_NONE) ? OK : ERROR;
        if (ret != OK)
            DXSetError (tg->error, tg->emsg? tg->emsg: "#8360");

        ExDestroyTaskGroup (tg);
    }

    DXMarkTime ("end parallel");
    return (ret);

error:
    /* Free only what was heap-allocated; the stack fast-path buffers are
       detected by pointer comparison. */
    if (funcs   != _funcs)   DXFree ((Pointer) funcs);
    if (args    != _args)    DXFree ((Pointer) args);
    if (repeats != _repeats) DXFree ((Pointer) repeats);
    if (ilist   != _ilist)   DXFree ((Pointer) ilist);
    return (ret);
}