/*
** _pullIterate
**
** Runs a single iteration on the calling thread:
**  - clears pctx->finished and resets pctx->binNextIdx,
**  - meets the workers at iterBarrierA (only when threadNum > 1),
**  - runs _pullProcess() on task[0],
**  - meets the workers at iterBarrierB (only when threadNum > 1),
**  - if nobody flagged pctx->finished: sums the per-task stuckNum into
**    pctx->stuckNum, calls _pullPointNixMeRemove() and pullRebin(), and
**    records the elapsed time in pctx->timeIteration.
**
** Returns 0 on success.  Returns 1 (after adding a message to biff under
** the PULL key) on a NULL context, on trouble in the master or any worker,
** or when pullRebin() fails; pctx->timeIteration is not updated then.
**
** NB: this implements the body of thread 0, the master thread
*/
int
_pullIterate(pullContext *pctx) {
  char me[]="_pullIterate", err[BIFF_STRLEN];
  double tstart;
  int masterFailed = AIR_FALSE;
  unsigned int ti;

  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PULL, err);
    return 1;
  }
  if (pctx->verbose) {
    fprintf(stderr, "%s: start iter %d w/ %u threads; energy = %g\n",
            me, pctx->iter, pctx->threadNum, _pullEnergyTotal(pctx));
  }

  tstart = airTime();

  /* workers look at "finished" right after they clear iterBarrierA,
     so it has to be reset before we join them there */
  pctx->finished = AIR_FALSE;
  /* bins are handed out to threads starting from index 0 */
  pctx->binNextIdx=0;

  if (pctx->threadNum > 1) {
    airThreadBarrierWait(pctx->iterBarrierA);
  }
  if (_pullProcess(pctx->task[0])) {
    sprintf(err, "%s: master thread trouble w/ iter %u", me, pctx->iter);
    biffAdd(PULL, err);
    pctx->finished = AIR_TRUE;
    masterFailed = AIR_TRUE;
  }
  if (pctx->threadNum > 1) {
    airThreadBarrierWait(pctx->iterBarrierB);
  }

  if (!pctx->finished) {
    /* normal path: everybody got through their share of the work */
    pctx->stuckNum = 0;
    ti = 0;
    while (ti < pctx->threadNum) {
      pctx->stuckNum += pctx->task[ti]->stuckNum;
      ti++;
    }
    _pullPointNixMeRemove(pctx);
    if (pullRebin(pctx)) {
      sprintf(err, "%s: problem with new point locations", me);
      biffAdd(PULL, err);
      return 1;
    }
    pctx->timeIteration = airTime() - tstart;
    return 0;
  }

  /* somebody set "finished"; if it was not us, it was a worker */
  if (!masterFailed) {
    sprintf(err, "%s: worker error on iter %u", me, pctx->iter);
    biffAdd(PULL, err);
  }
  return 1;
}
/*
** coilFinish
**
** Tears down the task/thread state held by the context.  Sets
** cctx->finished and, when there is more than one thread, meets the
** workers at filterBarrier, then joins and nixes each worker task
** (indices 1 .. numThreads-1).  Task 0 and the task array are then
** released, followed by the mutex and the two barriers (multi-thread
** case only).
**
** Returns 0 on success, or 1 (with a biff message under COIL) if cctx
** is NULL.
*/
int
coilFinish(coilContext *cctx) {
  char me[]="coilFinish", err[BIFF_STRLEN];
  unsigned int wi;
  coilTask *tk;

  if (!cctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(COIL, err);
    return 1;
  }

  if (cctx->verbose > 1) {
    fprintf(stderr, "%s: finishing workers\n", me);
  }
  cctx->finished = AIR_TRUE;

  if (cctx->numThreads > 1) {
    /* meet the workers at the barrier they wait on before checking
       "finished", then collect them one at a time */
    airThreadBarrierWait(cctx->filterBarrier);
    wi = 1;
    while (wi < cctx->numThreads) {
      tk = cctx->task[wi];
      airThreadJoin(tk->thread, &(tk->returnPtr));
      tk->thread = airThreadNix(tk->thread);
      cctx->task[wi] = _coilTaskNix(tk);
      wi++;
    }
  }

  /* task 0 belongs to the calling thread: nothing to join */
  tk = cctx->task[0];
  tk->thread = airThreadNix(tk->thread);
  cctx->task[0] = _coilTaskNix(tk);
  cctx->task = (coilTask **)airFree(cctx->task);

  if (cctx->numThreads > 1) {
    cctx->nextSliceMutex = airThreadMutexNix(cctx->nextSliceMutex);
    cctx->filterBarrier = airThreadBarrierNix(cctx->filterBarrier);
    cctx->updateBarrier = airThreadBarrierNix(cctx->updateBarrier);
  }
  return 0;
}
/*
******** coilIterate
**
** Runs numIterations filter/update passes on the calling thread.  For
** each iteration it stores the iteration index in cctx->iter, clears
** cctx->finished, meets the workers at filterBarrier (only when
** numThreads > 1), runs the filter pass of _coilProcess() on task[0],
** meets the workers at updateBarrier (only when numThreads > 1), and
** runs the update pass of _coilProcess() on task[0].
**
** cctx:          context to iterate; must be non-NULL
** numIterations: number of passes; values <= 0 run no passes
**
** Returns 0 on success, or 1 (with a biff message under COIL) if cctx
** is NULL.  With cctx->verbose set, timing is reported on stderr; the
** per-iteration time is reported as 0 when no iterations were requested,
** rather than dividing by zero.
**
** NB: this implements the body of thread 0
*/
int
coilIterate(coilContext *cctx, int numIterations) {
  char me[]="coilIterate", err[BIFF_STRLEN];
  int iter;
  double time0, time1, perIter;

  if (!cctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(COIL, err);
    return 1;
  }

  time0 = airTime();
  for (iter=0; iter<numIterations; iter++) {
    cctx->iter = iter;
    if (cctx->verbose) {
      fprintf(stderr, "%s: starting iter %d (of %d)\n", me,
              iter, numIterations);
    }
    /* workers check "finished" after passing filterBarrier */
    cctx->finished = AIR_FALSE;
    if (cctx->numThreads > 1) {
      airThreadBarrierWait(cctx->filterBarrier);
    }

    /* first: filter */
    if (cctx->verbose > 1) {
      fprintf(stderr, "%s: filtering ... \n", me);
    }
    _coilProcess(cctx->task[0], AIR_TRUE);

    /* second: update */
    if (cctx->verbose > 1) {
      fprintf(stderr, "%s: updating ... \n", me);
    }
    if (cctx->numThreads > 1) {
      airThreadBarrierWait(cctx->updateBarrier);
    }
    _coilProcess(cctx->task[0], AIR_FALSE);
  }
  time1 = airTime();

  if (cctx->verbose) {
    /* avoid a floating-point division by zero when no passes ran */
    perIter = (numIterations > 0
               ? (time1 - time0)/numIterations
               : 0.0);
    fprintf(stderr, "%s: elapsed time = %g (%g/iter)\n", me,
            time1 - time0, perIter);
  }
  return 0;
}
void * _coilWorker(void *_task) { char me[]="_coilWorker"; coilTask *task; task = (coilTask *)_task; while (1) { /* wait until parent has set cctx->finished */ if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): waiting to check finished\n", me, task->threadIdx); } if (task->cctx->numThreads > 1) { airThreadBarrierWait(task->cctx->filterBarrier); } if (task->cctx->finished) { if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): done!\n", me, task->threadIdx); } break; } /* else there's work to do ... */ /* first: filter */ if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): filtering ... \n", me, task->threadIdx); } _coilProcess(task, AIR_TRUE); /* second: update */ if (task->cctx->numThreads > 1) { airThreadBarrierWait(task->cctx->updateBarrier); } if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): updating ... \n", me, task->threadIdx); } _coilProcess(task, AIR_FALSE); } return _task; }
/* the main loop for each worker thread */ void * _pullWorker(void *_task) { char me[]="_pushWorker", err[BIFF_STRLEN]; pullTask *task; task = (pullTask *)_task; while (1) { if (task->pctx->verbose > 1) { fprintf(stderr, "%s(%u): waiting on barrier A\n", me, task->threadIdx); } /* pushFinish sets finished prior to the barriers */ airThreadBarrierWait(task->pctx->iterBarrierA); if (task->pctx->finished) { if (task->pctx->verbose > 1) { fprintf(stderr, "%s(%u): done!\n", me, task->threadIdx); } break; } /* else there's work to do ... */ if (task->pctx->verbose > 1) { fprintf(stderr, "%s(%u): starting to process\n", me, task->threadIdx); } if (_pullProcess(task)) { /* HEY clearly not threadsafe to have errors ... */ sprintf(err, "%s: thread %u trouble", me, task->threadIdx); biffAdd(PULL, err); task->pctx->finished = AIR_TRUE; } if (task->pctx->verbose > 1) { fprintf(stderr, "%s(%u): waiting on barrier B\n", me, task->threadIdx); } airThreadBarrierWait(task->pctx->iterBarrierB); } return _task; }
/* the main loop for each worker thread */ void * _pushWorker(void *_task) { char me[]="_pushWorker", *err; pushTask *task; task = (pushTask *)_task; while (1) { if (task->pctx->verbose > 1) { fprintf(stderr, "%s(%d): waiting to check finished\n", me, task->threadIdx); } /* pushFinish sets finished prior to the barriers */ airThreadBarrierWait(task->pctx->stageBarrierA); if (task->pctx->finished) { if (task->pctx->verbose > 1) { fprintf(stderr, "%s(%d): done!\n", me, task->threadIdx); } break; } /* else there's work to do ... */ if (task->pctx->verbose > 1) { fprintf(stderr, "%s(%d): starting to run stage %d\n", me, task->threadIdx, task->pctx->stageIdx); } if (_pushStageRun(task, task->pctx->stageIdx)) { err = biffGetDone(PUSH); fprintf(stderr, "%s: task %d trouble with stage %d:\n%s", me, task->threadIdx, task->pctx->stageIdx, err); /* HEY: we should be using the "finished" mechanism to shut the whole production down */ } airThreadBarrierWait(task->pctx->stageBarrierB); } return _task; }
/*
** pushFinish
**
** this is called *after* pushOutputGet
**
** should nix everything created by the many _push*Setup() functions
**
** Sets pctx->finished and (when numThread > 1) meets the workers at
** stageBarrierA.  Tasks are then visited from the highest index down to
** 0: every task except task 0 is joined first, and every task has its
** thread and task structs nixed.  After that the task array, the nten /
** ninv / nmask nrrds, the gage and fiber contexts, and the bins are
** released, and finally (when numThread > 1) the mutex and barriers.
**
** Returns 0 on success, or 1 (with a biff message under PUSH) if pctx
** is NULL.
*/
int
pushFinish(pushContext *pctx) {
  char me[]="pushFinish", err[BIFF_STRLEN];
  unsigned int binIdx, tidx;
  pushTask *tk;

  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PUSH, err);
    return 1;
  }

  if (pctx->verbose > 1) {
    fprintf(stderr, "%s: finishing workers\n", me);
  }
  pctx->finished = AIR_TRUE;
  if (pctx->numThread > 1) {
    airThreadBarrierWait(pctx->stageBarrierA);
  }

  /* walk the tasks from last to first; index 0 is the calling thread,
     so it is the only one that is not joined */
  tidx = pctx->numThread;
  while (tidx > 0) {
    tidx--;
    tk = pctx->task[tidx];
    if (tidx) {
      airThreadJoin(tk->thread, &(tk->returnPtr));
    }
    tk->thread = airThreadNix(tk->thread);
    pctx->task[tidx] = _pushTaskNix(tk);
  }
  pctx->task = (pushTask **)airFree(pctx->task);

  pctx->nten = nrrdNuke(pctx->nten);
  pctx->ninv = nrrdNuke(pctx->ninv);
  pctx->nmask = nrrdNuke(pctx->nmask);
  pctx->gctx = gageContextNix(pctx->gctx);
  pctx->fctx = tenFiberContextNix(pctx->fctx);

  binIdx = 0;
  while (binIdx < pctx->numBin) {
    pushBinDone(&(pctx->bin[binIdx]));
    binIdx++;
  }
  pctx->bin = (pushBin *)airFree(pctx->bin);
  pctx->numBin = 0;
  pctx->binsEdge = 0;

  if (pctx->numThread > 1) {
    pctx->binMutex = airThreadMutexNix(pctx->binMutex);
    pctx->stageBarrierA = airThreadBarrierNix(pctx->stageBarrierA);
    pctx->stageBarrierB = airThreadBarrierNix(pctx->stageBarrierB);
  }

  return 0;
}
/*
** pullFinish
**
** this is called *after* pullOutputGet
**
** should nix everything created by the many _pull*Setup() functions
**
** Sets pctx->finished.  When threadNum > 1 it then meets the workers at
** iterBarrierA, joins worker threads threadNum-1 down to 1, and nixes
** the bin mutex and both iteration barriers.  In all cases it finishes
** by calling _pullTaskFinish(), _pullBinFinish() and _pullPointFinish().
**
** Returns 0 on success, or 1 (with a biff message under PULL) if pctx
** is NULL.
*/
int
pullFinish(pullContext *pctx) {
  char me[]="pullFinish", err[BIFF_STRLEN];
  unsigned int wi;

  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PULL, err);
    return 1;
  }

  pctx->finished = AIR_TRUE;
  if (pctx->threadNum > 1) {
    if (pctx->verbose > 1) {
      fprintf(stderr, "%s: finishing workers\n", me);
    }
    airThreadBarrierWait(pctx->iterBarrierA);
    /* the workers are now past barrier A, where they find "finished"
       set and drop out of their loops; gather them up, highest index
       first (index 0 is this thread and is not joined) */
    for (wi = pctx->threadNum - 1; wi > 0; wi--) {
      airThreadJoin(pctx->task[wi]->thread,
                    &(pctx->task[wi]->returnPtr));
    }
    pctx->binMutex = airThreadMutexNix(pctx->binMutex);
    pctx->iterBarrierA = airThreadBarrierNix(pctx->iterBarrierA);
    pctx->iterBarrierB = airThreadBarrierNix(pctx->iterBarrierB);
  }

  /* no need for _pullVolumeFinish(pctx), at least not now */
  /* no need for _pullInfoFinish(pctx), at least not now */
  _pullTaskFinish(pctx);
  _pullBinFinish(pctx);
  /* yes, nixed bins deleted pnts inside, but other buffers
     still have to be freed */
  _pullPointFinish(pctx);

  return 0;
}
/*
** _alanTuringWorker
**
** Thread entry point that repeatedly updates a two-value-per-sample
** (A, B) field.  _task is an alanTask pointer; it is returned unchanged.
**
** Data layout, as indexed below:
**   - _nlev[k]->data holds 2 values per sample: A at [0 + 2*idx] and
**     B at [1 + 2*idx].  Iteration "iter" reads _nlev[iter % 2] and
**     writes _nlev[(iter+1) % 2].
**   - nparm->data holds 3 values per sample: deltaT, alpha, beta.
**   - nten->data (optional; may be absent) holds 4 values per sample:
**     ten[0] is mapped to a confidence, ten[1..3] are Dxx, Dxy, Dyy.
**
** Work split: each thread handles the index range [startW, endW) computed
** from task->idx and numThreads; in 2-D this is a range of Y rows, in 3-D
** it is used as a range of Z slices.
**
** The loop runs while actx->stop is alanStopNot and either maxIteration
** is 0 or iter < maxIteration.  At the end of every iteration each thread
** adds its share of the change to actx->averageChange under changeMutex,
** and the thread that brings changeCount up to numThreads decides whether
** to stop; then all threads wait on iterBarrier.
*/
void *
_alanTuringWorker(void *_task) {
  alan_t *tendata, *ten, react,
    conf, Dxx, Dxy, Dyy, /* Dxz, Dyz, */
    *tpx, *tmx, *tpy, *tmy, /* *tpz, *tmz, */
    *lev0, *lev1, *parm, deltaT, alpha, beta, A, B,
    *v[27], lapA, lapB, corrA, corrB,
    deltaA, deltaB, diffA, diffB, change;
  int dim, iter, stop, startW, endW, idx,
    px, mx, py, my, pz, mz,
    startY, endY, startZ, endZ, sx, sy, sz, x, y, z;
  alanTask *task;

  task = (alanTask *)_task;
  dim = task->actx->dim;
  sx = task->actx->size[0];
  sy = task->actx->size[1];
  /* in 2-D there is a single Z slice */
  sz = (2 == dim ? 1 : task->actx->size[2]);
  parm = (alan_t*)(task->actx->nparm->data);
  /* diffusion coefficients are scaled by deltaX raised to the dimension */
  diffA = AIR_CAST(alan_t, task->actx->diffA/pow(task->actx->deltaX, dim));
  diffB = AIR_CAST(alan_t, task->actx->diffB/pow(task->actx->deltaX, dim));
  /* this thread's share of the range [0, sy) */
  startW = task->idx*sy/task->actx->numThreads;
  endW = (task->idx+1)*sy/task->actx->numThreads;
  tendata = task->actx->nten ? (alan_t *)task->actx->nten->data : NULL;
  react = task->actx->react;

  if (2 == dim) {
    startZ = 0;
    endZ = 1;
    startY = startW;
    endY = endW;
  } else {
    /* NOTE(review): startW/endW were computed from sy, but here they
       bound the Z loop, whose extent is sz -- looks wrong whenever
       sy != sz; confirm intended behavior */
    startZ = startW;
    endZ = endW;
    startY = 0;
    endY = sy;
  }

  for (iter = 0;
       (alanStopNot == task->actx->stop
        && (0 == task->actx->maxIteration
            || iter < task->actx->maxIteration));
       iter++) {

    /* only thread 0 publishes the iteration number and output level */
    if (0 == task->idx) {
      task->actx->iter = iter;
      task->actx->nlev = task->actx->_nlev[(iter+1) % 2];
    }
    /* ping-pong buffers: read lev0, write lev1 */
    lev0 = (alan_t*)(task->actx->_nlev[iter % 2]->data);
    lev1 = (alan_t*)(task->actx->_nlev[(iter+1) % 2]->data);
    stop = alanStopNot;
    change = 0;
    conf = 1;  /* if you have no data; this will stay 1 */
    for (z = startZ; z < endZ; z++) {
      /* neighbor indices: wrap around, or clamp to the volume edge */
      if (task->actx->wrap) {
        pz = AIR_MOD(z+1, sz);
        mz = AIR_MOD(z-1, sz);
      } else {
        pz = AIR_MIN(z+1, sz-1);
        mz = AIR_MAX(z-1, 0);
      }
      for (y = startY; y < endY; y++) {
        if (task->actx->wrap) {
          py = AIR_MOD(y+1, sy);
          my = AIR_MOD(y-1, sy);
        } else {
          py = AIR_MIN(y+1, sy-1);
          my = AIR_MAX(y-1, 0);
        }
        for (x = 0; x < sx; x++) {
          if (task->actx->wrap) {
            px = AIR_MOD(x+1, sx);
            mx = AIR_MOD(x-1, sx);
          } else {
            px = AIR_MIN(x+1, sx-1);
            mx = AIR_MAX(x-1, 0);
          }
          idx = x + sx*(y + sy*z);
          A = lev0[0 + 2*idx];
          B = lev0[1 + 2*idx];
          deltaT = parm[0 + 3*idx];
          alpha = parm[1 + 3*idx];
          beta = parm[2 + 3*idx];
          lapA = lapB = corrA = corrB = 0;
          if (2 == dim) {
            /*
            **  0 1 2 ----> X
            **  3 4 5
            **  6 7 8
            **  |
            **  v Y
            */
            /* v[i] points at the (A, B) pair of neighbor i */
            v[1] = lev0 + 2*( x + sx*(my));
            v[3] = lev0 + 2*(mx + sx*( y));
            v[5] = lev0 + 2*(px + sx*( y));
            v[7] = lev0 + 2*( x + sx*(py));
            if (tendata) {
              /*
              **  0 1 2     Dxy/2          Dyy        -Dxy/2
              **  3 4 5      Dxx     -2*(Dxx + Dyy)     Dxx
              **  6 7 8    -Dxy/2          Dyy         Dxy/2
              */
              v[0] = lev0 + 2*(mx + sx*(my));
              v[2] = lev0 + 2*(px + sx*(my));
              v[6] = lev0 + 2*(mx + sx*(py));
              v[8] = lev0 + 2*(px + sx*(py));
              ten = tendata + 4*idx;
              /* ten[0] clamped to [0.3, 1] and remapped to [0, 1] */
              conf = AIR_CAST(alan_t, (AIR_CLAMP(0.3, ten[0], 1) - 0.3)/0.7);
              if (conf) {
                Dxx = ten[1];
                Dxy = ten[2];
                Dyy = ten[3];
                lapA = (Dxy*(v[0][0] + v[8][0] - v[2][0] - v[6][0])/2
                        + Dxx*(v[3][0] + v[5][0])
                        + Dyy*(v[1][0] + v[7][0])
                        - 2*(Dxx + Dyy)*A);
                lapB = (Dxy*(v[0][1] + v[8][1] - v[2][1] - v[6][1])/2
                        + Dxx*(v[3][1] + v[5][1])
                        + Dyy*(v[1][1] + v[7][1])
                        - 2*(Dxx + Dyy)*B);
                if (!(task->actx->homogAniso)) {
                  /* correction from differences of the tensor
                     components between the +/- neighbors */
                  tpx = tendata + 4*(px + sx*( y + sy*( z)));
                  tmx = tendata + 4*(mx + sx*( y + sy*( z)));
                  tpy = tendata + 4*( x + sx*(py + sy*( z)));
                  tmy = tendata + 4*( x + sx*(my + sy*( z)));
                  /* NOTE(review): the last term of corrA and corrB has
                     no "/4", unlike the three terms before it --
                     confirm whether that is intentional */
                  corrA = ((tpx[1]-tmx[1])*(v[5][0]-v[3][0])/4+ /* Dxx,x*A,x */
                           (tpx[2]-tmx[2])*(v[7][0]-v[1][0])/4+ /* Dxy,x*A,y */
                           (tpy[2]-tmy[2])*(v[5][0]-v[3][0])/4+ /* Dxy,y*A,x */
                           (tpy[3]-tmy[3])*(v[7][0]-v[1][0]));  /* Dyy,y*A,y */
                  corrB = ((tpx[1]-tmx[1])*(v[5][1]-v[3][1])/4+ /* Dxx,x*B,x */
                           (tpx[2]-tmx[2])*(v[7][1]-v[1][1])/4+ /* Dxy,x*B,y */
                           (tpy[2]-tmy[2])*(v[5][1]-v[3][1])/4+ /* Dxy,y*B,x */
                           (tpy[3]-tmy[3])*(v[7][1]-v[1][1]));  /* Dyy,y*B,y */
                }
              } else {
                /* no confidence; you diffuse */
                lapA = v[1][0] + v[3][0] + v[5][0] + v[7][0] - 4*A;
                lapB = v[1][1] + v[3][1] + v[5][1] + v[7][1] - 4*B;
              }
            } else {
              /* no data; you diffuse */
              lapA = v[1][0] + v[3][0] + v[5][0] + v[7][0] - 4*A;
              lapB = v[1][1] + v[3][1] + v[5][1] + v[7][1] - 4*B;
            }
          } else {
            /* 3 == dim */
            /*
            **  0  1  2 ---- X
            **  3  4  5
            **  6  7  8
            **    /
            **   /   9 10 11
            **  Y   12 13 14
            **      15 16 17
            **
            **        18 19 20
            **        21 22 23
            **        24 25 26
            **              |
            **              |
            **              Z
            */
            /* only the six face neighbors are set up */
            v[ 4] = lev0 + 2*( x + sx*( y + sy*(mz)));
            v[10] = lev0 + 2*( x + sx*(my + sy*( z)));
            v[12] = lev0 + 2*(mx + sx*( y + sy*( z)));
            v[14] = lev0 + 2*(px + sx*( y + sy*( z)));
            v[16] = lev0 + 2*( x + sx*(py + sy*( z)));
            v[22] = lev0 + 2*( x + sx*( y + sy*(pz)));
            if (tendata) {
              /* NOTE(review): nothing is computed here, so with tensor
                 data in 3-D lapA, lapB, corrA and corrB all stay 0 --
                 looks unimplemented; confirm */
              if (!(task->actx->homogAniso)) {

              }
            } else {
              lapA = (v[ 4][0] + v[10][0] + v[12][0]
                      + v[14][0] + v[16][0] + v[22][0] - 6*A);
              lapB = (v[ 4][1] + v[10][1] + v[12][1]
                      + v[14][1] + v[16][1] + v[22][1] - 6*B);
            }
          }

          /* change in A: a reaction term (scaled by react, conf and K)
             plus a diffusion term, all times the per-sample deltaT */
          deltaA = deltaT*(react*conf*task->actx->K*(alpha - A*B)
                           + diffA*(lapA + corrA));
          if (AIR_ABS(deltaA) > task->actx->maxPixelChange) {
            stop = alanStopDiverged;
          }
          /* only the change in A is accumulated */
          change += AIR_ABS(deltaA);
          deltaB = deltaT*(react*conf*task->actx->K*(A*B - B - beta)
                           + diffB*(lapB + corrB));
          if (!( AIR_EXISTS(deltaA) && AIR_EXISTS(deltaB) )) {
            stop = alanStopNonExist;
          }

          A += deltaA;
          /* B is not allowed to go negative */
          B = AIR_MAX(0, B + deltaB);
          lev1[0 + 2*idx] = A;
          lev1[1 + 2*idx] = B;
        }
      }
    }

    /* add change to global sum in a threadsafe way */
    airThreadMutexLock(task->actx->changeMutex);
    task->actx->averageChange += change/(sx*sy*sz);
    task->actx->changeCount += 1;
    if (task->actx->changeCount == task->actx->numThreads) {
      /* I must be the last thread to reach this point; all
         others must have passed the mutex unlock, and are
         sitting at the barrier */
      /* NOTE(review): "stop" is a local of this thread, so only the
         value seen by the thread taking this branch is examined here;
         a non-alanStopNot value set in another thread's range is not
         visible at this point -- confirm that this is acceptable */
      if (alanStopNot != stop) {
        /* there was some problem in going from lev0 to lev1, which
           we deal with now by setting actx->stop */
        task->actx->stop = stop;
      } else if (task->actx->averageChange < task->actx->minAverageChange) {
        /* we converged */
        task->actx->stop = alanStopConverged;
      } else {
        /* we keep going */
        _alanPerIteration(task->actx, iter);
        if (task->actx->perIteration) {
          task->actx->perIteration(task->actx, iter);
        }
      }
      /* reset the shared accumulators for the next iteration */
      task->actx->averageChange = 0;
      task->actx->changeCount = 0;
    }
    airThreadMutexUnlock(task->actx->changeMutex);

    /* force all threads to line up here, once per iteration */
    airThreadBarrierWait(task->actx->iterBarrier);
  }

  if (iter == task->actx->maxIteration) {
    /* HEY: all threads will agree on this, right? */
    task->actx->stop = alanStopMaxIteration;
  }
  /* else: the non-alanStopNot value of task->actx->stop made us stop */
  return _task;
}
/*
******** pushIterate
**
** Runs one iteration, made of pctx->numStage stages, on the calling
** thread.  Before the first stage it clears pctx->finished, resets
** binIdx and stageIdx, and zeroes each task's sumVel and numThing.
** For every stage it meets the workers at stageBarrierA (only when
** numThread > 1), runs _pushStageRun() on task[0], meets the workers at
** stageBarrierB (only when numThread > 1), and then advances stageIdx
** and resets binIdx.  Afterwards pctx->meanVel is set to the sum of the
** per-task sumVel divided by the sum of the per-task numThing (or left
** as the plain sum when that count is 0), and pushRebin() is called.
**
** Returns 0 on success.  Returns 1 if pctx is NULL or pushRebin() fails
** (message added to biff under PUSH), or if _pushStageRun() fails on
** task[0] (message printed to stderr).
**
** NB: this implements the body of thread 0
*/
int
pushIterate(pushContext *pctx) {
  char me[]="pushIterate", *_err, err[BIFF_STRLEN];
  unsigned int ti, numThing;

  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PUSH, err);
    return 1;
  }

  if (pctx->verbose) {
    fprintf(stderr, "%s: starting iteration\n", me);
  }
  /* the _pushWorker checks finished after the barriers */
  pctx->finished = AIR_FALSE;
  pctx->binIdx=0;
  pctx->stageIdx=0;
  for (ti=0; ti<pctx->numThread; ti++) {
    pctx->task[ti]->sumVel = 0;
    pctx->task[ti]->numThing = 0;
  }
  do {
    if (pctx->numThread > 1) {
      airThreadBarrierWait(pctx->stageBarrierA);
    }
    if (pctx->verbose) {
      fprintf(stderr, "%s: starting iter %d stage %d\n", me,
              pctx->iter, pctx->stageIdx);
    }
    if (_pushStageRun(pctx->task[0], pctx->stageIdx)) {
      _err = biffGetDone(PUSH);
      fprintf(stderr, "%s: task %d trouble w/ iter %d stage %d:\n%s", me,
              pctx->task[0]->threadIdx, pctx->iter,
              pctx->task[0]->pctx->stageIdx, _err);
      /* the message string is ours to release once it has been printed */
      _err = (char *)airFree(_err);
      /* the workers still go on to wait at stageBarrierB after this
         stage; meet them there before bailing, so they can return to
         stageBarrierA (where pushFinish collects them) instead of being
         left stuck at a barrier that thread 0 never reaches */
      if (pctx->numThread > 1) {
        airThreadBarrierWait(pctx->stageBarrierB);
      }
      return 1;
    }
    if (pctx->numThread > 1) {
      airThreadBarrierWait(pctx->stageBarrierB);
    }
    /* This is the only code to happen between barriers */
    pctx->stageIdx++;
    pctx->binIdx=0;
  } while (pctx->stageIdx < pctx->numStage);

  pctx->meanVel = 0;
  numThing = 0;
  for (ti=0; ti<pctx->numThread; ti++) {
    pctx->meanVel += pctx->task[ti]->sumVel;
    /*
    fprintf(stderr, "!%s: task %d sumVel = %g\n", me,
            ti, pctx->task[ti]->sumVel);
    */
    numThing += pctx->task[ti]->numThing;
  }
  /* nothing was counted: skip the division rather than divide by zero */
  if (numThing) {
    pctx->meanVel /= numThing;
  }
  if (pushRebin(pctx)) {
    sprintf(err, "%s: problem with new point locations", me);
    biffAdd(PUSH, err);
    return 1;
  }
  /* debugging hook, deliberately disabled by the leading 0 */
  if (0 && 100 == pctx->iter) {
    _pushForceSample(pctx, 300, 300);
  }

  return 0;
}