Example #1
/*
** _pullIterate
** (documentation)
** NB: this implements the body of thread 0, the master thread
*/
/*
** Runs one iteration from the point of view of thread 0, which works
** on pctx->task[0].
**
** pctx: the pull context.  A NULL pointer is reported via
**       biffAdd(PULL, ...) and makes the function return 1.
**
** Returns 0 on success.  Returns 1 when pctx is NULL, when
** pctx->finished is found set after processing, or when pullRebin()
** reports a problem.
**
** NOTE(review): this excerpt has no closing braces, and both
** "if (pctx->threadNum > 1) {" tests below have no visible body, so
** lines of the function appear to be missing here.  Confirm the exact
** control flow against the complete source.
*/
_pullIterate(pullContext *pctx) {
  char me[]="_pullIterate", err[BIFF_STRLEN];
  double time0;
  int myError;
  unsigned int thi;

  /* nothing can be done without a context */
  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PULL, err); return 1;
  if (pctx->verbose) {
    fprintf(stderr, "%s: start iter %d w/ %u threads; energy = %g\n",
            me, pctx->iter, pctx->threadNum, _pullEnergyTotal(pctx));

  /* start of the interval that ends up in pctx->timeIteration */
  time0 = airTime();

  /* the _pullWorker checks finished after iterBarrierA */
  pctx->finished = AIR_FALSE;

  /* initialize index of next bin to be doled out to threads */

  if (pctx->threadNum > 1) {
  /* myError remembers whether it was this thread that set finished */
  myError = AIR_FALSE;
  /* thread 0 does its own share of the work through task[0] */
  if (_pullProcess(pctx->task[0])) {
    sprintf(err, "%s: master thread trouble w/ iter %u", me, pctx->iter);
    biffAdd(PULL, err);
    pctx->finished = AIR_TRUE;
    myError = AIR_TRUE;
  if (pctx->threadNum > 1) {
  /* finished being set at this point means some thread hit an error */
  if (pctx->finished) {
    if (!myError) {
      /* we didn't set finished- one of the workers must have */
      sprintf(err, "%s: worker error on iter %u", me, pctx->iter);
      biffAdd(PULL, err); 
    return 1;
  /* total the per-task stuck counts into the context */
  pctx->stuckNum = 0;
  for (thi=0; thi<pctx->threadNum; thi++) {
    pctx->stuckNum += pctx->task[thi]->stuckNum;
  if (pullRebin(pctx)) {
    sprintf(err, "%s: problem with new point locations", me);
    biffAdd(PULL, err); return 1;

  /* elapsed time for this iteration, measured with airTime() */
  pctx->timeIteration = airTime() - time0;
  return 0;
Example #2
/*
** Shuts down the worker threads and releases the per-task and
** threading resources held by the coil context.
**
** cctx: the coil context.  A NULL pointer is reported via
**       biffAdd(COIL, ...) and makes the function return 1.
**
** Returns 0 on success, 1 if cctx is NULL.
**
** NOTE(review): this excerpt has no closing braces; block extents
** described here follow the indentation and should be confirmed
** against the complete source.
*/
coilFinish(coilContext *cctx) {
  char me[]="coilFinish", err[BIFF_STRLEN];
  unsigned int tidx;

  if (!cctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(COIL, err); return 1;

  if (cctx->verbose > 1) {
    fprintf(stderr, "%s: finishing workers\n", me);
  /* _coilWorker tests this flag at the top of its loop */
  cctx->finished = AIR_TRUE;
  if (cctx->numThreads > 1) {
    /* tasks 1..numThreads-1: join the thread, then nix thread and task */
    for (tidx=1; tidx<cctx->numThreads; tidx++) {
      airThreadJoin(cctx->task[tidx]->thread, &(cctx->task[tidx]->returnPtr));
      cctx->task[tidx]->thread = airThreadNix(cctx->task[tidx]->thread);
      cctx->task[tidx] = _coilTaskNix(cctx->task[tidx]);
  /* task 0 is nixed without a join */
  cctx->task[0]->thread = airThreadNix(cctx->task[0]->thread);
  cctx->task[0] = _coilTaskNix(cctx->task[0]);
  /* the task array itself; the result of airFree is stored back */
  cctx->task = (coilTask **)airFree(cctx->task);

  /* the mutex and the two barriers are only nixed for multi-thread runs */
  if (cctx->numThreads > 1) {
    cctx->nextSliceMutex = airThreadMutexNix(cctx->nextSliceMutex);
    cctx->filterBarrier = airThreadBarrierNix(cctx->filterBarrier);
    cctx->updateBarrier = airThreadBarrierNix(cctx->updateBarrier);

  return 0;
Example #3
/*
******** coilIterate
** (documentation)
** NB: this implements the body of thread 0
*/
/*
** Runs numIterations iterations, each a filter pass followed by an
** update pass, with this thread working on cctx->task[0].
**
** cctx:          the coil context; NULL is reported via
**                biffAdd(COIL, ...) and returns 1.
** numIterations: number of passes through the loop.
**
** Returns 0 on success, 1 if cctx is NULL.
**
** NOTE(review): this excerpt has no closing braces, the two
** "if (cctx->numThreads > 1) {" tests have no visible body, and the
** first fprintf() call is cut off after "iter,".  Lines appear to be
** missing; confirm against the complete source.
*/
coilIterate(coilContext *cctx, int numIterations) {
  char me[]="coilIterate", err[BIFF_STRLEN];
  int iter;
  double time0, time1;

  if (!cctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(COIL, err); return 1;
  /* time0/time1 bracket the whole loop for the timing report below */
  time0 = airTime();
  for (iter=0; iter<numIterations; iter++) {
    /* publish the current iteration number in the context */
    cctx->iter = iter;
    if (cctx->verbose) {
      fprintf(stderr, "%s: starting iter %d (of %d)\n", me, iter, 
    /* _coilWorker tests this flag at the top of its loop */
    cctx->finished = AIR_FALSE;
    if (cctx->numThreads > 1) {
    /* first: filter */
    if (cctx->verbose > 1) {
      fprintf(stderr, "%s: filtering ... \n", me);
    _coilProcess(cctx->task[0], AIR_TRUE);

    /* second: update */
    if (cctx->verbose > 1) {
      fprintf(stderr, "%s: updating ... \n", me);
    if (cctx->numThreads > 1) {
    _coilProcess(cctx->task[0], AIR_FALSE);

  time1 = airTime();
  if (cctx->verbose) {
    /* NOTE(review): the per-iteration figure divides by numIterations,
       which is not checked against zero in the visible code */
    fprintf(stderr, "%s: elapsed time = %g (%g/iter)\n", me,
            time1 - time0, (time1 - time0)/numIterations);
  return 0;
Example #4
/*
** Thread entry point for a coil worker.  Loops, running a filter pass
** and then an update pass on its own task each time round.
**
** _task: really a coilTask pointer.
**
** Returns the same pointer it was given.
**
** NOTE(review): this excerpt has no closing braces, the
** "if (task->cctx->numThreads > 1) {" tests have no visible body, and
** no statement leaving the while(1) loop is visible.  Lines appear to
** be missing; confirm against the complete source.
*/
void *
_coilWorker(void *_task) {
  char me[]="_coilWorker";
  coilTask *task;

  task = (coilTask *)_task;

  while (1) {
    /* wait until parent has set cctx->finished */
    if (task->cctx->verbose > 1) {
      fprintf(stderr, "%s(%d): waiting to check finished\n",
              me, task->threadIdx);
    if (task->cctx->numThreads > 1) {
    /* coilFinish sets finished to AIR_TRUE; coilIterate sets AIR_FALSE */
    if (task->cctx->finished) {
      if (task->cctx->verbose > 1) {
        fprintf(stderr, "%s(%d): done!\n", me, task->threadIdx);
    /* else there's work to do ... */

    /* first: filter */
    if (task->cctx->verbose > 1) {
      fprintf(stderr, "%s(%d): filtering ... \n",
              me, task->threadIdx);
    _coilProcess(task, AIR_TRUE);

    /* second: update */
    if (task->cctx->numThreads > 1) {
    if (task->cctx->verbose > 1) {
      fprintf(stderr, "%s(%d): updating ... \n",
              me, task->threadIdx);
    _coilProcess(task, AIR_FALSE);


  return _task;
Example #5
/*
** The main loop for each pull worker thread.  Each pass through the
** loop checks pctx->finished and otherwise runs _pullProcess() on this
** thread's own task.
**
** _task: really a pullTask pointer.
**
** Returns the same pointer it was given.
**
** The "me" string used to read "_pushWorker", so every diagnostic and
** biff message from this function named the wrong function; it now
** names _pullWorker.
**
** NOTE(review): this excerpt has no closing braces and shows no
** statement leaving the while(1) loop, so lines appear to be missing.
** Confirm against the complete source.
*/
void *
_pullWorker(void *_task) {
  char me[]="_pullWorker", err[BIFF_STRLEN];
  pullTask *task;
  task = (pullTask *)_task;

  while (1) {
    if (task->pctx->verbose > 1) {
      fprintf(stderr, "%s(%u): waiting on barrier A\n",
              me, task->threadIdx);
    /* pullFinish sets finished prior to the barriers */
    if (task->pctx->finished) {
      if (task->pctx->verbose > 1) {
        fprintf(stderr, "%s(%u): done!\n", me, task->threadIdx);
    /* else there's work to do ... */
    if (task->pctx->verbose > 1) {
      fprintf(stderr, "%s(%u): starting to process\n", me, task->threadIdx);
    if (_pullProcess(task)) {
      /* HEY clearly not threadsafe to have errors ... */
      sprintf(err, "%s: thread %u trouble", me, task->threadIdx);
      biffAdd(PULL, err); 
      /* setting finished is how _pullIterate learns a worker failed */
      task->pctx->finished = AIR_TRUE;
    if (task->pctx->verbose > 1) {
      fprintf(stderr, "%s(%u): waiting on barrier B\n",
              me, task->threadIdx);

  return _task;
/*
** The main loop for each push worker thread.  Each pass through the
** loop checks pctx->finished and otherwise runs the current stage
** (pctx->stageIdx) on this thread's own task via _pushStageRun().
**
** _task: really a pushTask pointer.
**
** Returns the same pointer it was given.
**
** NOTE(review): this excerpt has no closing braces and shows no
** statement leaving the while(1) loop, so lines appear to be missing.
** Confirm against the complete source.
*/
void *
_pushWorker(void *_task) {
  char me[]="_pushWorker", *err;
  pushTask *task;
  task = (pushTask *)_task;

  while (1) {
    if (task->pctx->verbose > 1) {
      fprintf(stderr, "%s(%d): waiting to check finished\n",
              me, task->threadIdx);
    /* pushFinish sets finished prior to the barriers */
    if (task->pctx->finished) {
      if (task->pctx->verbose > 1) {
        fprintf(stderr, "%s(%d): done!\n", me, task->threadIdx);
    /* else there's work to do ... */
    if (task->pctx->verbose > 1) {
      fprintf(stderr, "%s(%d): starting to run stage %d\n",
              me, task->threadIdx, task->pctx->stageIdx);
    if (_pushStageRun(task, task->pctx->stageIdx)) {
      /* NOTE(review): err is printed but never released in the visible
         code; if biffGetDone() hands back an allocated string, this
         leaks on every failing pass -- confirm ownership */
      err = biffGetDone(PUSH);
      fprintf(stderr, "%s: task %d trouble with stage %d:\n%s", me,
              task->threadIdx, task->pctx->stageIdx, err);
      /* HEY: we should be using the "finished" mechanism to
         shut the whole production down */

  return _task;
/*
** this is called *after* pushOutputGet
** should nix everything created by the many _push*Setup() functions
*/
/*
** Tears down a push context: signals the workers to finish, nixes the
** threads and tasks, and releases the volumes, gage and fiber contexts,
** bins, and threading primitives stored in the context.
**
** pctx: the push context; NULL is reported via biffAdd(PUSH, ...) and
**       returns 1.
**
** Returns 0 on success, 1 if pctx is NULL.
**
** NOTE(review): this excerpt has no closing braces, and both
** "if (pctx->numThread > 1) {" (first occurrence) and "if (tidx-1) {"
** have no visible body, so lines appear to be missing.  Confirm
** against the complete source.
*/
pushFinish(pushContext *pctx) {
  char me[]="pushFinish", err[BIFF_STRLEN];
  unsigned int ii, tidx;

  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PUSH, err); return 1;

  if (pctx->verbose > 1) {
    fprintf(stderr, "%s: finishing workers\n", me);
  /* _pushWorker tests this flag at the top of its loop */
  pctx->finished = AIR_TRUE;
  if (pctx->numThread > 1) {
  /* walk the tasks from the last down to task 0; tidx is unsigned, so
     the loop counts tidx from numThread to 1 and indexes with tidx-1 */
  for (tidx=pctx->numThread; tidx>0; tidx--) {
    /* true for every task except task 0 */
    if (tidx-1) {
    pctx->task[tidx-1]->thread = airThreadNix(pctx->task[tidx-1]->thread);
    pctx->task[tidx-1] = _pushTaskNix(pctx->task[tidx-1]);
  pctx->task = (pushTask **)airFree(pctx->task);

  /* each pointer is overwritten with what its nuke/nix call returns */
  pctx->nten = nrrdNuke(pctx->nten);
  pctx->ninv = nrrdNuke(pctx->ninv);
  pctx->nmask = nrrdNuke(pctx->nmask);
  pctx->gctx = gageContextNix(pctx->gctx);
  pctx->fctx = tenFiberContextNix(pctx->fctx);
  /* finish every bin before freeing the bin array itself */
  for (ii=0; ii<pctx->numBin; ii++) {
    pushBinDone(pctx->bin + ii);
  pctx->bin = (pushBin *)airFree(pctx->bin);
  pctx->binsEdge = pctx->numBin = 0;

  /* the mutex and the two barriers are only nixed for multi-thread runs */
  if (pctx->numThread > 1) {
    pctx->binMutex = airThreadMutexNix(pctx->binMutex);
    pctx->stageBarrierA = airThreadBarrierNix(pctx->stageBarrierA);
    pctx->stageBarrierB = airThreadBarrierNix(pctx->stageBarrierB);

  return 0;
Example #8
/*
** this is called *after* pullOutputGet
** should nix everything created by the many _pull*Setup() functions
*/
/*
** Tears down a pull context: signals the workers to finish, nixes the
** threading primitives for multi-thread runs, and calls
** _pullPointFinish().
**
** pctx: the pull context; NULL is reported via biffAdd(PULL, ...) and
**       returns 1.
**
** Returns 0 on success, 1 if pctx is NULL.
**
** NOTE(review): this excerpt has no closing braces and
** "if (tidx-1) {" has no visible body, so the per-task cleanup inside
** the loop appears to be missing here.  Confirm against the complete
** source.
*/
pullFinish(pullContext *pctx) {
  char me[]="pullFinish", err[BIFF_STRLEN];
  unsigned int tidx;

  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PULL, err); return 1;

  /* _pullWorker tests this flag at the top of its loop */
  pctx->finished = AIR_TRUE;
  if (pctx->threadNum > 1) {
    if (pctx->verbose > 1) {
      fprintf(stderr, "%s: finishing workers\n", me);
    /* worker threads now pass barrierA and see that finished is AIR_TRUE,
       and then bail, so now we collect them */
    /* tidx is unsigned: count from threadNum down to 1, index tidx-1 */
    for (tidx=pctx->threadNum; tidx>0; tidx--) {
      /* true for every task except task 0 */
      if (tidx-1) {
    pctx->binMutex = airThreadMutexNix(pctx->binMutex);
    pctx->iterBarrierA = airThreadBarrierNix(pctx->iterBarrierA);
    pctx->iterBarrierB = airThreadBarrierNix(pctx->iterBarrierB);

  /* no need for _pullVolumeFinish(pctx), at least not now */
  /* no need for _pullInfoFinish(pctx), at least not now */
  _pullPointFinish(pctx); /* yes, nixed bins deleted pnts inside, but
                             other buffers still have to be freed */

  return 0;
Example #9
/*
** Thread entry point that advances a two-value-per-voxel (A, B) system
** on a 2-D or 3-D grid, one slab of the grid per thread.
**
** Each iteration reads level buffer _nlev[iter % 2] and writes
** _nlev[(iter+1) % 2].  Per voxel it forms a Laplacian-like term (lapA,
** lapB), optionally weighted by per-voxel tensor data, adds a reaction
** term, and stores the updated A and B.  The summed |deltaA| is added
** to actx->averageChange, and the thread that brings actx->changeCount
** up to numThreads decides whether to stop or continue.
**
** _task: really an alanTask pointer.
**
** Returns the same pointer it was given.
**
** NOTE(review): this excerpt has no closing braces, the diagram lines
** starting with "**" have lost their comment delimiters, and the
** tensor branch of the 3-D case is empty.  Lines appear to be missing;
** confirm against the complete source.
*/
void *
_alanTuringWorker(void *_task) {
  alan_t *tendata, *ten, react,
    conf, Dxx, Dxy, Dyy, /* Dxz, Dyz, */
    *tpx, *tmx, *tpy, *tmy, /* *tpz, *tmz, */
    *lev0, *lev1, *parm, deltaT, alpha, beta, A, B,
    *v[27], lapA, lapB, corrA, corrB, 
    deltaA, deltaB, diffA, diffB, change;
  int dim, iter, stop, startW, endW, idx,
    px, mx, py, my, pz, mz,
    startY, endY, startZ, endZ, sx, sy, sz, x, y, z;
  alanTask *task;

  task = (alanTask *)_task;
  dim = task->actx->dim;
  /* grid size; a 2-D run is handled as a single Z slice (sz == 1) */
  sx = task->actx->size[0];
  sy = task->actx->size[1];
  sz = (2 == dim ? 1 : task->actx->size[2]);
  parm = (alan_t*)(task->actx->nparm->data);
  /* diffusion coefficients, divided by deltaX raised to the dimension */
  diffA = AIR_CAST(alan_t, task->actx->diffA/pow(task->actx->deltaX, dim));
  diffB = AIR_CAST(alan_t, task->actx->diffB/pow(task->actx->deltaX, dim));
  /* this thread's half-open slab [startW, endW), chosen by task->idx.
     NOTE(review): the bounds are computed from sy, yet for 3 == dim
     they are used below as the Z range -- confirm this is intended
     when sy differs from sz */
  startW = task->idx*sy/task->actx->numThreads;
  endW = (task->idx+1)*sy/task->actx->numThreads;
  /* tendata stays NULL when the context has no nten volume */
  tendata = task->actx->nten ? (alan_t *)task->actx->nten->data : NULL;
  react = task->actx->react;

  /* 2-D: the slab is a range of Y rows; 3-D: a range of Z slices */
  if (2 == dim) {
    startZ = 0;
    endZ = 1;
    startY = startW;
    endY = endW;
  } else {
    startZ = startW;
    endZ = endW;
    startY = 0;
    endY = sy;

  /* keep iterating while actx->stop is still alanStopNot; the iteration
     bound is only applied when maxIteration is non-zero */
  for (iter = 0; 
       (alanStopNot == task->actx->stop 
        && (0 == task->actx->maxIteration
            || iter < task->actx->maxIteration)); 
       iter++) {

    /* only the task with idx 0 records the iteration number and points
       actx->nlev at the buffer being written this iteration */
    if (0 == task->idx) {
      task->actx->iter = iter;
      task->actx->nlev = task->actx->_nlev[(iter+1) % 2];
    /* lev0 is read, lev1 is written; the two buffers swap each iteration */
    lev0 = (alan_t*)(task->actx->_nlev[iter % 2]->data);
    lev1 = (alan_t*)(task->actx->_nlev[(iter+1) % 2]->data);
    stop = alanStopNot;
    change = 0;
    conf = 1;  /* if you have no data; this will stay 1 */
    /* neighbor indices (p = plus one, m = minus one) either wrap around
       with AIR_MOD or are clamped to the grid edge, per actx->wrap */
    for (z = startZ; z < endZ; z++) {
      if (task->actx->wrap) {
        pz = AIR_MOD(z+1, sz);
        mz = AIR_MOD(z-1, sz);
      } else {
        pz = AIR_MIN(z+1, sz-1);
        mz = AIR_MAX(z-1, 0);
      for (y = startY; y < endY; y++) {
        if (task->actx->wrap) {
          py = AIR_MOD(y+1, sy);
          my = AIR_MOD(y-1, sy);
        } else {
          py = AIR_MIN(y+1, sy-1);
          my = AIR_MAX(y-1, 0);
        for (x = 0; x < sx; x++) {
          if (task->actx->wrap) {
            px = AIR_MOD(x+1, sx);
            mx = AIR_MOD(x-1, sx);
          } else {
            px = AIR_MIN(x+1, sx-1);
            mx = AIR_MAX(x-1, 0);
          /* linear voxel index; the level buffers hold 2 values per
             voxel (A, B) and parm holds 3 (deltaT, alpha, beta) */
          idx = x + sx*(y + sy*z);
          A = lev0[0 + 2*idx];
          B = lev0[1 + 2*idx];
          deltaT = parm[0 + 3*idx];
          alpha = parm[1 + 3*idx];
          beta = parm[2 + 3*idx];
          lapA = lapB = corrA = corrB = 0;
          if (2 == dim) {
            **  0 1 2 ----> X
            **  3 4 5
            **  6 7 8
            **  |
            **  v Y
            /* v[] entries point at the (A, B) pair of each neighbor;
               1, 3, 5, 7 are the four edge-adjacent neighbors */
            v[1] = lev0 + 2*( x + sx*(my));
            v[3] = lev0 + 2*(mx + sx*( y));
            v[5] = lev0 + 2*(px + sx*( y));
            v[7] = lev0 + 2*( x + sx*(py));
            if (tendata) {
              **  0 1 2    Dxy/2          Dyy        -Dxy/2
              **  3 4 5     Dxx     -2*(Dxx + Dyy)     Dxx
              **  6 7 8   -Dxy/2          Dyy         Dxy/2
              /* the diagonal neighbors are only needed with tensor data */
              v[0] = lev0 + 2*(mx + sx*(my));
              v[2] = lev0 + 2*(px + sx*(my));
              v[6] = lev0 + 2*(mx + sx*(py));
              v[8] = lev0 + 2*(px + sx*(py));
              /* 4 tensor values per voxel: ten[0] feeds conf, ten[1..3]
                 are read as Dxx, Dxy, Dyy */
              ten = tendata + 4*idx;
              /* clamp ten[0] to [0.3, 1], then rescale that to [0, 1] */
              conf = AIR_CAST(alan_t, (AIR_CLAMP(0.3, ten[0], 1) - 0.3)/0.7);
              if (conf) {
                Dxx = ten[1];
                Dxy = ten[2];
                Dyy = ten[3];
                lapA = (Dxy*(v[0][0] + v[8][0] - v[2][0] - v[6][0])/2
                        + Dxx*(v[3][0] + v[5][0]) + Dyy*(v[1][0] + v[7][0])
                        - 2*(Dxx + Dyy)*A);
                lapB = (Dxy*(v[0][1] + v[8][1] - v[2][1] - v[6][1])/2
                        + Dxx*(v[3][1] + v[5][1]) + Dyy*(v[1][1] + v[7][1])
                        - 2*(Dxx + Dyy)*B);
                /* correction from differences of the tensor values at
                   the +/- neighbors, skipped when homogAniso is set.
                   NOTE(review): the last term of corrA and of corrB has
                   no /4 while the other three do -- confirm intended */
                if (!(task->actx->homogAniso)) {
                  tpx = tendata + 4*(px + sx*( y + sy*( z)));
                  tmx = tendata + 4*(mx + sx*( y + sy*( z)));
                  tpy = tendata + 4*( x + sx*(py + sy*( z)));
                  tmy = tendata + 4*( x + sx*(my + sy*( z)));
                  corrA = ((tpx[1]-tmx[1])*(v[5][0]-v[3][0])/4+ /* Dxx,x*A,x */
                           (tpx[2]-tmx[2])*(v[7][0]-v[1][0])/4+ /* Dxy,x*A,y */
                           (tpy[2]-tmy[2])*(v[5][0]-v[3][0])/4+ /* Dxy,y*A,x */
                           (tpy[3]-tmy[3])*(v[7][0]-v[1][0]));  /* Dyy,y*A,y */
                  corrB = ((tpx[1]-tmx[1])*(v[5][1]-v[3][1])/4+ /* Dxx,x*B,x */
                           (tpx[2]-tmx[2])*(v[7][1]-v[1][1])/4+ /* Dxy,x*B,y */
                           (tpy[2]-tmy[2])*(v[5][1]-v[3][1])/4+ /* Dxy,y*B,x */
                           (tpy[3]-tmy[3])*(v[7][1]-v[1][1]));  /* Dyy,y*B,y */
              } else {
                /* no confidence; you diffuse */
                lapA = v[1][0] + v[3][0] + v[5][0] + v[7][0] - 4*A;
                lapB = v[1][1] + v[3][1] + v[5][1] + v[7][1] - 4*B;
            } else {
              /* no data; you diffuse */
              lapA = v[1][0] + v[3][0] + v[5][0] + v[7][0] - 4*A;
              lapB = v[1][1] + v[3][1] + v[5][1] + v[7][1] - 4*B;
          } else {
            /* 3 == dim */
            **          0   1   2   ---- X
            **        3   4   5
            **      6   7   8
            **    /
            **  /       9  10  11
            ** Y     12  13  14
            **     15  16  17
            **         18  19  20
            **       21  22  23
            **     24  25  26
            **         |
            **         |
            **         Z
            /* the six face-adjacent neighbors of the center voxel */
            v[ 4] = lev0 + 2*( x + sx*( y + sy*(mz)));
            v[10] = lev0 + 2*( x + sx*(my + sy*( z)));
            v[12] = lev0 + 2*(mx + sx*( y + sy*( z)));
            v[14] = lev0 + 2*(px + sx*( y + sy*( z)));
            v[16] = lev0 + 2*( x + sx*(py + sy*( z)));
            v[22] = lev0 + 2*( x + sx*( y + sy*(pz)));
            /* NOTE(review): nothing is computed in the tensor branch as
               shown here, so lapA and lapB would stay 0 on that path */
            if (tendata) {

              if (!(task->actx->homogAniso)) {
            } else {
              lapA = (v[ 4][0] + v[10][0] + v[12][0]
                      + v[14][0] + v[16][0] + v[22][0] - 6*A);
              lapB = (v[ 4][1] + v[10][1] + v[12][1]
                      + v[14][1] + v[16][1] + v[22][1] - 6*B);
          /* reaction term (scaled by react, conf and actx->K) plus the
             diffusion term, all multiplied by the per-voxel deltaT */
          deltaA = deltaT*(react*conf*task->actx->K*(alpha - A*B) 
                           + diffA*(lapA + corrA));
          /* a single too-large step in A marks the run as diverged */
          if (AIR_ABS(deltaA) > task->actx->maxPixelChange) {
            stop = alanStopDiverged;
          change += AIR_ABS(deltaA);
          deltaB = deltaT*(react*conf*task->actx->K*(A*B - B - beta)
                           + diffB*(lapB + corrB));
          if (!( AIR_EXISTS(deltaA) && AIR_EXISTS(deltaB) )) {
            stop = alanStopNonExist;
          /* B is kept from going negative; A is not clamped */
          A += deltaA;
          B = AIR_MAX(0, B + deltaB);
          lev1[0 + 2*idx] = A;
          lev1[1 + 2*idx] = B; 
    /* add change to global sum in a threadsafe way */
    /* NOTE(review): no lock or unlock call is visible around these
       shared updates in this excerpt -- confirm against the source */
    task->actx->averageChange += change/(sx*sy*sz);
    task->actx->changeCount += 1;
    if (task->actx->changeCount == task->actx->numThreads) {
      /* I must be the last thread to reach this point; all 
         others must have passed the mutex unlock, and are
         sitting at the barrier */
      if (alanStopNot != stop) {
        /* there was some problem in going from lev0 to lev1, which
           we deal with now by setting actx->stop */
        task->actx->stop = stop;
      } else if (task->actx->averageChange < task->actx->minAverageChange) {
        /* we converged */
        task->actx->stop = alanStopConverged;
      } else {
        /* we keep going */
        _alanPerIteration(task->actx, iter);
        /* optional user callback, invoked only when it is set */
        if (task->actx->perIteration) {
          task->actx->perIteration(task->actx, iter);
      /* reset the shared accumulators for the next iteration */
      task->actx->averageChange = 0;
      task->actx->changeCount = 0;

    /* force all threads to line up here, once per iteration */
  if (iter == task->actx->maxIteration) {
    /* HEY: all threads will agree on this, right? */
    task->actx->stop = alanStopMaxIteration;
  /* else: the non-alanStopNot value of task->actx->stop made us stop */
  return _task;
/*
******** pushIterate
** (documentation)
** NB: this implements the body of thread 0
*/
/*
** Runs one iteration as seen from thread 0: resets the per-task
** accumulators, runs the stages on pctx->task[0] through
** _pushStageRun(), combines the per-task sums into pctx->meanVel, and
** calls pushRebin().
**
** pctx: the push context; NULL is reported via biffAdd(PUSH, ...) and
**       returns 1.
**
** Returns 0 on success.  Returns 1 if pctx is NULL, if _pushStageRun()
** fails for task 0, or if pushRebin() reports a problem.
**
** NOTE(review): this excerpt has no closing braces, both
** "if (pctx->numThread > 1) {" tests have no visible body, and nothing
** visible changes pctx->stageIdx inside the do/while loop.  Lines
** appear to be missing; confirm against the complete source.
*/
pushIterate(pushContext *pctx) {
  char me[]="pushIterate", *_err, err[BIFF_STRLEN];
  unsigned int ti, numThing;

  if (!pctx) {
    sprintf(err, "%s: got NULL pointer", me);
    biffAdd(PUSH, err); return 1;
  if (pctx->verbose) {
    fprintf(stderr, "%s: starting iteration\n", me);
  /* the _pushWorker checks finished after the barriers */
  pctx->finished = AIR_FALSE;
  /* clear the per-task accumulators that are combined after the stages */
  for (ti=0; ti<pctx->numThread; ti++) {
    pctx->task[ti]->sumVel = 0;
    pctx->task[ti]->numThing = 0;
  do {
    if (pctx->numThread > 1) {
    if (pctx->verbose) {
      fprintf(stderr, "%s: starting iter %d stage %d\n", me, 
              pctx->iter, pctx->stageIdx);
    /* thread 0 runs the current stage on its own task */
    if (_pushStageRun(pctx->task[0], pctx->stageIdx)) {
      /* the failure is printed to stderr here rather than re-added to
         biff.  NOTE(review): _err is not released in the visible code */
      _err = biffGetDone(PUSH);
      fprintf(stderr, "%s: task %d trouble w/ iter %d stage %d:\n%s", me,
              pctx->task[0]->threadIdx, pctx->iter,
              pctx->task[0]->pctx->stageIdx, _err);
      return 1;
    if (pctx->numThread > 1) {
    /* This is the only code to happen between barriers */
  } while (pctx->stageIdx < pctx->numStage);
  /* combine the per-task sums into one mean */
  pctx->meanVel = 0;
  numThing = 0;
  for (ti=0; ti<pctx->numThread; ti++) {
    pctx->meanVel += pctx->task[ti]->sumVel;
    /* NOTE(review): as indented, this diagnostic is not gated on
       pctx->verbose and prints for every task on every call */
    fprintf(stderr, "!%s: task %d sumVel = %g\n", me,
            ti, pctx->task[ti]->sumVel);
    numThing += pctx->task[ti]->numThing;
  /* NOTE(review): numThing is not checked against zero before dividing */
  pctx->meanVel /= numThing;
  if (pushRebin(pctx)) {
    sprintf(err, "%s: problem with new point locations", me);
    biffAdd(PUSH, err); return 1;
  /* the leading "0 &&" makes this condition always false, so the call
     below is disabled */
  if (0 && 100 == pctx->iter) {
    _pushForceSample(pctx, 300, 300);
  return 0;