void Task::closeout()
{
  enum { RESPAWN = int( Kokkos::Experimental::TASK_STATE_WAITING ) |
                   int( Kokkos::Experimental::TASK_STATE_EXECUTING ) };

#if 0
fprintf( stdout
       , "worker(%d.%d) task 0x%.12lx %s\n"
       , qthread_shep()
       , qthread_worker_local(NULL)
       , reinterpret_cast<unsigned long>(this)
       , ( m_state == RESPAWN ? "respawn" : "complete" )
       );
fflush(stdout);
#endif

  // When dependent tasks run there would be a race
  // condition between destroying this task and
  // querying the active count pointer from this task.
  int volatile * const active_count = m_active_count ;

  if ( m_state == RESPAWN ) {
    // Task requests respawn, set state to waiting and reschedule the task
    m_state = Kokkos::Experimental::TASK_STATE_WAITING ;
    schedule();
  }
  else {

    // Task did not respawn, is complete
    m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ;

    // Release dependences before allowing dependent tasks to run.
    // Otherwise there is a thread race condition for removing dependences.
    for ( int i = 0 ; i < m_dep_size ; ++i ) {
      assign( & m_dep[i] , 0 );
    }

    // Set qthread FEB to full so that dependent tasks are allowed to execute.
    // This 'task' may be deleted immediately following this function call.
    qthread_fill( & m_qfeb );

    // The dependent task could now complete and destroy 'this' task
    // before the call to 'qthread_fill' returns.  Therefore, for
    // thread safety assume that 'this' task has now been destroyed.
  }

  // Decrement active task count before returning.
  Kokkos::atomic_decrement( active_count );
}
示例#2
0
// Visit one tree node: spawn one task per child, wait for all of them, then
// publish the size of this node's subtree and signal completion.
//
// args_ points to the node_t for this node. Fields read here:
//   - height, num_children, state: used to set up the children
//   - acc:    where this task stores its subtree size (this node included)
//   - dc:     completion counter incremented by this task when it is done
//   - expect: counter value at which this task fills dc
//
// Always returns 0.
//
// Notes:
// -    Each task receives distinct copy of parent
// -    Copy of child is shallow, be careful with `state` member
static aligned_t visit(void *args_)
{
    node_t   *parent          = (node_t *)args_;
    int       parent_height   = parent->height;
    int       num_children    = parent->num_children;
    aligned_t expect          = parent->expect;
    aligned_t sum_descendants = 1;

    if (num_children > 0) {
        // Declared only once num_children is known to be positive: a
        // variable-length array must not have a zero or negative length.
        aligned_t num_descendants[num_children];
        node_t    child __attribute__((aligned(8)));
        aligned_t donec = 0;

        // Spawn children, if any
        child.height = parent_height + 1;
        child.dc     = &donec;
        child.expect = num_children;

        // Empty the counter's FEB so the wait below blocks until the child
        // that brings the count to num_children fills it again.
        qthread_empty(&donec);

        for (int i = 0; i < num_children; i++) {
            child.acc = &num_descendants[i];

            for (int j = 0; j < num_samples; j++) {
                rng_spawn(parent->state.state, child.state.state, i);
            }

            child.num_children = calc_num_children(&child);

            // `child` is reused for every iteration; the fork call is handed
            // sizeof(node_t) bytes of it as the new task's argument.
            qthread_fork_syncvar_copyargs(visit, &child, sizeof(node_t), NULL);
        }

        // Wait for children to finish up, accumulate descendants counts.
        // The wait is unconditional: `expect` is the threshold for
        // parent->dc, not for donec, and peeking at donec without
        // synchronization could skip the wait while children are still
        // writing to donec and num_descendants on this stack frame.
        qthread_readFF(NULL, &donec);

        for (int i = 0; i < num_children; i++) {
            sum_descendants += num_descendants[i];
        }
    }

    // Publish the result before signalling, so the waiter sees it.
    *parent->acc = sum_descendants;
    if (qthread_incr(parent->dc, 1) + 1 == expect) {
        // This task brought the counter to `expect`: release the waiter.
        qthread_fill(parent->dc);
    }

    return 0;
}
示例#3
0
/*
 * The main procedure simply creates a producer and a consumer task to run in
 * parallel
 *
 * Returns 0 on success, 1 if the qthread runtime cannot be initialized or the
 * shared buffer cannot be allocated.
 */
int main(int argc,
         char *argv[])
{
    aligned_t t[2];

    /* Initialize outside of assert(): with NDEBUG defined the assert
     * expression is not evaluated, which would skip initialization. */
    const int init_status = qthread_initialize();
    assert(init_status == 0);
    if (init_status != 0) {
        return 1;
    }

    CHECK_VERBOSE();
    NUMARG(bufferSize, "BUFFERSIZE");
    /* Default item count; NUMARG below is given the chance to replace it. */
    numItems = 8 * bufferSize;
    NUMARG(numItems, "NUMITEMS");

    iprintf("%i threads...\n", qthread_num_shepherds());

    buff = malloc(sizeof(aligned_t) * bufferSize);
    /* A NULL result for a zero-byte request is not treated as a failure. */
    if ((buff == NULL) && (bufferSize != 0)) {
        return 1;
    }
    for (unsigned int i = 0; i < bufferSize; ++i) {
        buff[i] = 0;
    }

    /* Each fork is given its own return word; the reads below block until
     * both words are full. */
    qthread_fork(consumer, NULL, &t[0]);
    qthread_fork(producer, NULL, &t[1]);
    qthread_readFF(NULL, &t[0]);
    qthread_readFF(NULL, &t[1]);

    /* cleanup... unnecessary in general, but for the moment I'm tracking down
     * errors in the FEB system, so let's clean up */
    for (unsigned int i = 0; i < bufferSize; ++i) {
        qthread_fill(buff + i);
    }

    free(buff);

    iprintf("Success!\n");

    return 0;
}
示例#4
0
// Typed convenience wrapper around qthread_fill: casts `dest` to
// `aligned_t *`, passes it to qthread_fill, and returns that call's result
// unchanged.
//
// QTHREAD_CHECKSIZE(T) is expanded first; presumably it rejects types whose
// size does not match aligned_t -- confirm against the macro definition.
//
// NOTE(review): `T` is not declared in this excerpt; presumably a
// `template <typename T>` header precedes this line -- confirm.
inline int qthread_fill(const T *const dest)
{
    QTHREAD_CHECKSIZE(T);
    // The C-style cast changes the pointee type and drops its const qualifier.
    return qthread_fill((aligned_t *)dest);
}