Пример #1
0
/*
 * Populate the threadblock, warp and thread arrays of 'grid' using the launch
 * geometry found in grid->function, then print the resulting layout with
 * printf().
 *
 * grid: grid to fill in. grid->function is dereferenced immediately; its
 *       group_count, local_size and global_size fields (and their 3D
 *       variants) drive every allocation and loop below.
 *
 * Threads are numbered with the X coordinate varying fastest, both inside a
 * threadblock and across threadblocks. The pointer arrays are obtained from
 * calloc() and are not checked for NULL here.
 */
void frm_grid_setup_threads(struct frm_grid_t *grid)
{
	struct frm_cuda_function_t *function = grid->function;

	struct frm_threadblock_t *tb;
	struct frm_warp_t *wp;
	struct frm_thread_t *th;

	int bx, by, bz;  /* Threadblock coordinates within the grid */
	int tx, ty, tz;  /* Thread coordinates within its threadblock */

	int tid;  /* Linear thread index within the grid */
	int bid;  /* Linear threadblock index */
	int wid;  /* Linear warp index within the grid */
	int lid;  /* Linear thread index within its threadblock */

	/* Threadblocks: one pointer slot per group, each given a new object */
	grid->threadblock_count = function->group_count;
	grid->threadblock_id_first = 0;
	grid->threadblock_id_last = grid->threadblock_count - 1;
	grid->threadblocks = calloc(grid->threadblock_count, sizeof(void *));
	bid = 0;
	while (bid < grid->threadblock_count)
	{
		grid->threadblocks[bid] = frm_threadblock_create();
		bid++;
	}

	/* Warps: round the threadblock size up to a whole number of warps */
	grid->warps_per_threadblock = (function->local_size + frm_emu_warp_size - 1) / frm_emu_warp_size;
	grid->warp_count = grid->warps_per_threadblock * grid->threadblock_count;
	grid->warp_id_first = 0;
	grid->warp_id_last = grid->warp_count - 1;
	assert(grid->warps_per_threadblock > 0 && grid->warp_count > 0);
	grid->warps = calloc(grid->warp_count, sizeof(void *));
	for (wid = 0; wid < grid->warp_count; wid++)
	{
		/* Consecutive warps belong to the same threadblock */
		wp = grid->warps[wid] = frm_warp_create();
		tb = grid->threadblocks[wid / grid->warps_per_threadblock];

		wp->grid = grid;
		wp->threadblock = tb;
		wp->id = wid;
		wp->id_in_threadblock = wid % grid->warps_per_threadblock;
		DOUBLE_LINKED_LIST_INSERT_TAIL(tb, running, wp);
	}

	/* Threads: a single flat array that threadblocks and warps slice into */
	grid->thread_count = function->global_size;
	grid->thread_id_first = 0;
	grid->thread_id_last = grid->thread_count - 1;
	grid->threads = calloc(grid->thread_count, sizeof(void *));
	bid = 0;
	tid = 0;
	for (bz = 0; bz < function->group_count3[2]; bz++)
	{
		for (by = 0; by < function->group_count3[1]; by++)
		{
			for (bx = 0; bx < function->group_count3[0]; bx++)
			{
				/* Identify the threadblock and mark it pending */
				tb = grid->threadblocks[bid];
				tb->grid = grid;
				tb->id = bid;
				tb->id_3d[0] = bx;
				tb->id_3d[1] = by;
				tb->id_3d[2] = bz;
				frm_threadblock_set_status(tb, frm_threadblock_pending);

				/* Range of threads owned by this threadblock */
				tb->thread_count = function->local_size;
				tb->thread_id_first = tid;
				tb->thread_id_last = tid + function->local_size - 1;
				tb->threads = &grid->threads[tid];
				snprintf(tb->name, sizeof(tb->name), "threadblock[i%d-i%d]",
					tb->thread_id_first, tb->thread_id_last);

				/* Range of warps owned by this threadblock */
				tb->warp_count = grid->warps_per_threadblock;
				tb->warp_id_first = bid * grid->warps_per_threadblock;
				tb->warp_id_last = tb->warp_id_first + grid->warps_per_threadblock - 1;
				tb->warps = &grid->warps[tb->warp_id_first];

				/* Walk the threads of this threadblock, X fastest */
				lid = 0;
				for (tz = 0; tz < function->local_size3[2]; tz++)
				{
					for (ty = 0; ty < function->local_size3[1]; ty++)
					{
						for (tx = 0; tx < function->local_size3[0]; tx++)
						{
							/* Warp that will hold this thread */
							wid = bid * grid->warps_per_threadblock +
								lid / frm_emu_warp_size;
							assert(wid < grid->warp_count);
							wp = grid->warps[wid];

							/* New thread and its owners */
							th = grid->threads[tid] = frm_thread_create();
							th->grid = grid;
							th->threadblock = tb;
							th->warp = wp;

							/* Position within the whole grid */
							th->id = tid;
							th->id_3d[0] = bx * function->local_size3[0] + tx;
							th->id_3d[1] = by * function->local_size3[1] + ty;
							th->id_3d[2] = bz * function->local_size3[2] + tz;

							/* Position within the threadblock and warp */
							th->id_in_threadblock = lid;
							th->id_in_threadblock_3d[0] = tx;
							th->id_in_threadblock_3d[1] = ty;
							th->id_in_threadblock_3d[2] = tz;
							th->id_in_warp = th->id_in_threadblock % frm_emu_warp_size;

							/* The first thread to land in a warp fixes
							 * where the warp's slice starts */
							if (wp->thread_count == 0)
							{
								wp->thread_id_first = tid;
								wp->threads = &grid->threads[tid];
							}
							wp->thread_id_last = tid;
							wp->thread_count++;
							bit_map_set(wp->active_stack, th->id_in_warp, 1, 1);

							/* Thread coordinates go to special register R0 */
							th->sr[FRM_SR_Tid_X].v.i = tx;  /* R0.x */
							th->sr[FRM_SR_Tid_Y].v.i = ty;  /* R0.y */
							th->sr[FRM_SR_Tid_Z].v.i = tz;  /* R0.z */

							/* Threadblock coordinates go to special register R1 */
							th->sr[FRM_SR_CTAid_X].v.i = bx;  /* R1.x */
							th->sr[FRM_SR_CTAid_Y].v.i = by;  /* R1.y */
							th->sr[FRM_SR_CTAid_Z].v.i = bz;  /* R1.z */

							/* Advance both linear thread counters */
							lid++;
							tid++;
						}
					}
				}

				/* Advance to the following threadblock */
				bid++;
			}
		}
	}

	/* Name each warp after its thread range and aim its buffer cursor at
	 * the beginning of the function buffer */
	for (wid = 0; wid < grid->warp_count; wid++)
	{
		wp = grid->warps[wid];
		snprintf(wp->name, sizeof(wp->name), "warp[i%d-i%d]",
			wp->thread_id_first, wp->thread_id_last);

		wp->buf_start = function->function_buffer.ptr;
		wp->buf = wp->buf_start;
		wp->buf_size = function->function_buffer.size;
	}

	/* Dump the launch geometry followed by one row per thread */
	printf("local_size = %d (%d,%d,%d)\n", function->local_size, function->local_size3[0],
		function->local_size3[1], function->local_size3[2]);
	printf("global_size = %d (%d,%d,%d)\n", function->global_size, function->global_size3[0],
		function->global_size3[1], function->global_size3[2]);
	printf("group_count = %d (%d,%d,%d)\n", function->group_count, function->group_count3[0],
		function->group_count3[1], function->group_count3[2]);
	printf("warp_count = %d\n", grid->warp_count);
	printf("warps_per_threadblock = %d\n", grid->warps_per_threadblock);
	printf(" tid tid2 tid1 tid0   bid bid2 bid1 bid0   lid lid2 lid1 lid0  warp            work-group\n");
	for (tid = 0; tid < grid->thread_count; tid++)
	{
		th = grid->threads[tid];
		wp = th->warp;
		tb = th->threadblock;

		/* Columns: global IDs, threadblock IDs, local IDs, warp, threadblock */
		printf("%4d %4d %4d %4d  "
			"%4d %4d %4d %4d  "
			"%4d %4d %4d %4d  "
			"%20s.%-4d  "
			"%20s.%-4d\n",
			th->id, th->id_3d[2], th->id_3d[1], th->id_3d[0],
			tb->id, tb->id_3d[2], tb->id_3d[1], tb->id_3d[0],
			th->id_in_threadblock, th->id_in_threadblock_3d[2],
			th->id_in_threadblock_3d[1], th->id_in_threadblock_3d[0],
			wp->name, th->id_in_warp,
			tb->name, th->id_in_threadblock);
	}

}
Пример #2
0
/*
 * Create every thread block, warp and thread of 'grid' and link them together.
 *
 * grid: grid to populate. The function reads grid->block_count,
 *       grid->block_size, grid->block_size3, grid->block_count3, grid->id and
 *       grid->function (for the instruction buffer handed to each warp).
 *
 * For each of the grid->block_count thread blocks this creates the block and
 * adds it to grid->pending_thread_blocks, creates its warps (added to the
 * block's running_warps list) and creates its grid->block_size threads, each
 * linked to its warp, block and grid.
 *
 * The coordinates stored in the special registers are recovered from the
 * linear index with X varying fastest:
 *     idx = x + y * dim_x + z * dim_x * dim_y
 * so y must be wrapped by dim_y; without the wrap it would keep growing past
 * dim_y - 1 whenever z > 0.
 */
static void frm_grid_setup_arrays(struct frm_grid_t *grid) {
  struct frm_thread_block_t *thread_block;
  struct frm_warp_t *warp;
  struct frm_thread_t *thread;

  int bid; /* Thread block ID */
  int wid; /* Warp ID iterator */
  int tid; /* Thread ID iterator */

  /* Create array/lists of thread blocks */
  grid->thread_block_count = grid->block_count;
  grid->thread_blocks = (struct frm_thread_block_t **)xcalloc(
      grid->block_count, sizeof(struct frm_thread_block_t *));
  grid->pending_thread_blocks = list_create();
  grid->running_thread_blocks = list_create();
  grid->finished_thread_blocks = list_create();

  for (bid = 0; bid < grid->block_count; bid++) {
    /* Create new thread block */
    thread_block = frm_thread_block_create();
    grid->thread_blocks[bid] = thread_block;

    /* Initialize thread block */
    thread_block->id = bid;
    snprintf(thread_block->name, sizeof(thread_block->name),
             "thread-block[g%d-b%d]", grid->id, thread_block->id);
    thread_block->grid = grid;

    /* Add to pending list */
    list_add(grid->pending_thread_blocks, thread_block);

    /* Create array/lists of warps; the block size is rounded up to a whole
     * number of warps */
    thread_block->warp_count =
        (grid->block_size + frm_emu_warp_size - 1) / frm_emu_warp_size;
    thread_block->warps = (struct frm_warp_t **)xcalloc(
        thread_block->warp_count, sizeof(struct frm_warp_t *));
    thread_block->running_warps = list_create();
    thread_block->finished_warps = list_create();

    for (wid = 0; wid < thread_block->warp_count; wid++) {
      /* Create new warp */
      warp = frm_warp_create();
      thread_block->warps[wid] = warp;

      /* Initialize warp */
      warp->id = wid + bid * thread_block->warp_count;
      warp->id_in_thread_block = wid;
      snprintf(warp->name, sizeof(warp->name), "warp[g%d-b%d-w%d]", grid->id,
               thread_block->id, warp->id_in_thread_block);
      warp->grid = grid;
      warp->thread_block = thread_block;
      warp->inst_buffer = grid->function->inst_buffer;
      warp->inst_buffer_size = grid->function->inst_buffer_size;

      /* Every warp is full except possibly the last one, which takes
       * whatever threads remain in the block */
      if (wid < thread_block->warp_count - 1)
        warp->thread_count = frm_emu_warp_size;
      else
        warp->thread_count = grid->block_size -
                             (thread_block->warp_count - 1) * frm_emu_warp_size;
      warp->threads = (struct frm_thread_t **)xcalloc(
          warp->thread_count, sizeof(struct frm_thread_t *));

      /* Add to running list */
      list_add(thread_block->running_warps, warp);
    }

    /* Create array/lists of threads */
    thread_block->thread_count = grid->block_size;
    thread_block->threads = (struct frm_thread_t **)xcalloc(
        thread_block->thread_count, sizeof(struct frm_thread_t *));

    for (tid = 0; tid < thread_block->thread_count; tid++) {
      /* Create new thread */
      thread = frm_thread_create();
      thread_block->threads[tid] = thread;

      /* Initialize thread */
      thread->id = tid + bid * thread_block->thread_count;
      thread->id_in_warp = tid % frm_emu_warp_size;
      thread->id_in_thread_block = tid;
      thread->warp = thread_block->warps[tid / frm_emu_warp_size];
      thread->thread_block = thread_block;
      thread->grid = grid;

      /* Save thread IDs in special register R0. The Y component is wrapped
       * by the block's Y size so that it stays in range for 3D blocks. */
      thread->sr[FRM_SR_Tid_X].v.i = tid % grid->block_size3[0];
      thread->sr[FRM_SR_Tid_Y].v.i =
          (tid / grid->block_size3[0]) % grid->block_size3[1];
      thread->sr[FRM_SR_Tid_Z].v.i =
          tid / (grid->block_size3[0] * grid->block_size3[1]);

      /* Save thread block IDs in special register R1. The Y component is
       * wrapped by the grid's Y size so that it stays in range for 3D
       * grids. */
      thread->sr[FRM_SR_CTAid_X].v.i = bid % grid->block_count3[0];
      thread->sr[FRM_SR_CTAid_Y].v.i =
          (bid / grid->block_count3[0]) % grid->block_count3[1];
      thread->sr[FRM_SR_CTAid_Z].v.i =
          bid / (grid->block_count3[0] * grid->block_count3[1]);

      /* Set predicate register #7 to 1 */
      thread->pr[7] = 1;

      /* Link thread with warp */
      thread->warp->threads[thread->id_in_warp] = thread;
    }
  }
}