Example #1
static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
                                    struct pipe_fence_handle **out_fence)
{
   struct amdgpu_winsys *ws = cs->ctx->ws;
   struct pipe_fence_handle *fence;
   int i, j, r;

   /* Create a fence. */
   fence = amdgpu_fence_create(cs->ctx,
                               cs->request.ip_type,
                               cs->request.ip_instance,
                               cs->request.ring);
   if (out_fence)
      amdgpu_fence_reference(out_fence, fence);

   cs->request.number_of_dependencies = 0;

   /* Since the kernel driver doesn't synchronize execution between different
    * rings automatically, we have to add fence dependencies manually. */
   pipe_mutex_lock(ws->bo_fence_lock);
   for (i = 0; i < cs->num_buffers; i++) {
      for (j = 0; j < RING_LAST; j++) {
         struct amdgpu_cs_fence *dep;
         unsigned idx;

         struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence[j];
         if (!bo_fence)
            continue;

         if (bo_fence->ctx == cs->ctx &&
             bo_fence->fence.ip_type == cs->request.ip_type &&
             bo_fence->fence.ip_instance == cs->request.ip_instance &&
             bo_fence->fence.ring == cs->request.ring)
            continue;

         /* A zero-timeout wait is a status query: fences that have already
          * signalled need no dependency. */
         if (amdgpu_fence_wait((void *)bo_fence, 0, false))
            continue;

         idx = cs->request.number_of_dependencies++;
         if (idx >= cs->max_dependencies) {
            unsigned size;

            cs->max_dependencies = idx + 8;
            size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
            cs->request.dependencies = realloc(cs->request.dependencies, size);
         }

         dep = &cs->request.dependencies[idx];
         memcpy(dep, &bo_fence->fence, sizeof(*dep));
      }
   }

   cs->request.fence_info.handle = NULL;
   if (cs->request.ip_type != AMDGPU_HW_IP_UVD &&
       cs->request.ip_type != AMDGPU_HW_IP_VCE) {
      cs->request.fence_info.handle = cs->ctx->user_fence_bo;
      cs->request.fence_info.offset = cs->base.ring_type;
   }

   r = amdgpu_cs_submit(cs->ctx->ctx, 0, &cs->request, 1);
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
      else
         fprintf(stderr, "amdgpu: The CS has been rejected, "
                 "see dmesg for more information.\n");

      amdgpu_fence_signalled(fence);
   } else {
      /* Success. */
      uint64_t *user_fence = NULL;
      if (cs->request.ip_type != AMDGPU_HW_IP_UVD &&
          cs->request.ip_type != AMDGPU_HW_IP_VCE)
         user_fence = cs->ctx->user_fence_cpu_address_base +
                      cs->request.fence_info.offset;
      amdgpu_fence_submitted(fence, &cs->request, user_fence);

      for (i = 0; i < cs->num_buffers; i++)
         amdgpu_fence_reference(&cs->buffers[i].bo->fence[cs->base.ring_type],
                                fence);
   }
   pipe_mutex_unlock(ws->bo_fence_lock);
   amdgpu_fence_reference(&fence, NULL);
}
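Note that the dependency array above is grown with realloc() and the result is never checked. A minimal sketch of the same growth pattern with that check added; the struct and field names are taken from the example, everything else is an assumption:

static bool add_fence_dependency(struct amdgpu_cs *cs,
                                 const struct amdgpu_cs_fence *fence)
{
   unsigned idx = cs->request.number_of_dependencies;

   if (idx >= cs->max_dependencies) {
      unsigned new_max = idx + 8;
      void *deps = realloc(cs->request.dependencies,
                           new_max * sizeof(struct amdgpu_cs_fence));

      if (!deps)
         return false;   /* keep the old array and report the failure */

      cs->request.dependencies = deps;
      cs->max_dependencies = new_max;
   }

   cs->request.dependencies[idx] = *fence;
   cs->request.number_of_dependencies = idx + 1;
   return true;
}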
Example #2
static bool amdgpu_fence_wait_rel_timeout(struct radeon_winsys *rws,
                                          struct pipe_fence_handle *fence,
                                          uint64_t timeout)
{
   return amdgpu_fence_wait(fence, timeout, false);
}
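Example #2 is a thin adapter: it drops the unused radeon_winsys pointer and forwards to the three-argument amdgpu_fence_wait() seen in Examples #1 and #4 with a relative (non-absolute) timeout. Presumably it exists to match the winsys fence-wait callback signature; a hedged sketch of how such a hook might be wired up (the ws variable and the callback field name are assumptions, not shown above):

/* Hypothetical registration in the winsys function table. */
ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;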
Example #3
/**
 * cik_sdma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    struct amdgpu_ib ib;
    unsigned i;
    unsigned index;
    int r;
    u32 tmp = 0;
    u64 gpu_addr;

    r = amdgpu_wb_get(adev, &index);
    if (r) {
        dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
        return r;
    }

    gpu_addr = adev->wb.gpu_addr + (index * 4);
    /* Seed the write-back slot with a known value for the GPU to overwrite. */
    tmp = 0xCAFEDEAD;
    adev->wb.wb[index] = cpu_to_le32(tmp);

    r = amdgpu_ib_get(ring, NULL, 256, &ib);
    if (r) {
        amdgpu_wb_free(adev, index);
        DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
        return r;
    }

    /* One SDMA linear-write packet: destination address (low/high dwords),
     * count, then the data dword to write. */
    ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
    ib.ptr[1] = lower_32_bits(gpu_addr);
    ib.ptr[2] = upper_32_bits(gpu_addr);
    ib.ptr[3] = 1;
    ib.ptr[4] = 0xDEADBEEF;
    ib.length_dw = 5;

    r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
    if (r) {
        amdgpu_ib_free(adev, &ib);
        amdgpu_wb_free(adev, index);
        DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
        return r;
    }
    r = amdgpu_fence_wait(ib.fence, false);
    if (r) {
        amdgpu_ib_free(adev, &ib);
        amdgpu_wb_free(adev, index);
        DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
        return r;
    }
    for (i = 0; i < adev->usec_timeout; i++) {
        tmp = le32_to_cpu(adev->wb.wb[index]);
        if (tmp == 0xDEADBEEF)
            break;
        DRM_UDELAY(1);
    }
    if (i < adev->usec_timeout) {
        DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
                 ib.fence->ring->idx, i);
    } else {
        DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
        r = -EINVAL;
    }
    amdgpu_ib_free(adev, &ib);
    amdgpu_wb_free(adev, index);
    return r;
}
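The test above seeds a write-back slot with 0xCAFEDEAD and then polls it until the SDMA engine has written 0xDEADBEEF. A minimal sketch of that polling loop factored into a helper (the helper itself is assumed, not part of the driver):

/* Poll a write-back slot until it holds the expected value or the
 * timeout (in microseconds) expires. */
static int poll_wb_value(struct amdgpu_device *adev, unsigned index,
                         u32 expected)
{
    unsigned i;

    for (i = 0; i < adev->usec_timeout; i++) {
        if (le32_to_cpu(adev->wb.wb[index]) == expected)
            return 0;
        DRM_UDELAY(1);
    }

    return -ETIMEDOUT;
}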
Example #4
static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->rws;
   int i;

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      /* Timeout == 0 means just query the current status without blocking. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         if (bo->fence[i]) {
            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
               /* Release the idle fence to avoid checking it again later. */
               amdgpu_fence_reference(&bo->fence[i], NULL);
            } else {
               pipe_mutex_unlock(ws->bo_fence_lock);
               return false;
            }
         }
      pipe_mutex_unlock(ws->bo_fence_lock);
      return true;

   } else {
      struct pipe_fence_handle *fence[RING_LAST] = {};
      bool fence_idle[RING_LAST] = {};
      bool buffer_idle = true;
      int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Take references to all fences, so that we can wait for them
       * without the lock. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++)
         amdgpu_fence_reference(&fence[i], bo->fence[i]);
      pipe_mutex_unlock(ws->bo_fence_lock);

      /* Now wait for the fences. */
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i]) {
            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
               fence_idle[i] = true;
            else
               buffer_idle = false;
         }
      }

      /* Release idle fences to avoid checking them again later. */
      pipe_mutex_lock(ws->bo_fence_lock);
      for (i = 0; i < RING_LAST; i++) {
         if (fence[i] == bo->fence[i] && fence_idle[i])
            amdgpu_fence_reference(&bo->fence[i], NULL);

         amdgpu_fence_reference(&fence[i], NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}
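A hedged usage sketch for the function above: timeout == 0 gives a non-blocking busy query, while a non-zero timeout is relative and is converted internally into an absolute deadline via os_time_get_absolute_timeout() (presumably in nanoseconds, matching the pipe fence timeout convention). The buf variable and the RADEON_USAGE_READWRITE flag are assumptions on the caller's side:

/* Non-blocking query first, then a bounded wait. */
if (!amdgpu_bo_wait(buf, 0, RADEON_USAGE_READWRITE)) {
   if (!amdgpu_bo_wait(buf, 1000000000ull, RADEON_USAGE_READWRITE))
      fprintf(stderr, "buffer still busy after 1 second\n");
}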
Example #5
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
static void amdgpu_do_test_moves(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_bo *vram_obj = NULL;
	struct amdgpu_bo **gtt_obj = NULL;
	uint64_t gtt_addr, vram_addr;
	unsigned n, size;
	int i, r;

	size = 1024 * 1024;

	/* Number of tests =
	 * (Total GTT - IB pool - writeback page - ring buffers - IH ring) / test size
	 */
	n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024;
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		if (adev->rings[i])
			n -= adev->rings[i]->ring_size;
	if (adev->wb.wb_obj)
		n -= AMDGPU_GPU_PAGE_SIZE;
	if (adev->irq.ih.ring_obj)
		n -= adev->irq.ih.ring_size;
	n /= size;

	gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
	if (!gtt_obj) {
		DRM_ERROR("Failed to allocate %d pointers\n", n);
		r = 1;
		goto out_cleanup;
	}

	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0,
			     NULL, &vram_obj);
	if (r) {
		DRM_ERROR("Failed to create VRAM object\n");
		goto out_cleanup;
	}
	r = amdgpu_bo_reserve(vram_obj, false);
	if (unlikely(r != 0))
		goto out_unref;
	r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
	if (r) {
		DRM_ERROR("Failed to pin VRAM object\n");
		goto out_unres;
	}
	for (i = 0; i < n; i++) {
		void *gtt_map, *vram_map;
		void **gtt_start, **gtt_end;
		void **vram_start, **vram_end;
		struct amdgpu_fence *fence = NULL;

		r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i);
		if (r) {
			DRM_ERROR("Failed to create GTT object %d\n", i);
			goto out_lclean;
		}

		r = amdgpu_bo_reserve(gtt_obj[i], false);
		if (unlikely(r != 0))
			goto out_lclean_unref;
		r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gtt_addr);
		if (r) {
			DRM_ERROR("Failed to pin GTT object %d\n", i);
			goto out_lclean_unres;
		}

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object %d\n", i);
			goto out_lclean_unpin;
		}

		/* Fill each pointer-sized GTT slot with its own CPU address. */
		for (gtt_start = gtt_map, gtt_end = gtt_map + size;
		     gtt_start < gtt_end;
		     gtt_start++)
			*gtt_start = gtt_start;

		amdgpu_bo_kunmap(gtt_obj[i]);

		r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = amdgpu_fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i);
			goto out_lclean_unpin;
		}

		amdgpu_fence_unref(&fence);

		r = amdgpu_bo_kmap(vram_obj, &vram_map);
		if (r) {
			DRM_ERROR("Failed to map VRAM object after copy %d\n", i);
			goto out_lclean_unpin;
		}

		/* Each VRAM slot must now hold the address of its GTT source slot. */
		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     vram_start < vram_end;
		     gtt_start++, vram_start++) {
			if (*vram_start != gtt_start) {
				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
					  "expected 0x%p (GTT/VRAM offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *vram_start, gtt_start,
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)gtt_start - gtt_map),
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)gtt_start - gtt_map));
				amdgpu_bo_kunmap(vram_obj);
				goto out_lclean_unpin;
			}
			*vram_start = vram_start;
		}

		amdgpu_bo_kunmap(vram_obj);

		r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr,
				       size, NULL, &fence);

		if (r) {
			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
			goto out_lclean_unpin;
		}

		r = amdgpu_fence_wait(fence, false);
		if (r) {
			DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i);
			goto out_lclean_unpin;
		}

		amdgpu_fence_unref(&fence);

		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
		if (r) {
			DRM_ERROR("Failed to map GTT object after copy %d\n", i);
			goto out_lclean_unpin;
		}

		for (gtt_start = gtt_map, gtt_end = gtt_map + size,
		     vram_start = vram_map, vram_end = vram_map + size;
		     gtt_start < gtt_end;
		     gtt_start++, vram_start++) {
			if (*gtt_start != vram_start) {
				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
					  "expected 0x%p (VRAM/GTT offset "
					  "0x%16llx/0x%16llx)\n",
					  i, *gtt_start, vram_start,
					  (unsigned long long)
					  (vram_addr - adev->mc.vram_start +
					   (void*)vram_start - vram_map),
					  (unsigned long long)
					  (gtt_addr - adev->mc.gtt_start +
					   (void*)vram_start - vram_map));
				amdgpu_bo_kunmap(gtt_obj[i]);
				goto out_lclean_unpin;
			}
		}

		amdgpu_bo_kunmap(gtt_obj[i]);

		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
			 gtt_addr - adev->mc.gtt_start);
		continue;

out_lclean_unpin:
		amdgpu_bo_unpin(gtt_obj[i]);
out_lclean_unres:
		amdgpu_bo_unreserve(gtt_obj[i]);
out_lclean_unref:
		amdgpu_bo_unref(&gtt_obj[i]);
out_lclean:
		for (--i; i >= 0; --i) {
			amdgpu_bo_unpin(gtt_obj[i]);
			amdgpu_bo_unreserve(gtt_obj[i]);
			amdgpu_bo_unref(&gtt_obj[i]);
		}
		if (fence)
			amdgpu_fence_unref(&fence);
		break;
	}

	amdgpu_bo_unpin(vram_obj);
out_unres:
	amdgpu_bo_unreserve(vram_obj);
out_unref:
	amdgpu_bo_unref(&vram_obj);
out_cleanup:
	kfree(gtt_obj);
	if (r) {
		printk(KERN_WARNING "Error while testing BO move.\n");
	}
}
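The test fills each pointer-sized slot of the source mapping with its own CPU address, so after the GPU copy every destination slot can be checked against the address of the corresponding source slot. A sketch of that verification idea as a standalone helper (the helper is assumed, not part of the driver):

/* Return true if every slot of dst holds the address of the matching
 * slot of src, as seeded by the fill loop above. */
static bool verify_copy(void **src_map, void **dst_map, unsigned size)
{
	void **src = src_map;
	void **dst = dst_map;
	void **end = (void **)((char *)src_map + size);

	for (; src < end; src++, dst++) {
		if (*dst != src)
			return false;
	}

	return true;
}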