Example #1
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently added relocations. Validation failed with
         * them included, and the CS is about to be flushed because of that.
         * Keep only the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}
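
The check above is a simple watermark heuristic: the CS is considered valid as long as its accumulated GART and VRAM usage stay below 80% of the respective heap sizes. Reduced to its essence (a minimal sketch; fits_in_heap, heap_used and heap_size are illustrative names, not part of the driver):

#include <stdbool.h>
#include <stdint.h>

/* The 80% watermark used by radeon_drm_cs_validate, in isolation. */
static bool fits_in_heap(uint64_t heap_used, uint64_t heap_size)
{
    return heap_used < heap_size * 0.8;
}
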
Example #2
static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs,
                                                 struct radeon_bo *bo)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = -1;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is because the DMA
         * CS checker doesn't use NOP packets for offset patching, but always
         * uses the i-th buffer from the list to patch the i-th offset. If
         * there are N offsets in a DMA CS, there must also be N buffers in
         * the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.r600_has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->num_relocs >= csc->max_relocs) {
        uint32_t size;
        csc->max_relocs = MAX2(csc->max_relocs + 16, (unsigned)(csc->max_relocs * 1.3));

        size = csc->max_relocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->num_relocs].bo = NULL;
    csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
    radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->num_relocs];
    reloc->handle = bo->handle;
    reloc->read_domains = 0;
    reloc->write_domain = 0;
    reloc->flags = 0;

    csc->reloc_indices_hashlist[hash] = csc->num_relocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    return csc->num_relocs++;
}
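
radeon_lookup_buffer is used here but not shown in any of these examples. Its likely shape can be inferred from the collision handling in Examples #8 and #9: check the one cached index for the hash, and fall back to a backwards linear scan on a miss. A hedged reconstruction under that assumption, covering only the real-buffer list (-1 is the empty-slot sentinel, as in the cleanup functions below; the real function must also disambiguate slab buffers, which share the same hash list in Example #6):

/* Sketch inferred from Examples #8 and #9; not the verbatim function. */
static int radeon_lookup_buffer(struct radeon_cs_context *csc,
                                struct radeon_bo *bo)
{
    unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* Fast path: the cached index points at this buffer. */
    if (i >= 0 && csc->relocs_bo[i].bo == bo)
        return i;

    /* Hash collision: scan backwards and re-cache the index we find. */
    for (i = (int)csc->num_relocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i].bo == bo) {
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}
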
Example #3
/* Add the given fence to a slab buffer fence list.
 *
 * There is a potential race condition when a bo participates in submissions
 * on two or more threads simultaneously. Since we do not know which of the
 * submissions will be sent to the GPU first, we have to keep the fences
 * of all submissions.
 *
 * However, fences that belong to submissions that have already returned from
 * their respective ioctl do not have to be kept, because we know that they
 * will signal earlier.
 */
static void radeon_bo_slab_fence(struct radeon_bo *bo, struct radeon_bo *fence)
{
    unsigned dst;

    assert(fence->num_cs_references);

    /* Clean up older fences */
    dst = 0;
    for (unsigned src = 0; src < bo->u.slab.num_fences; ++src) {
        if (bo->u.slab.fences[src]->num_cs_references) {
            bo->u.slab.fences[dst] = bo->u.slab.fences[src];
            dst++;
        } else {
            radeon_bo_reference(&bo->u.slab.fences[src], NULL);
        }
    }
    bo->u.slab.num_fences = dst;

    /* Check available space for the new fence */
    if (bo->u.slab.num_fences >= bo->u.slab.max_fences) {
        unsigned new_max_fences = bo->u.slab.max_fences + 1;
        struct radeon_bo **new_fences = REALLOC(bo->u.slab.fences,
                                                bo->u.slab.max_fences * sizeof(*new_fences),
                                                new_max_fences * sizeof(*new_fences));
        if (!new_fences) {
            fprintf(stderr, "radeon_bo_slab_fence: allocation failure, dropping fence\n");
            return;
        }

        bo->u.slab.fences = new_fences;
        bo->u.slab.max_fences = new_max_fences;
    }

    /* Add the new fence */
    bo->u.slab.fences[bo->u.slab.num_fences] = NULL;
    radeon_bo_reference(&bo->u.slab.fences[bo->u.slab.num_fences], fence);
    bo->u.slab.num_fences++;
}
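
The fence cleanup loop above is the classic in-place filter idiom: a write cursor (dst) trails the read cursor (src), live entries are kept and dead ones released, and the array is compacted without any extra allocation. Stripped of the radeon specifics (filter_in_place and keep are illustrative names):

#include <stdbool.h>

/* Generic in-place filter mirroring the dst/src loop above: keeps the
 * elements satisfying keep() and returns the new length. */
static unsigned filter_in_place(int *items, unsigned count,
                                bool (*keep)(int item))
{
    unsigned dst = 0;
    for (unsigned src = 0; src < count; ++src) {
        if (keep(items[src]))
            items[dst++] = items[src];
        /* else: drop the element (the bo code releases its reference here) */
    }
    return dst;
}
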
Example #4
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->num_relocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
    }
    for (i = 0; i < csc->num_slab_buffers; ++i) {
        p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references);
        radeon_bo_reference(&csc->slab_buffers[i].bo, NULL);
    }

    csc->num_relocs = 0;
    csc->num_validated_relocs = 0;
    csc->num_slab_buffers = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}
Example #5
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}
Example #6
static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs,
                                            struct radeon_bo *bo)
{
    struct radeon_cs_context *csc = cs->csc;
    unsigned hash;
    struct radeon_bo_item *item;
    int idx;
    int real_idx;

    idx = radeon_lookup_buffer(csc, bo);
    if (idx >= 0)
        return idx;

    real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);

    /* Check if the backing array is large enough. */
    if (csc->num_slab_buffers >= csc->max_slab_buffers) {
        unsigned new_max = MAX2(csc->max_slab_buffers + 16,
                                (unsigned)(csc->max_slab_buffers * 1.3));
        struct radeon_bo_item *new_buffers =
            REALLOC(csc->slab_buffers,
                    csc->max_slab_buffers * sizeof(*new_buffers),
                    new_max * sizeof(*new_buffers));
        if (!new_buffers) {
            fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
            return -1;
        }

        csc->max_slab_buffers = new_max;
        csc->slab_buffers = new_buffers;
    }

    /* Initialize the new slab buffer entry. */
    idx = csc->num_slab_buffers++;
    item = &csc->slab_buffers[idx];

    item->bo = NULL;
    item->u.slab.real_idx = real_idx;
    radeon_bo_reference(&item->bo, bo);
    p_atomic_inc(&bo->num_cs_references);

    hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    csc->reloc_indices_hashlist[hash] = idx;

    return idx;
}
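
This function and Example #2 both grow their arrays with MAX2(n + 16, n * 1.3), which makes repeated appends amortized O(1); unlike the bare realloc calls in Example #2 (which would leak, and later crash, on allocation failure), the REALLOC here is checked. The same policy as a standalone, checked helper (grow_array is an illustrative name; callers append one element at a time, so a single growth step suffices):

#include <stdbool.h>
#include <stdlib.h>

/* Checked geometric growth mirroring MAX2(n + 16, n * 1.3) above.
 * Returns false and leaves *array and *max untouched on failure. */
static bool grow_array(void **array, unsigned *max, size_t elem_size)
{
    unsigned by_factor = (unsigned)(*max * 1.3);
    unsigned new_max = *max + 16 > by_factor ? *max + 16 : by_factor;
    void *new_array = realloc(*array, (size_t)new_max * elem_size);

    if (!new_array)
        return false;

    *array = new_array;
    *max = new_max;
    return true;
}
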
Example #7
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}
Example #8
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    *added_domains = 0;
    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            /* On the DMA ring, we need to emit one relocation for each use
             * of the buffer, so each call to this function must add the
             * buffer to the relocation list again.
             *
             * Do not update the hash table for the DMA ring, so that the
             * hash always points to the buffer's first relocation, which is
             * the one the kernel uses for memory placement. The following
             * relocations are ignored for placement, but the kernel still
             * uses them to patch the command stream with the proper buffer
             * offsets.
             */
            update_hash = FALSE;
            update_reloc_domains(reloc, rd, wd, added_domains);
            if (cs->base.ring_type != RING_DMA) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = 0;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}
Example #9
static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            update_reloc_domains(reloc, rd, wd, added_domains);
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                update_reloc_domains(reloc, rd, wd, added_domains);

                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = 0;

    csc->is_handle_added[hash] = TRUE;
    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}
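
update_reloc_domains is another helper these examples call without defining. Its contract can be inferred from the call sites: OR the new read/write domains into the existing relocation, and report through *added_domains only the domains that were not already present. A plausible sketch under that reading (not the verbatim helper):

/* Inferred from the call sites in Examples #8 and #9; hedged. */
static void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
                                 enum radeon_bo_domain rd,
                                 enum radeon_bo_domain wd,
                                 enum radeon_bo_domain *added_domains)
{
    /* Report only the domains this call adds for the first time. */
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
}
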
Example #10
static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    assert(priority < 64);
    *added_domains = 0;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority / 4, added_domains);
        csc->relocs_bo[i].priority_usage |= 1llu << priority;

        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is because the DMA
         * CS checker doesn't use NOP packets for offset patching, but always
         * uses the i-th buffer from the list to patch the i-th offset. If
         * there are N offsets in a DMA CS, there must also be N buffers in
         * the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs].bo = NULL;
    csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority / 4;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}
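
Note how this revision records priorities twice: the full 0-63 priority becomes a bit in the 64-bit priority_usage mask (hence the assert(priority < 64) and the 1llu << priority), while the kernel-visible reloc->flags only receives the coarser priority / 4. A small sketch of how a consumer might query the mask (has_priority is an illustrative name, not driver API):

#include <stdbool.h>
#include <stdint.h>

/* True if the buffer was ever added with the given priority, per the
 * 1llu << priority encoding above. */
static bool has_priority(uint64_t priority_usage, unsigned priority)
{
    return priority < 64 && (priority_usage & (1llu << priority)) != 0;
}
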
Example #11
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];

        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            update_reloc(reloc, rd, wd, priority, added_domains);

            /* For async DMA, every add_reloc call must add a buffer to the
             * list no matter how many duplicates there are. This is because
             * the DMA CS checker doesn't use NOP packets for offset patching,
             * but always uses the i-th buffer from the list to patch the i-th
             * offset. If there are N offsets in a DMA CS, there must also be
             * N buffers in the relocation list.
             *
             * This doesn't have to be done if virtual memory is enabled,
             * because there is no offset patching with virtual memory.
             */
            if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
            update_hash = FALSE;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}
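
Like update_reloc_domains, update_reloc only appears through its call sites (Examples #10 and #11). The extra priority argument suggests it also folds the new priority into reloc->flags; here is a hedged sketch consistent with those call sites, assuming flags keeps the highest priority seen so far:

/* Hedged reconstruction; assumes reloc->flags keeps the maximum priority. */
static void update_reloc(struct drm_radeon_cs_reloc *reloc,
                         enum radeon_bo_domain rd,
                         enum radeon_bo_domain wd,
                         unsigned priority,
                         enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    if (priority > reloc->flags)
        reloc->flags = priority;
}
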