예제 #1
0
bool
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
   struct nouveau_heap *heap;
   int ret;
   uint32_t size = align(prog->code_size, 0x40);

   switch (prog->type) {
   case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break;
   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
   default:
      assert(!"invalid program type");
      return false;
   }

   ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
   if (ret) {
      /* Out of space: evict everything to compactify the code segment, hoping
       * the working set is much smaller and drifts slowly. Improve me !
       */
      while (heap->next) {
         struct nv50_program *evict = heap->next->priv;
         if (evict)
            nouveau_heap_free(&evict->mem);
      }
      debug_printf("WARNING: out of code space, evicting all shaders.\n");
      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
      if (ret) {
         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
         return false;
      }
   }
   prog->code_base = prog->mem->start;

   ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
   if (ret < 0) {
      nouveau_heap_free(&prog->mem);
      return false;
   }
   if (ret > 0)
      nv50->state.new_tls_space = true;

   if (prog->fixups)
      nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);

   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
                       (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);

   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
   PUSH_DATA (nv50->base.pushbuf, 0);

   return true;
}
예제 #2
0
static void
nv30_vertprog_destroy(struct nv30_vertprog *vp)
{
   util_dynarray_fini(&vp->branch_relocs);
   nouveau_heap_free(&vp->exec);
   FREE(vp->insns);
   vp->insns = NULL;
   vp->nr_insns = 0;

   util_dynarray_fini(&vp->const_relocs);
   nouveau_heap_free(&vp->data);
   FREE(vp->consts);
   vp->consts = NULL;
   vp->nr_consts = 0;

   vp->translated = FALSE;
}
예제 #3
0
static void
nv30_render_destroy(struct vbuf_render *render)
{
   struct nv30_render *r = nv30_render(render);

   if (r->transfer)
      pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer);
   pipe_resource_reference(&r->buffer, NULL);
   nouveau_heap_free(&r->vertprog);
   FREE(render);
}
static void
nv30_query_object_del(struct nv30_screen *screen, struct nv30_query_object **po)
{
   struct nv30_query_object *qo = *po; *po = NULL;
   if (qo) {
      volatile uint32_t *ntfy = nv30_ntfy(screen, qo);
      while (ntfy[3] & 0xff000000) {
      }
      nouveau_heap_free(&qo->hw);
      LIST_DEL(&qo->list);
      FREE(qo);
   }
}
예제 #5
0
void
nv50_fragprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *fp = nv50->fragprog;
   struct pipe_rasterizer_state *rast = &nv50->rast->pipe;

   if (fp->fp.force_persample_interp != rast->force_persample_interp) {
      /* Force the program to be reuploaded, which will trigger interp fixups
       * to get applied
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.force_persample_interp = rast->force_persample_interp;
   }

   if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES)))
      return;

   if (!nv50_program_validate(nv50, fp))
      return;
   nv50_program_update_context_state(nv50, fp, 1);

   BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, fp->max_gpr);
   BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);
   PUSH_DATA (push, fp->max_out);
   BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);
   PUSH_DATA (push, fp->fp.flags[0]);
   BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);
   PUSH_DATA (push, fp->fp.flags[1]);
   BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
   PUSH_DATA (push, fp->code_base);

   if (nv50->screen->tesla->oclass >= NVA3_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NVA3_3D_FP_MULTISAMPLE), 1);
      if (nv50->min_samples > 1 || fp->fp.has_samplemask)
         PUSH_DATA(push,
                   NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE |
                   (NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK *
                    fp->fp.has_samplemask));
      else
         PUSH_DATA(push, 0);
   }
}
예제 #6
0
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
   const struct pipe_shader_state pipe = p->pipe;
   const ubyte type = p->type;

   if (p->mem)
      nouveau_heap_free(&p->mem);

   FREE(p->code);

   FREE(p->fixups);

   FREE(p->so);

   memset(p, 0, sizeof(*p));

   p->pipe = pipe;
   p->type = type;
}
예제 #7
0
static inline struct nouveau_heap *
nv30_transfer_rect_vertprog(struct nv30_context *nv30)
{
   struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
   struct nouveau_heap *vp;

   vp = nv30->blit_vp;
   if (!vp) {
      if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) {
         while (heap->next && heap->size < 2) {
            struct nouveau_heap **evict = heap->next->priv;
            nouveau_heap_free(evict);
         }

         if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp))
            return NULL;
      }

      vp = nv30->blit_vp;
      if (vp) {
         struct nouveau_pushbuf *push = nv30->base.pushbuf;

         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
         PUSH_DATA (push, vp->start);
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
         PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */
         PUSH_DATA (push, 0x0040000d);
         PUSH_DATA (push, 0x8106c083);
         PUSH_DATA (push, 0x6041ff80);
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
         PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */
         PUSH_DATA (push, 0x0040080d);
         PUSH_DATA (push, 0x8106c083);
         PUSH_DATA (push, 0x6041ff9d);
      }
   }

   return vp;
}
예제 #8
0
static boolean
nv30_render_validate(struct nv30_context *nv30)
{
   struct nv30_render *r = nv30_render(nv30->draw->render);
   struct nv30_rasterizer_stateobj *rast = nv30->rast;
   struct pipe_screen *pscreen = &nv30->screen->base.base;
   struct nouveau_pushbuf *push = nv30->screen->base.pushbuf;
   struct nouveau_object *eng3d = nv30->screen->eng3d;
   struct nv30_vertprog *vp = nv30->vertprog.program;
   struct vertex_info *vinfo = &r->vertex_info;
   unsigned vp_attribs = 0;
   unsigned vp_results = 0;
   unsigned attrib = 0;
   unsigned pntc;
   int i;

   if (!r->vertprog) {
      struct nouveau_heap *heap = nv30_screen(pscreen)->vp_exec_heap;
      if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog)) {
         while (heap->next && heap->size < 16) {
            struct nouveau_heap **evict = heap->next->priv;
            nouveau_heap_free(evict);
         }

         if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog))
            return FALSE;
      }
   }

   vinfo->num_attribs = 0;
   vinfo->size = 0;

   /* setup routing for all necessary vp outputs */
   for (i = 0; i < vp->info.num_outputs && attrib < 16; i++) {
      uint semantic = vp->info.output_semantic_name[i];
      uint index = vp->info.output_semantic_index[i];
      if (vroute_add(r, attrib, semantic, &index)) {
         vp_attribs |= (1 << attrib++);
         vp_results |= index;
      }
   }

   /* setup routing for replaced point coords not written by vp */
   if (rast && rast->pipe.point_quad_rasterization)
      pntc = rast->pipe.sprite_coord_enable & 0x000002ff;
   else
      pntc = 0;

   while (pntc && attrib < 16) {
      uint index = ffs(pntc) - 1; pntc &= ~(1 << index);
      if (vroute_add(r, attrib, TGSI_SEMANTIC_TEXCOORD, &index)) {
         vp_attribs |= (1 << attrib++);
         vp_results |= index;
      }
   }

   /* modify vertex format for correct stride, and stub out unused ones */
   BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
   PUSH_DATA (push, r->vertprog->start);
   r->vtxprog[attrib - 1][3] |= 1;
   for (i = 0; i < attrib; i++) {
      BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
      PUSH_DATAp(push, r->vtxprog[i], 4);
      r->vtxfmt[i] |= vinfo->size << 8;
   }
   for (; i < 16; i++)
      r->vtxfmt[i]  = NV30_3D_VTXFMT_TYPE_V32_FLOAT;

   BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 1.0);
   PUSH_DATAf(push, 1.0);
   PUSH_DATAf(push, 1.0);
   PUSH_DATAf(push, 1.0);
   BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 1.0);

   BEGIN_NV04(push, NV30_3D(VTXFMT(0)), 16);
   PUSH_DATAp(push, r->vtxfmt, 16);

   BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
   PUSH_DATA (push, r->vertprog->start);
   BEGIN_NV04(push, NV30_3D(ENGINE), 1);
   PUSH_DATA (push, 0x00000103);
   if (eng3d->oclass >= NV40_3D_CLASS) {
      BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
      PUSH_DATA (push, vp_attribs);
      PUSH_DATA (push, vp_results);
   }

   vinfo->size /= 4;
   return TRUE;
}
예제 #9
0
void
nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *fp = nvc0->fragprog;
   struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;

   if (fp->fp.force_persample_interp != rast->force_persample_interp) {
      /* Force the program to be reuploaded, which will trigger interp fixups
       * to get applied
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.force_persample_interp = rast->force_persample_interp;
   }

   /* Shade model works well enough when both colors follow it. However if one
    * (or both) is explicitly set, then we have to go the patching route.
    */
   bool has_explicit_color = fp->fp.colors &&
      (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
       ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
   bool hwflatshade = false;
   if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
      /* Force re-upload */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.flatshade = rast->flatshade;

      /* Always smooth-shade in this mode, the shader will decide on its own
       * when to flat-shade.
       */
   } else if (!has_explicit_color) {
      hwflatshade = rast->flatshade;

      /* No need to binary-patch the shader each time, make sure that it's set
       * up for the default behaviour.
       */
      fp->fp.flatshade = 0;
   }

   if (hwflatshade != nvc0->state.flatshade) {
      nvc0->state.flatshade = hwflatshade;
      BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
      PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
                                     NVC0_3D_SHADE_MODEL_SMOOTH);
   }

   if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) {
      return;
   }

   if (!nvc0_program_validate(nvc0, fp))
         return;
   nvc0_program_update_context_state(nvc0, fp, 4);

   if (fp->fp.early_z != nvc0->state.early_z_forced) {
      nvc0->state.early_z_forced = fp->fp.early_z;
      IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
   }

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
   PUSH_DATA (push, 0x51);
   PUSH_DATA (push, fp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
   PUSH_DATA (push, fp->num_gprs);

   BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
   PUSH_DATA (push, 0x20164010);
   PUSH_DATA (push, 0x20);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
   PUSH_DATA (push, fp->flags[0]);
}
예제 #10
0
void
nv30_vertprog_validate(struct nv30_context *nv30)
{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nouveau_object *eng3d = nv30->screen->eng3d;
   struct nv30_vertprog *vp = nv30->vertprog.program;
   struct nv30_fragprog *fp = nv30->fragprog.program;
   boolean upload_code = FALSE;
   boolean upload_data = FALSE;
   unsigned i;

   if (nv30->dirty & NV30_NEW_FRAGPROG) {
      if (memcmp(vp->texcoord, fp->texcoord, sizeof(vp->texcoord))) {
         if (vp->translated)
            nv30_vertprog_destroy(vp);
         memcpy(vp->texcoord, fp->texcoord, sizeof(vp->texcoord));
      }
   }

   if (nv30->rast && nv30->rast->pipe.clip_plane_enable != vp->enabled_ucps) {
      vp->enabled_ucps = nv30->rast->pipe.clip_plane_enable;
      if (vp->translated)
         nv30_vertprog_destroy(vp);
   }

   if (!vp->translated) {
      vp->translated = _nvfx_vertprog_translate(eng3d->oclass, vp);
      if (!vp->translated) {
         nv30->draw_flags |= NV30_NEW_VERTPROG;
         return;
      }
      nv30->dirty |= NV30_NEW_VERTPROG;
   }

   if (!vp->exec) {
      struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
      struct nv30_shader_reloc *reloc = vp->branch_relocs.data;
      unsigned nr_reloc = vp->branch_relocs.size / sizeof(*reloc);
      uint32_t *inst, target;

      if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
         while (heap->next && heap->size < vp->nr_insns) {
            struct nouveau_heap **evict = heap->next->priv;
            nouveau_heap_free(evict);
         }

         if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
            nv30->draw_flags |= NV30_NEW_VERTPROG;
            return;
         }
      }

      if (eng3d->oclass < NV40_3D_CLASS) {
         while (nr_reloc--) {
            inst     = vp->insns[reloc->location].data;
            target   = vp->exec->start + reloc->target;

            inst[2] &= ~0x000007fc;
            inst[2] |= target << 2;
            reloc++;
         }
      } else {
         while (nr_reloc--) {
            inst     = vp->insns[reloc->location].data;
            target   = vp->exec->start + reloc->target;

            inst[2] &= ~0x0000003f;
            inst[2] |= target >> 3;
            inst[3] &= ~0xe0000000;
            inst[3] |= target << 29;
            reloc++;
         }
      }

      upload_code = TRUE;
   }

   if (vp->nr_consts && !vp->data) {
      struct nouveau_heap *heap = nv30->screen->vp_data_heap;
      struct nv30_shader_reloc *reloc = vp->const_relocs.data;
      unsigned nr_reloc = vp->const_relocs.size / sizeof(*reloc);
      uint32_t *inst, target;

      if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
         while (heap->next && heap->size < vp->nr_consts) {
            struct nv30_vertprog *evp = heap->next->priv;
            nouveau_heap_free(&evp->data);
         }

         if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
            nv30->draw_flags |= NV30_NEW_VERTPROG;
            return;
         }
      }

      if (eng3d->oclass < NV40_3D_CLASS) {
         while (nr_reloc--) {
            inst     = vp->insns[reloc->location].data;
            target   = vp->data->start + reloc->target;

            inst[1] &= ~0x0007fc000;
            inst[1] |= (target & 0x1ff) << 14;
            reloc++;
         }
      } else {
         while (nr_reloc--) {
            inst     = vp->insns[reloc->location].data;
            target   = vp->data->start + reloc->target;

            inst[1] &= ~0x0001ff000;
            inst[1] |= (target & 0x1ff) << 12;
            reloc++;
         }
      }

      upload_code = TRUE;
      upload_data = TRUE;
   }

   if (vp->nr_consts) {
      struct nv04_resource *res = nv04_resource(nv30->vertprog.constbuf);

      for (i = 0; i < vp->nr_consts; i++) {
         struct nv30_vertprog_data *data = &vp->consts[i];

         if (data->index < 0) {
            if (!upload_data)
               continue;
         } else {
            float *constbuf = (float *)res->data;
            if (!upload_data &&
                !memcmp(data->value, &constbuf[data->index * 4], 16))
               continue;
            memcpy(data->value, &constbuf[data->index * 4], 16);
         }

         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
         PUSH_DATA (push, vp->data->start + i);
         PUSH_DATAp(push, data->value, 4);
      }
   }

   if (upload_code) {
      BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
      PUSH_DATA (push, vp->exec->start);
      for (i = 0; i < vp->nr_insns; i++) {
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
         PUSH_DATAp(push, vp->insns[i].data, 4);
      }
   }

   if (nv30->dirty & (NV30_NEW_VERTPROG | NV30_NEW_FRAGPROG)) {
      BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
      PUSH_DATA (push, vp->exec->start);
      if (eng3d->oclass < NV40_3D_CLASS) {
         BEGIN_NV04(push, NV30_3D(ENGINE), 1);
         PUSH_DATA (push, 0x00000013); /* vp instead of ff, somehow */
      } else {
         BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
         PUSH_DATA (push, vp->ir);
         PUSH_DATA (push, vp->or | fp->vp_or);
         BEGIN_NV04(push, NV30_3D(ENGINE), 1);
         PUSH_DATA (push, 0x00000011);
      }
   }
}