Exemplo n.º 1
0
ok_status vector_exp(vector * v)
{
	uint i;
	OK_CHECK_VECTOR(v);
	// #ifdef _OPENMP
	// #pragma omp parallel for
	// #endif
	for (i = 0; i < v->size; ++i)
		v->data[i * v->stride] = MATH(exp)(v->data[i * v->stride]);
	return OPTKIT_SUCCESS;
}
Exemplo n.º 2
0
ok_status vector_pow(vector * v, const ok_float x)
{
	uint i;
	OK_CHECK_VECTOR(v);
	// #ifdef _OPENMP
	// #pragma omp parallel for
	// #endif
	for (i = 0; i < v->size; ++i)
		v->data[i * v->stride] = MATH(pow)(v->data[i * v->stride], x);
	return OPTKIT_SUCCESS;
}
Exemplo n.º 3
0
static ok_float __dist_lInf_A_minus_UC_i(const ok_float * A,
	const int * strideA, const size_t * iterA, const upsamplingvec * u,
	const ok_float * C, const int * strideC, const size_t * iterC,
	ok_float * A_blk, const size_t * strideBlk, const size_t * strideIter,
	const size_t * block, const size_t * i, const int * row_length)
{
	ok_float dist;
	size_t idx;
	CBLAS(copy)(*row_length, A + (*i) * (*iterA), (*strideA),
		A_blk + (*i - *block) * (*strideIter),
		(const int) (*strideBlk));
	CBLAS(axpy)(*row_length, -kOne,
		C + u->indices[(*i) * u->stride] * (*iterC), (*strideC),
		A_blk + (*i - *block) * (*strideIter),
		(const int) (*strideBlk));
	idx = (size_t) CBLASI(amax)(*row_length,
		A_blk + (*i - *block) * (*strideIter),
		(const int) (*strideBlk));
	dist = A_blk[(*i - *block) * (*strideIter) + idx * (*strideBlk)];
	return MATH(fabs)(dist);
}
Exemplo n.º 4
0
void
gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
                                               struct brw_reg dst,
                                               struct brw_reg *src)
{
   vec4_instruction *ir = (vec4_instruction *) instruction;

   if (dst.width == BRW_WIDTH_4) {
      /* This happens in attribute fixups for "dual instanced" geometry
       * shaders, since they use attributes that are vec4's.  Since the exec
       * width is only 4, it's essential that the caller set
       * force_writemask_all in order to make sure the instruction is executed
       * regardless of which channels are enabled.
       */
      assert(ir->force_writemask_all);

      /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
       * the following register region restrictions (from Graphics BSpec:
       * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
       * > Register Region Restrictions)
       *
       *     1. ExecSize must be greater than or equal to Width.
       *
       *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
       *        to Width * HorzStride."
       */
      for (int i = 0; i < 3; i++) {
         if (src[i].file == BRW_GENERAL_REGISTER_FILE)
            src[i] = stride(src[i], 4, 4, 1);
      }
   }

   switch (ir->opcode) {
   case BRW_OPCODE_MOV:
      MOV(dst, src[0]);
      break;

   case BRW_OPCODE_ADD:
      ADD(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MUL:
      MUL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MACH:
      MACH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MAD:
      MAD(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_FRC:
      FRC(dst, src[0]);
      break;

   case BRW_OPCODE_RNDD:
      RNDD(dst, src[0]);
      break;

   case BRW_OPCODE_RNDE:
      RNDE(dst, src[0]);
      break;

   case BRW_OPCODE_RNDZ:
      RNDZ(dst, src[0]);
      break;

   case BRW_OPCODE_AND:
      AND(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_OR:
      OR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_XOR:
      XOR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_NOT:
      NOT(dst, src[0]);
      break;

   case BRW_OPCODE_ASR:
      ASR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHR:
      SHR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHL:
      SHL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_CMP:
      CMP(dst, ir->conditional_mod, src[0], src[1]);
      break;

   case BRW_OPCODE_SEL:
      SEL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DPH:
      DPH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP4:
      DP4(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP3:
      DP3(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP2:
      DP2(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_F32TO16:
      F32TO16(dst, src[0]);
      break;

   case BRW_OPCODE_F16TO32:
      F16TO32(dst, src[0]);
      break;

   case BRW_OPCODE_LRP:
      LRP(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFREV:
      /* BFREV only supports UD type for src and dst. */
      BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
            retype(src[0], BRW_REGISTER_TYPE_UD));
      break;

   case BRW_OPCODE_FBH:
      /* FBH only supports UD type for dst. */
      FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_FBL:
      /* FBL only supports UD type for dst. */
      FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_CBIT:
      /* CBIT only supports UD type for dst. */
      CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_ADDC:
      ADDC(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SUBB:
      SUBB(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFE:
      BFE(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFI1:
      BFI1(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFI2:
      BFI2(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_IF:
      IF(ir->predicate);
      break;

   case BRW_OPCODE_ELSE:
      ELSE();
      break;

   case BRW_OPCODE_ENDIF:
      ENDIF();
      break;

   case BRW_OPCODE_DO:
      DO();
      break;

   case BRW_OPCODE_BREAK:
      BREAK();
      break;

   case BRW_OPCODE_CONTINUE:
      CONTINUE();
      break;

   case BRW_OPCODE_WHILE:
      WHILE();
      break;

   case SHADER_OPCODE_RCP:
      MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
      break;

   case SHADER_OPCODE_RSQ:
      MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
      break;

   case SHADER_OPCODE_SQRT:
      MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
      break;

   case SHADER_OPCODE_EXP2:
      MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
      break;

   case SHADER_OPCODE_LOG2:
      MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
      break;

   case SHADER_OPCODE_SIN:
      MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
      break;

   case SHADER_OPCODE_COS:
      MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
      break;

   case SHADER_OPCODE_POW:
      MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_QUOTIENT:
      MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_REMAINDER:
      MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
   case SHADER_OPCODE_TG4:
   case SHADER_OPCODE_TG4_OFFSET:
      generate_tex(ir, dst);
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(ir, true);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      generate_scratch_read(ir, dst, src[0]);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      generate_scratch_write(ir, dst, src[0], src[1]);
      break;

   case VS_OPCODE_PULL_CONSTANT_LOAD:
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      generate_pull_constant_load(ir, dst, src[0], src[1]);
      break;

   case GS_OPCODE_URB_WRITE:
      generate_urb_write(ir, false);
      break;

   case GS_OPCODE_THREAD_END:
      generate_gs_thread_end(ir);
      break;

   case GS_OPCODE_SET_WRITE_OFFSET:
      generate_gs_set_write_offset(dst, src[0], src[1]);
      break;

   case GS_OPCODE_SET_VERTEX_COUNT:
      generate_gs_set_vertex_count(dst, src[0]);
      break;

   case GS_OPCODE_SET_DWORD_2_IMMED:
      generate_gs_set_dword_2_immed(dst, src[0]);
      break;

   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      generate_gs_prepare_channel_masks(dst);
      break;

   case GS_OPCODE_SET_CHANNEL_MASKS:
      generate_gs_set_channel_masks(dst, src[0]);
      break;

   case SHADER_OPCODE_SHADER_TIME_ADD:
      assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
      break;

   case SHADER_OPCODE_UNTYPED_ATOMIC:
      assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC");
      break;

   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ");
      break;

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
      break;

   default:
      if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
         _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
                       opcode_descs[ir->opcode].name);
      } else {
         _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
      }
      abort();
   }
}
Exemplo n.º 5
0
Obj ModMacfloat( Obj fl, Obj fr )
{
    return NEW_MACFLOAT(MATH(fmod)(VAL_MACFLOAT(fl),VAL_MACFLOAT(fr)));
}
Exemplo n.º 6
0
Obj PowMacfloat( Obj fl, Obj fr )
{
    return NEW_MACFLOAT(MATH(pow)(VAL_MACFLOAT(fl),VAL_MACFLOAT(fr)));
}