Example #1
void GetVector(byte *picture, int width, int height, byte ref_block[16][16], int ref_block_num)
{
  int i, j, i_min, j_min, i_max, j_max, vectorX = 0, vectorY = 0, refX, refY;
  int MAD0, mad;

  /* Position of the reference block, derived from its index. */
  refX = (((ref_block_num + 1) * 16) % width) - 16;
  refY = ((ref_block_num + 1) * 16 / width);

  /* Search window: +/-16 pixels around the reference position, clamped to the picture. */
  i_min = refX - 16;
  j_min = refY - 16;
  i_max = refX + 16;
  j_max = refY + 16;

  if (i_min < 0) i_min = 0;
  if (j_min < 0) j_min = 0;
  if (i_max > width) i_max = width;
  if (j_max > height) j_max = height;

  /* MAD of the reference block at its own position is the score to beat. */
  MAD0 = MAD((byte*)ref_block, picture + ref_block_num * 256);

  for (i = i_min; i < i_max; i++)
  {
    for (j = j_min; j < j_max; j++)  /* j must restart for every i, otherwise only the first column is searched */
    {
      mad = MAD((byte*)ref_block, picture + i * i_max + j);
      if (mad < MAD0)
      {
        MAD0 = mad;
        vectorX = i - refX;
        vectorY = j - refY;
      }
    }
  }

  printf("blok %d : (%d,%d)\n", ref_block_num, vectorX, vectorY);
}
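
The MAD() routine used by this example is not shown. Below is a minimal sketch of what such a block-difference helper could look like, assuming (as the `picture + ref_block_num * 256` call suggests) that both arguments point at packed 16x16 blocks of 256 consecutive bytes and that `byte` is a typedef for unsigned char; it is an illustration, not the original helper.

/* Hypothetical helper, not part of the original example: mean absolute
 * difference between two packed 16x16 byte blocks. */
static int MAD(byte *a, byte *b)
{
  int k, d, sum = 0;
  for (k = 0; k < 256; k++)
  {
    d = (int)a[k] - (int)b[k];
    sum += (d < 0) ? -d : d;
  }
  return sum / 256;  /* drop the division to get a plain sum of absolute differences */
}

Example #2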
void refine_fast_s(double **x, double *y, double *weights,
			int n, int p, double *res,
			double *tmp, double *tmp2,
			double **tmp_mat, double **tmp_mat2,
			double *beta_cand, int kk,
			int conv, double b, double rhoc,
			double *is,
			double *beta_ref, double *scale)
{
/*
// weights = vector of length n
// res = vector of length n
// x = matrix with the data
// y = vector with responses
// tmp = aux vector of length n
// tmp2 = aux vector of length n
// tmp_mat = aux matrix p x p
// tmp_mat2 = aux matrix p x (p+1)
*/
void fast_s_irwls(double **x, double *y,
		double *weights, int n, int p, double *beta_ref,
		double **tmp_mat, double *tmp, double *tmp2);
double norm_diff(double *x, double *y, int n);
double norm(double *x, int n);
int lu(double **a,int *P, double *x);
void get_weights_rhop(double *r, double s, int n,
		double rhoc, double *w);
void r_sum_w_x(double **x, double *w, int n, int p,
			double *tmp,
			double *sum);
void r_sum_w_x_xprime(double **x, double *w, int n, int p,
			double **tmp, double **ans);
double loss_rho(double *r, double scale, int n, int p, double rhoc);
double MAD(double *a, int n, int center, double *tmp,
			double *tmp2);
double vecprime_vec(double *a, double *b, int n);
register int i, j;
int zeroes = 0;
double initial_scale = *is, s0;

for (j = 0; j < n; j++)
	if (fabs(res[j] = y[j] - vecprime_vec(x[j], beta_cand, p)) < ZERO)
		zeroes++;

/* if "perfect fit", return it with a 0 assoc. scale */
/* if( zeroes > (((double)n + (double)p)/2.) ) */
if (zeroes > ((double) n / 2.)) {
	// Rprintf("\nToo many zeroes, refine_fast_s\n");
	for (i = 0; i < p; i++)
		beta_ref[i] = beta_cand[i];
	*scale = 0.0;
	return;
}

if (initial_scale < 0.0)
	initial_scale = MAD(res, n, 0, tmp, tmp2);

s0 = initial_scale;
if (conv > 0)
	kk = MAX_ITER_FAST_S;

if (kk > 0) {
	for (i = 0; i < kk; i++) {

		/* one step for the scale */
		s0 = s0 * sqrt(loss_rho(res, s0, n, p, rhoc) / b);
		/* compute weights for IRWLS */
		get_weights_rhop(res, s0, n, rhoc, weights);
		/* compute the matrix for IRWLS */
		r_sum_w_x_xprime(x, weights, n, p, tmp_mat, tmp_mat2);
		/* compute the vector for IRWLS */
		for (j = 0; j < n; j++)
			weights[j] = weights[j] * y[j];
		r_sum_w_x(x, weights, n, p, tmp, tmp2);
		for (j = 0; j < p; j++)
			tmp_mat2[j][p] = tmp2[j];
		/* solve the system for IRWLS */
		lu(tmp_mat2, &p, beta_ref);
		/* check for convergence? */
		if (conv > 0) {
			if (norm_diff(beta_cand, beta_ref, p) /
			    norm(beta_cand, p) < EPSILON) {
				// Rprintf("\nRelative norm less than EPSILON\n");
				break;
			}
		}
		for (j = 0; j < n; j++)
			res[j] = y[j] - vecprime_vec(x[j], beta_ref, p);
		for (j = 0; j < p; j++)
			beta_cand[j] = beta_ref[j];
	}
}
*scale = s0;
}
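
The MAD(res, n, 0, tmp, tmp2) call above supplies the initial robust scale when none is given. A minimal sketch of such a helper follows, assuming it returns the median of |res[i] - center| rescaled by 1/0.6745 so that it estimates the standard deviation under normal errors; the actual routine in the source package may differ, and tmp2 is left unused here. It needs <stdlib.h> and <math.h>.

/* Hypothetical sketch, not the original implementation. */
static int cmp_double(const void *p, const void *q)
{
	double d = *(const double *) p - *(const double *) q;
	return (d > 0) - (d < 0);
}

double MAD(double *a, int n, int center, double *tmp, double *tmp2)
{
	int i;
	/* absolute deviations from the given center, collected in scratch space */
	for (i = 0; i < n; i++)
		tmp[i] = fabs(a[i] - (double) center);
	qsort(tmp, n, sizeof(double), cmp_double);
	/* median of the deviations, rescaled for consistency at the normal */
	return ((n % 2) ? tmp[n / 2]
	                : 0.5 * (tmp[n / 2 - 1] + tmp[n / 2])) / 0.6745;
}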
Example #3
void
gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
                                               struct brw_reg dst,
                                               struct brw_reg *src)
{
   vec4_instruction *ir = (vec4_instruction *) instruction;

   if (dst.width == BRW_WIDTH_4) {
      /* This happens in attribute fixups for "dual instanced" geometry
       * shaders, since they use attributes that are vec4's.  Since the exec
       * width is only 4, it's essential that the caller set
       * force_writemask_all in order to make sure the instruction is executed
       * regardless of which channels are enabled.
       */
      assert(ir->force_writemask_all);

      /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
       * the following register region restrictions (from Graphics BSpec:
       * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
       * > Register Region Restrictions)
       *
       *     1. ExecSize must be greater than or equal to Width.
       *
       *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
       *        to Width * HorzStride.
       */
      for (int i = 0; i < 3; i++) {
         if (src[i].file == BRW_GENERAL_REGISTER_FILE)
            src[i] = stride(src[i], 4, 4, 1);
      }
   }

   switch (ir->opcode) {
   case BRW_OPCODE_MOV:
      MOV(dst, src[0]);
      break;

   case BRW_OPCODE_ADD:
      ADD(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MUL:
      MUL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MACH:
      MACH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_MAD:
      MAD(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_FRC:
      FRC(dst, src[0]);
      break;

   case BRW_OPCODE_RNDD:
      RNDD(dst, src[0]);
      break;

   case BRW_OPCODE_RNDE:
      RNDE(dst, src[0]);
      break;

   case BRW_OPCODE_RNDZ:
      RNDZ(dst, src[0]);
      break;

   case BRW_OPCODE_AND:
      AND(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_OR:
      OR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_XOR:
      XOR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_NOT:
      NOT(dst, src[0]);
      break;

   case BRW_OPCODE_ASR:
      ASR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHR:
      SHR(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SHL:
      SHL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_CMP:
      CMP(dst, ir->conditional_mod, src[0], src[1]);
      break;

   case BRW_OPCODE_SEL:
      SEL(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DPH:
      DPH(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP4:
      DP4(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP3:
      DP3(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_DP2:
      DP2(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_F32TO16:
      F32TO16(dst, src[0]);
      break;

   case BRW_OPCODE_F16TO32:
      F16TO32(dst, src[0]);
      break;

   case BRW_OPCODE_LRP:
      LRP(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFREV:
      /* BFREV only supports UD type for src and dst. */
      BFREV(retype(dst, BRW_REGISTER_TYPE_UD),
            retype(src[0], BRW_REGISTER_TYPE_UD));
      break;

   case BRW_OPCODE_FBH:
      /* FBH only supports UD type for dst. */
      FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_FBL:
      /* FBL only supports UD type for dst. */
      FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_CBIT:
      /* CBIT only supports UD type for dst. */
      CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
      break;

   case BRW_OPCODE_ADDC:
      ADDC(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_SUBB:
      SUBB(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFE:
      BFE(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_BFI1:
      BFI1(dst, src[0], src[1]);
      break;

   case BRW_OPCODE_BFI2:
      BFI2(dst, src[0], src[1], src[2]);
      break;

   case BRW_OPCODE_IF:
      IF(ir->predicate);
      break;

   case BRW_OPCODE_ELSE:
      ELSE();
      break;

   case BRW_OPCODE_ENDIF:
      ENDIF();
      break;

   case BRW_OPCODE_DO:
      DO();
      break;

   case BRW_OPCODE_BREAK:
      BREAK();
      break;

   case BRW_OPCODE_CONTINUE:
      CONTINUE();
      break;

   case BRW_OPCODE_WHILE:
      WHILE();
      break;

   case SHADER_OPCODE_RCP:
      MATH(BRW_MATH_FUNCTION_INV, dst, src[0]);
      break;

   case SHADER_OPCODE_RSQ:
      MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]);
      break;

   case SHADER_OPCODE_SQRT:
      MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]);
      break;

   case SHADER_OPCODE_EXP2:
      MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]);
      break;

   case SHADER_OPCODE_LOG2:
      MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]);
      break;

   case SHADER_OPCODE_SIN:
      MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]);
      break;

   case SHADER_OPCODE_COS:
      MATH(BRW_MATH_FUNCTION_COS, dst, src[0]);
      break;

   case SHADER_OPCODE_POW:
      MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_QUOTIENT:
      MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_INT_REMAINDER:
      MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]);
      break;

   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
   case SHADER_OPCODE_TG4:
   case SHADER_OPCODE_TG4_OFFSET:
      generate_tex(ir, dst);
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(ir, true);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      generate_scratch_read(ir, dst, src[0]);
      break;

   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      generate_scratch_write(ir, dst, src[0], src[1]);
      break;

   case VS_OPCODE_PULL_CONSTANT_LOAD:
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      generate_pull_constant_load(ir, dst, src[0], src[1]);
      break;

   case GS_OPCODE_URB_WRITE:
      generate_urb_write(ir, false);
      break;

   case GS_OPCODE_THREAD_END:
      generate_gs_thread_end(ir);
      break;

   case GS_OPCODE_SET_WRITE_OFFSET:
      generate_gs_set_write_offset(dst, src[0], src[1]);
      break;

   case GS_OPCODE_SET_VERTEX_COUNT:
      generate_gs_set_vertex_count(dst, src[0]);
      break;

   case GS_OPCODE_SET_DWORD_2_IMMED:
      generate_gs_set_dword_2_immed(dst, src[0]);
      break;

   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      generate_gs_prepare_channel_masks(dst);
      break;

   case GS_OPCODE_SET_CHANNEL_MASKS:
      generate_gs_set_channel_masks(dst, src[0]);
      break;

   case SHADER_OPCODE_SHADER_TIME_ADD:
      assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time");
      break;

   case SHADER_OPCODE_UNTYPED_ATOMIC:
      assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC");
      break;

   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ");
      break;

   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+.");
      break;

   default:
      if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) {
         _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
                       opcode_descs[ir->opcode].name);
      } else {
         _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode);
      }
      abort();
   }
}
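
Most cases above, including BRW_OPCODE_MAD, simply forward to the corresponding EU instruction emitter. As a rough scalar illustration of what the three-source MAD case computes per enabled channel, assuming the usual dst = src0 + src1 * src2 multiply-add semantics (the operand convention is an assumption, not taken from this file):

/* Scalar illustration only; the real instruction runs per SIMD channel and
 * per writemask component, and the operand order is assumed. */
static float mad_scalar(float src0, float src1, float src2)
{
   return src0 + src1 * src2;
}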
Example #4
void RunStatsCommand(ProgramData *p, int lcindex, int threadindex, _Stats *s)
{
  int i, j, k, Npct;
  double *tmpdata = NULL, *tmpweight = NULL;
  if(p->NJD[threadindex] <= 0) {
    for(i=0, k=0; i < s->Nvar; i++) {
      for(j=0; j < s->Nstats; j++, k++) {
	s->statsout[threadindex][k] = 0.0;
      }
    }
    return;
  }
  if((tmpdata = (double *) malloc(p->NJD[threadindex]*sizeof(double))) == NULL) {
    error(ERR_MEMALLOC);
  }
  for(i = 0, k=0; i < s->Nvar; i++) {
    if(s->vars[i]->vectortype != VARTOOLS_VECTORTYPE_LC) {
      error(ERR_BADVARIABLETYPE_STATSCOMMAND);
    }
    for(j=0; j < p->NJD[threadindex]; j++) {
      tmpdata[j] = EvaluateVariable_Double(lcindex, threadindex, j, s->vars[i]);
    }
    Npct = 0;
    for(j = 0; j < s->Nstats; j++, k++) {
      switch(s->statstocalc[j]) {
      case VARTOOLS_STATSTYPE_MEAN:
	s->statsout[threadindex][k] = getmean(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_WEIGHTEDMEAN:
	s->statsout[threadindex][k] = getweightedmean(p->NJD[threadindex], tmpdata, p->sig[threadindex]);
	break;
      case VARTOOLS_STATSTYPE_MEDIAN:
	s->statsout[threadindex][k] = median(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MEDIAN_WEIGHT:
	s->statsout[threadindex][k] = median_weight(p->NJD[threadindex], tmpdata, p->sig[threadindex]);
	break;
      case VARTOOLS_STATSTYPE_STDDEV:
	s->statsout[threadindex][k] = stddev(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MEDDEV:
	s->statsout[threadindex][k] = meddev(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MEDMEDDEV:
	s->statsout[threadindex][k] = medmeddev(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MAD:
	s->statsout[threadindex][k] = MAD(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_KURTOSIS:
	s->statsout[threadindex][k] = kurtosis(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_SKEWNESS:
	s->statsout[threadindex][k] = skewness(p->NJD[threadindex], tmpdata);
	break;
      case VARTOOLS_STATSTYPE_PERCENTILE:
	s->statsout[threadindex][k] = percentile(p->NJD[threadindex],
						 tmpdata,
						 s->pctval[Npct]);
	Npct++;
	break;
      case VARTOOLS_STATSTYPE_PERCENTILE_WEIGHT:
	s->statsout[threadindex][k] = percentile_weight(p->NJD[threadindex],
							tmpdata,
							p->sig[threadindex],
							s->pctval[Npct]);
	Npct++;
	break;
      case VARTOOLS_STATSTYPE_MAXIMUM:
	s->statsout[threadindex][k] = getmaximum(p->NJD[threadindex],tmpdata);
	break;
      case VARTOOLS_STATSTYPE_MINIMUM:
	s->statsout[threadindex][k] = getminimum(p->NJD[threadindex],tmpdata);
	break;
      case VARTOOLS_STATSTYPE_SUM:
	s->statsout[threadindex][k] = getsum(p->NJD[threadindex],tmpdata);
	break;
      default:
	error(ERR_CODEERROR);
      }
    }
  }
  if(tmpdata != NULL)
    free(tmpdata);
}
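
The MAD(p->NJD[threadindex], tmpdata) statistic above takes the number of points and the data vector. Here is a minimal sketch of such a routine, assuming it returns the median absolute deviation of the data about its own median with no consistency scaling; whether the real VARTOOLS routine applies the 1.4826 factor is not shown here, so this is an illustration only. It needs <stdlib.h> and <math.h>.

/* Hypothetical sketch only; the real routine may differ. */
static int cmp_dbl(const void *p, const void *q)
{
  double d = *(const double *) p - *(const double *) q;
  return (d > 0) - (d < 0);
}

static double median_of(double *v, int n)
{
  qsort(v, n, sizeof(double), cmp_dbl);
  return (n % 2) ? v[n / 2] : 0.5 * (v[n / 2 - 1] + v[n / 2]);
}

double MAD(int n, double *data)
{
  int i;
  double med, result;
  double *work = (double *) malloc(n * sizeof(double));
  if (work == NULL) return 0.0;
  /* median of the data itself */
  for (i = 0; i < n; i++) work[i] = data[i];
  med = median_of(work, n);
  /* median of the absolute deviations from that median */
  for (i = 0; i < n; i++) work[i] = fabs(data[i] - med);
  result = median_of(work, n);
  free(work);
  return result;
}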