Exemple #1
0
void vp9_fwht4x4_msa(const int16_t *input, int16_t *output,
                     int32_t src_stride) {
  v8i16 in0, in1, in2, in3, in4;

  LD_SH4(input, src_stride, in0, in1, in2, in3);

  in0 += in1;
  in3 -= in2;
  in4 = (in0 - in3) >> 1;
  SUB2(in4, in1, in4, in2, in1, in2);
  in0 -= in2;
  in3 += in1;

  TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);

  in0 += in2;
  in1 -= in3;
  in4 = (in0 - in1) >> 1;
  SUB2(in4, in2, in4, in3, in2, in3);
  in0 -= in3;
  in1 += in2;

  SLLI_4V(in0, in1, in2, in3, 2);

  TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2);

  ST4x2_UB(in0, output, 4);
  ST4x2_UB(in3, output + 4, 4);
  ST4x2_UB(in1, output + 8, 4);
  ST4x2_UB(in2, output + 12, 4);
}
Exemple #2
0
static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
                                            const uint8_t* pred,
                                            uint8_t* dst, int length) {
  v16u8 src0, pred0, dst0;
  assert(length >= 0);
  while (length >= 32) {
    v16u8 src1, pred1, dst1;
    LD_UB2(src, 16, src0, src1);
    LD_UB2(pred, 16, pred0, pred1);
    SUB2(src0, pred0, src1, pred1, dst0, dst1);
    ST_UB2(dst0, dst1, dst, 16);
    src += 32;
    pred += 32;
    dst += 32;
    length -= 32;
  }
  if (length > 0) {
    int i;
    if (length >= 16) {
      src0 = LD_UB(src);
      pred0 = LD_UB(pred);
      dst0 = src0 - pred0;
      ST_UB(dst0, dst);
      src += 16;
      pred += 16;
      dst += 16;
      length -= 16;
    }
    for (i = 0; i < length; i++) {
      dst[i] = src[i] - pred[i];
    }
  }
}
Exemple #3
0
static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
                                            const uint8_t* ppred,
                                            uint8_t* poutput, int stride,
                                            int size) {
  int w;
  const v16i8 zero = { 0 };
  while (size >= 16) {
    v16u8 pred0, dst0;
    v8i16 a0, a1, b0, b1, c0, c1;
    const v16u8 tmp0 = LD_UB(ppred - 1);
    const v16u8 tmp1 = LD_UB(ppred - stride);
    const v16u8 tmp2 = LD_UB(ppred - stride - 1);
    const v16u8 src0 = LD_UB(pinput);
    ILVRL_B2_SH(zero, tmp0, a0, a1);
    ILVRL_B2_SH(zero, tmp1, b0, b1);
    ILVRL_B2_SH(zero, tmp2, c0, c1);
    ADD2(a0, b0, a1, b1, a0, a1);
    SUB2(a0, c0, a1, c1, a0, a1);
    CLIP_SH2_0_255(a0, a1);
    pred0 = (v16u8)__msa_pckev_b((v16i8)a1, (v16i8)a0);
    dst0 = src0 - pred0;
    ST_UB(dst0, poutput);
    ppred += 16;
    pinput += 16;
    poutput += 16;
    size -= 16;
  }
  for (w = 0; w < size; ++w) {
    const int pred = ppred[w - 1] + ppred[w - stride] - ppred[w - stride - 1];
    poutput[w] = pinput[w] - (pred < 0 ? 0 : pred > 255 ? 255 : pred);
  }
}
void __dubcos(double x, double dx, double v[]) {
  double r,s,p,hx,tx,hy,ty,q,c,cc,d,dd,d2,dd2,e,ee,
    sn,ssn,cs,ccs,ds,dss,dc,dcc;
#if 0
  double xx,y,yy,z,zz;
#endif
  mynumber u;
  int4 k;
  u.x=x+big.x;
  k = u.i[LOW_HALF]<<2;
  x=x-(u.x-big.x);
  d=x+dx;
  dd=(x-d)+dx;  /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
  MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
  sn=sincos.x[k];     /*                                  */
  ssn=sincos.x[k+1];  /*      sin(Xi) and cos(Xi)         */
  cs=sincos.x[k+2];   /*                                  */
  ccs=sincos.x[k+3];  /*                                  */
  MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,d,dd,ds,dss,r,s);

  MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);

  MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
  MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc);

  MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,d,dd,ds,dss,r,s);
  MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  MUL2(sn,ssn,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
  MUL2(dc,dcc,cs,ccs,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  ADD2(e,ee,dc,dcc,e,ee,r,s);
  SUB2(cs,ccs,e,ee,e,ee,r,s);

  v[0]=e;
  v[1]=ee;
}
Exemple #5
0
static void shDrawPaintMesh(VGContext *c, SHVector2 *min, SHVector2 *max,
                            VGPaintMode mode, GLenum texUnit)
{
  SHPaint *p;
  SHVector2 pmin, pmax;
  SHfloat K = 1.0f;
  
  /* Pick the right paint */
  if (mode == VG_FILL_PATH) {
    p = (c->fillPaint ? c->fillPaint : &c->defaultPaint);
  }else if (mode == VG_STROKE_PATH) {
    p = (c->strokePaint ? c->strokePaint : &c->defaultPaint);
    K = SH_CEIL(c->strokeMiterLimit * c->strokeLineWidth) + 1.0f;
  }
  
  /* We want to be sure to cover every pixel of this path so better
     take a pixel more than leave some out (multisampling is tricky). */
  SET2V(pmin, (*min)); SUB2(pmin, K,K);
  SET2V(pmax, (*max)); ADD2(pmax, K,K);

  /* Construct appropriate OpenGL primitives so as
     to fill the stencil mask with select paint */

  switch (p->type) {
  case VG_PAINT_TYPE_LINEAR_GRADIENT:
    shDrawLinearGradientMesh(p, min, max, mode, texUnit);
    break;

  case VG_PAINT_TYPE_RADIAL_GRADIENT:
    shDrawRadialGradientMesh(p, min, max, mode, texUnit);
    break;
    
  case VG_PAINT_TYPE_PATTERN:
    if (p->pattern != VG_INVALID_HANDLE) {
      shDrawPatternMesh(p, min, max, mode, texUnit);
      break;
    }/* else behave as a color paint */
  
  case VG_PAINT_TYPE_COLOR:
    glColor4fv((GLfloat*)&p->color);
    glBegin(GL_QUADS);
    glVertex2f(pmin.x, pmin.y);
    glVertex2f(pmax.x, pmin.y);
    glVertex2f(pmax.x, pmax.y);
    glVertex2f(pmin.x, pmax.y);
    glEnd();
    break;
  }
}
Exemple #6
0
void
SECTION
__dubsin (double x, double dx, double v[])
{
  double r, s, c, cc, d, dd, d2, dd2, e, ee,
	 sn, ssn, cs, ccs, ds, dss, dc, dcc;
#ifndef DLA_FMS
  double p, hx, tx, hy, ty, q;
#endif
  mynumber u;
  int4 k;

  u.x = x + big.x;
  k = u.i[LOW_HALF] << 2;
  x = x - (u.x - big.x);
  d = x + dx;
  dd = (x - d) + dx;
  /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
  MUL2 (d, dd, d, dd, d2, dd2, p, hx, tx, hy, ty, q, c, cc);
  sn = __sincostab.x[k];       /*                                  */
  ssn = __sincostab.x[k + 1];  /*      sin(Xi) and cos(Xi)         */
  cs = __sincostab.x[k + 2];   /*                                  */
  ccs = __sincostab.x[k + 3];  /*                                  */
  /* Taylor series for sin ds=sin(t) */
  MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc);
  ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s);
  MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
  ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s);
  MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
  MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
  ADD2 (ds, dss, d, dd, ds, dss, r, s);

  /* Taylor series for cos dc=cos(t) */
  MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
  ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s);
  MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
  ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s);
  MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
  ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s);
  MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);

  MUL2 (cs, ccs, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc);
  MUL2 (dc, dcc, sn, ssn, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
  SUB2 (e, ee, dc, dcc, e, ee, r, s);
  ADD2 (e, ee, sn, ssn, e, ee, r, s);                    /* e+ee=sin(x+dx) */

  v[0] = e;
  v[1] = ee;
}
Exemple #7
0
void __dubsin(Double x, Double dx, Double v[]) {
  Double r,s,p,hx,tx,hy,ty,q,c,cc,d,dd,d2,dd2,e,ee,
    sn,ssn,cs,ccs,ds,dss,dc,dcc;
#if 0
  Double xx,y,yy,z,zz;
#endif
  mynumber u;
  int4 k;

  u.x()=x+big.x();
  k = u.i[LOW_HALF]<<2;
  x=x-(u.x()-big.x());
  d=x+dx;
  dd=(x-d)+dx;
         /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
  MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
  sn=sincos.x(k);     /*                                  */
  ssn=sincos.x(k+1);  /*      sin(Xi) and cos(Xi)         */
  cs=sincos.x(k+2);   /*                                  */
  ccs=sincos.x(k+3);  /*                                  */
  MUL2(d2,dd2,s7.x(),ss7.x(),ds,dss,p,hx,tx,hy,ty,q,c,cc);  /* Taylor    */
  ADD2(ds,dss,s5.x(),ss5.x(),ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);      /* series    */
  ADD2(ds,dss,s3.x(),ss3.x(),ds,dss,r,s);
  MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);      /* for sin   */
  MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
  ADD2(ds,dss,d,dd,ds,dss,r,s);                         /* ds=sin(t) */

  MUL2(d2,dd2,c8.x(),cc8.x(),dc,dcc,p,hx,tx,hy,ty,q,c,cc); ;/* Taylor    */
  ADD2(dc,dcc,c6.x(),cc6.x(),dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);      /* series    */
  ADD2(dc,dcc,c4.x(),cc4.x(),dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);      /* for cos   */
  ADD2(dc,dcc,c2.x(),cc2.x(),dc,dcc,r,s);
  MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);      /* dc=cos(t) */

  MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
  MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
  SUB2(e,ee,dc,dcc,e,ee,r,s);
  ADD2(e,ee,sn,ssn,e,ee,r,s);                    /* e+ee=sin(x+dx) */

  v[0]=e;
  v[1]=ee;
}
Exemple #8
0
static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
                                        const uint8_t* top) {
  if (left != NULL) {
    if (top != NULL) {
      int j;
      v8i16 d1, d2;
      const v16i8 zero = { 0 };
      const v8i16 TL = (v8i16)__msa_fill_h(left[-1]);
      const v16u8 T = LD_UB(top);
      ILVRL_B2_SH(zero, T, d1, d2);
      SUB2(d1, TL, d2, TL, d1, d2);
      for (j = 0; j < 16; j += 4) {
        v16i8 t0, t1, t2, t3;
        v8i16 r0, r1, r2, r3, r4, r5, r6, r7;
        const v8i16 L0 = (v8i16)__msa_fill_h(left[j + 0]);
        const v8i16 L1 = (v8i16)__msa_fill_h(left[j + 1]);
        const v8i16 L2 = (v8i16)__msa_fill_h(left[j + 2]);
        const v8i16 L3 = (v8i16)__msa_fill_h(left[j + 3]);
        ADD4(d1, L0, d1, L1, d1, L2, d1, L3, r0, r1, r2, r3);
        ADD4(d2, L0, d2, L1, d2, L2, d2, L3, r4, r5, r6, r7);
        CLIP_SH4_0_255(r0, r1, r2, r3);
        CLIP_SH4_0_255(r4, r5, r6, r7);
        PCKEV_B4_SB(r4, r0, r5, r1, r6, r2, r7, r3, t0, t1, t2, t3);
        ST_SB4(t0, t1, t2, t3, dst, BPS);
        dst += 4 * BPS;
      }
    } else {
      HorizontalPred16x16(dst, left);
    }
  } else {
    if (top != NULL) {
      VerticalPred16x16(dst, top);
    } else {
      const v16u8 out = (v16u8)__msa_fill_b(0x81);
      STORE16x16(out, dst);
    }
  }
}
Exemple #9
0
int shDrawRadialGradientMesh(SHPaint *p, SHVector2 *min, SHVector2 *max,
                             VGPaintMode mode, GLenum texUnit)
{
  SHint i, j;
  float a, n;
  
  SHfloat cx = p->radialGradient[0];
  SHfloat cy = p->radialGradient[1];
  SHfloat fx = p->radialGradient[2];
  SHfloat fy = p->radialGradient[3];
  float r = p->radialGradient[4];
  float fcx, fcy, rr, C;
  
  SHVector2 ux;
  SHVector2 uy;
  SHVector2 c, f;
  SHVector2 cf;

  SHMatrix3x3 *m;
  SHMatrix3x3 mi;
  SHint invertible;
  SHVector2 corners[4];
  SHVector2 fcorners[4];
  SHfloat minOffset=0.0f;
  SHfloat maxOffset=0.0f;
  
  SHint maxI=0, maxJ=0;
  SHfloat maxA=0.0f;
  SHfloat startA=0.0f;
  
  int numsteps = 100;
  float step = 2*PI/numsteps;
  SHVector2 tmin, tmax;
  SHVector2 min1, max1, min2, max2;
  
  /* Pick paint transform matrix */
  SH_GETCONTEXT(0);
  if (mode == VG_FILL_PATH)
    m = &context->fillTransform;
  else if (mode == VG_STROKE_PATH)
    m = &context->strokeTransform;
  
  /* Move focus into circle if outside */
  SET2(cf, fx,fy);
  SUB2(cf, cx,cy);
  n = NORM2(cf);
  if (n > r) {
    DIV2(cf, n);
    fx = cx + 0.995f * r * cf.x;
    fy = cy + 0.995f * r * cf.y;
  }
  
  /* Precalculations */
  rr = r*r;
  fcx = fx - cx;
  fcy = fy - cy;
  C = fcx*fcx + fcy*fcy - rr;
  
  /* Apply paint-to-user transformation
     to focus and unit vectors */
  SET2(f, fx, fy);
  SET2(c, cx, cy);
  SET2(ux, 1, 0);
  SET2(uy, 0, 1);
  ADD2(ux, cx, cy);
  ADD2(uy, cx, cy);
  TRANSFORM2(f, (*m));
  TRANSFORM2(c, (*m));
  TRANSFORM2(ux, (*m));
  TRANSFORM2(uy, (*m));
  SUB2V(ux, c); SUB2V(uy, c);
  
  /* Boundbox corners */
  SET2(corners[0], min->x, min->y);
  SET2(corners[1], max->x, min->y);
  SET2(corners[2], max->x, max->y);
  SET2(corners[3], min->x, max->y);
  
  /* Find inverse transformation (back to paint space) */
  invertible = shInvertMatrix(m, &mi);
  if (!invertible || r <= 0.0f) {
    
    /* Fill boundbox with color at offset 1 */
    SHColor *c = &p->stops.items[p->stops.size-1].color;
    glColor4fv((GLfloat*)c); glBegin(GL_QUADS);
    for (i=0; i<4; ++i) glVertex2fv((GLfloat*)&corners[i]);
    glEnd();
    return 1;
  }
  
  /*--------------------------------------------------------*/
  
  /* Find min/max offset */
  for (i=0; i<4; ++i) {
    
    /* Transform to paint space */
    SHfloat ax,ay, A,B,D,t, off;
    TRANSFORM2TO(corners[i], mi, fcorners[i]);
    SUB2(fcorners[i], fx, fy);
    n = NORM2(fcorners[i]);
    if (n == 0.0f) {
      
      /* Avoid zero-length vectors */
      off = 0.0f;
      
    }else{
      
      /* Distance from focus to circle at corner angle */
      DIV2(fcorners[i], n);
      ax = fcorners[i].x;
      ay = fcorners[i].y;
      A = ax*ax + ay*ay;
      B = 2 * (fcx*ax + fcy*ay);
      D = B*B - 4*A*C;
      t = (-B + SH_SQRT(D)) / (2*A);
      
      /* Relative offset of boundbox corner */
      if (D <= 0.0f) off = 1.0f;
      else off = n / t;
    }
    
    /* Find smallest and largest offset */
    if (off < minOffset || i==0) minOffset = off;
    if (off > maxOffset || i==0) maxOffset = off;
  }
  
  /* Is transformed focus inside original boundbox? */
  if (f.x >= min->x && f.x <= max->x &&
      f.y >= min->y && f.y <= max->y) {
    
    /* Draw whole circle */
    minOffset = 0.0f;
    startA = 0.0f;
    maxA = 2*PI;
    
  }else{
    
    /* Find most distant corner pair */
    for (i=0; i<3; ++i) {
      if (ISZERO2(fcorners[i])) continue;
      for (j=i+1; j<4; ++j) {
        if (ISZERO2(fcorners[j])) continue;
        a = ANGLE2N(fcorners[i], fcorners[j]);
        if (a > maxA || maxA == 0.0f)
          {maxA=a; maxI=i; maxJ=j;}
      }}
    
    /* Pick starting angle */
    if (CROSS2(fcorners[maxI],fcorners[maxJ]) > 0.0f)
      startA = shVectorOrientation(&fcorners[maxI]);
    else startA = shVectorOrientation(&fcorners[maxJ]);
  }
  
  /*---------------------------------------------------------*/
  
  /* TODO: for minOffset we'd actually need to find minimum
     of the gradient function when X and Y are substitued
     with a line equation for each bound-box edge. As a
     workaround we use 0.0f for now. */
  minOffset = 0.0f;
  step = PI/50;
  numsteps = (SHint)SH_CEIL(maxA / step) + 1;
  
  glActiveTexture(texUnit);
  shSetGradientTexGLState(p);
  
  glEnable(GL_TEXTURE_1D);
  glBegin(GL_QUADS);
  
  /* Walk the steps and draw gradient mesh */
  for (i=0, a=startA; i<numsteps; ++i, a+=step) {
    
    /* Distance from focus to circle border
         at current angle (gradient space) */
    float ax = SH_COS(a);
    float ay = SH_SIN(a);
    float A = ax*ax + ay*ay;
    float B = 2 * (fcx*ax + fcy*ay);
    float D = B*B - 4*A*C;
    float t = (-B + SH_SQRT(D)) / (2*A);
    if (D <= 0.0f) t = 0.0f;
    
    /* Vectors pointing towards minimum and maximum
         offset at current angle (gradient space) */
    tmin.x = ax * t * minOffset;
    tmin.y = ay * t * minOffset;
    tmax.x = ax * t * maxOffset;
    tmax.y = ay * t * maxOffset;
    
    /* Transform back to user space */
    min2.x = f.x + tmin.x * ux.x + tmin.y * uy.x;
    min2.y = f.y + tmin.x * ux.y + tmin.y * uy.y;
    max2.x = f.x + tmax.x * ux.x + tmax.y * uy.x;
    max2.y = f.y + tmax.x * ux.y + tmax.y * uy.y;
    
    /* Draw quad */
    if (i!=0) {
      glMultiTexCoord1f(texUnit, minOffset);
      glVertex2fv((GLfloat*)&min1);
      glVertex2fv((GLfloat*)&min2);
      glMultiTexCoord1f(texUnit, maxOffset);
      glVertex2fv((GLfloat*)&max2);
      glVertex2fv((GLfloat*)&max1);
    }
    
    /* Save prev points */
    min1 = min2;
    max1 = max2;
  }
  
  glEnd();
  glDisable(GL_TEXTURE_1D);

  return 1;
}
static void fdct8x32_1d_row_odd_rd(int16_t *temp, int16_t *interm_ptr,
                                   int16_t *out) {
  v8i16 in16, in17, in18, in19, in20, in21, in22, in23;
  v8i16 in24, in25, in26, in27, in28, in29, in30, in31;
  v8i16 vec4, vec5;

  in20 = LD_SH(temp + 32);
  in21 = LD_SH(temp + 40);
  in26 = LD_SH(temp + 80);
  in27 = LD_SH(temp + 88);

  DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27);
  DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26);

  FDCT_POSTPROC_2V_NEG_H(in20, in21);
  FDCT_POSTPROC_2V_NEG_H(in26, in27);

  in18 = LD_SH(temp + 16);
  in19 = LD_SH(temp + 24);
  in28 = LD_SH(temp + 96);
  in29 = LD_SH(temp + 104);

  FDCT_POSTPROC_2V_NEG_H(in18, in19);
  FDCT_POSTPROC_2V_NEG_H(in28, in29);

  vec4 = in19 - in20;
  ST_SH(vec4, interm_ptr + 32);
  vec4 = in18 - in21;
  ST_SH(vec4, interm_ptr + 88);
  vec4 = in29 - in26;
  ST_SH(vec4, interm_ptr + 64);
  vec4 = in28 - in27;
  ST_SH(vec4, interm_ptr + 56);

  ADD4(in18, in21, in19, in20, in28, in27, in29, in26, in21, in20, in27, in26);

  in22 = LD_SH(temp + 48);
  in23 = LD_SH(temp + 56);
  in24 = LD_SH(temp + 64);
  in25 = LD_SH(temp + 72);

  DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25);
  DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24);
  FDCT_POSTPROC_2V_NEG_H(in22, in23);
  FDCT_POSTPROC_2V_NEG_H(in24, in25);

  in16 = LD_SH(temp);
  in17 = LD_SH(temp + 8);
  in30 = LD_SH(temp + 112);
  in31 = LD_SH(temp + 120);

  FDCT_POSTPROC_2V_NEG_H(in16, in17);
  FDCT_POSTPROC_2V_NEG_H(in30, in31);

  vec4 = in17 - in22;
  ST_SH(vec4, interm_ptr + 40);
  vec4 = in30 - in25;
  ST_SH(vec4, interm_ptr + 48);
  vec4 = in31 - in24;
  ST_SH(vec4, interm_ptr + 72);
  vec4 = in16 - in23;
  ST_SH(vec4, interm_ptr + 80);

  ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31);
  DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29);
  DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28);
  ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25);
  DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24);
  ADD2(in27, in26, in25, in24, in23, in20);
  DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5);
  ST_SH(vec5, out);
  ST_SH(vec4, out + 120);

  SUB2(in27, in26, in25, in24, in22, in21);
  DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4);
  ST_SH(vec5, out + 112);
  ST_SH(vec4, out + 8);

  SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20);
  DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25);
  SUB2(in26, in27, in24, in25, in23, in20);
  DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5);
  ST_SH(vec4, out + 16);
  ST_SH(vec5, out + 104);

  ADD2(in26, in27, in24, in25, in22, in21);
  DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5);
  ST_SH(vec4, out + 24);
  ST_SH(vec5, out + 96);

  in20 = LD_SH(interm_ptr + 32);
  in21 = LD_SH(interm_ptr + 88);
  in27 = LD_SH(interm_ptr + 56);
  in26 = LD_SH(interm_ptr + 64);

  in16 = in20;
  in17 = in21;
  DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27);
  DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26);

  in22 = LD_SH(interm_ptr + 40);
  in25 = LD_SH(interm_ptr + 48);
  in24 = LD_SH(interm_ptr + 72);
  in23 = LD_SH(interm_ptr + 80);

  SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31);
  DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30);
  in16 = in28 + in29;
  in19 = in31 + in30;
  DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4);
  ST_SH(vec5, out + 32);
  ST_SH(vec4, out + 88);

  SUB2(in28, in29, in31, in30, in17, in18);
  DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4);
  ST_SH(vec5, out + 40);
  ST_SH(vec4, out + 80);

  ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19);
  DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31);
  SUB2(in29, in28, in30, in31, in16, in19);
  DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4);
  ST_SH(vec5, out + 72);
  ST_SH(vec4, out + 48);

  ADD2(in29, in28, in30, in31, in17, in18);
  DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4);
  ST_SH(vec4, out + 56);
  ST_SH(vec5, out + 64);
}
static void fdct8x32_1d_row_even_rd(int16_t *temp, int16_t *out) {
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
  v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
  v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, temp0, temp1;

  /* fdct32 even */
  /* stage 2 */
  LD_SH8(temp, 8, in0, in1, in2, in3, in4, in5, in6, in7);
  LD_SH8(temp + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15);

  BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
               in8, in9, in10, in11, in12, in13, in14, in15,
               vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
               in8, in9, in10, in11, in12, in13, in14, in15);
  FDCT_POSTPROC_2V_NEG_H(vec0, vec1);
  FDCT_POSTPROC_2V_NEG_H(vec2, vec3);
  FDCT_POSTPROC_2V_NEG_H(vec4, vec5);
  FDCT_POSTPROC_2V_NEG_H(vec6, vec7);
  FDCT_POSTPROC_2V_NEG_H(in8, in9);
  FDCT_POSTPROC_2V_NEG_H(in10, in11);
  FDCT_POSTPROC_2V_NEG_H(in12, in13);
  FDCT_POSTPROC_2V_NEG_H(in14, in15);

  /* Stage 3 */
  ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3);

  temp0 = in0 + in3;
  in0 = in0 - in3;
  in3 = in1 + in2;
  in1 = in1 - in2;

  DOTP_CONST_PAIR(temp0, in3, cospi_16_64, cospi_16_64, temp1, temp0);
  ST_SH(temp0, out);
  ST_SH(temp1, out + 8);

  DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0);
  ST_SH(temp0, out + 16);
  ST_SH(temp1, out + 24);

  SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7);
  DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6);
  ADD2(vec4, vec5, vec7, vec6, vec0, vec1);
  DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0);
  ST_SH(temp0, out + 32);
  ST_SH(temp1, out + 56);

  SUB2(vec4, vec5, vec7, vec6, vec4, vec7);
  DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0);
  ST_SH(temp0, out + 40);
  ST_SH(temp1, out + 48);

  DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5);
  DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4);
  ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2);
  DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3);
  ADD2(in0, in1, in2, in3, vec0, vec7);
  DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0);
  ST_SH(temp0, out + 64);
  ST_SH(temp1, out + 120);

  SUB2(in0, in1, in2, in3, in0, in2);
  DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0);
  ST_SH(temp0, out + 72);
  ST_SH(temp1, out + 112);

  SUB2(in9, vec2, in14, vec5, vec2, vec5);
  DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1);
  SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5);
  DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0);
  ST_SH(temp0, out + 80);
  ST_SH(temp1, out + 104);

  ADD2(in3, in2, in0, in1, vec3, vec4);
  DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1);
  ST_SH(temp0, out + 96);
  ST_SH(temp1, out + 88);
}
static void fdct8x32_1d_column_odd_store(int16_t *input, int16_t *temp_ptr) {
  v8i16 in16, in17, in18, in19, in20, in21, in22, in23;
  v8i16 in24, in25, in26, in27, in28, in29, in30, in31, vec4, vec5;

  in20 = LD_SH(input + 32);
  in21 = LD_SH(input + 40);
  in26 = LD_SH(input + 80);
  in27 = LD_SH(input + 88);

  DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27);
  DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26);

  in18 = LD_SH(input + 16);
  in19 = LD_SH(input + 24);
  in28 = LD_SH(input + 96);
  in29 = LD_SH(input + 104);

  vec4 = in19 - in20;
  ST_SH(vec4, input + 32);
  vec4 = in18 - in21;
  ST_SH(vec4, input + 40);
  vec4 = in29 - in26;
  ST_SH(vec4, input + 80);
  vec4 = in28 - in27;
  ST_SH(vec4, input + 88);

  in21 = in18 + in21;
  in20 = in19 + in20;
  in27 = in28 + in27;
  in26 = in29 + in26;

  LD_SH4(input + 48, 8, in22, in23, in24, in25);
  DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25);
  DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24);

  in16 = LD_SH(input);
  in17 = LD_SH(input + 8);
  in30 = LD_SH(input + 112);
  in31 = LD_SH(input + 120);

  vec4 = in17 - in22;
  ST_SH(vec4, input + 16);
  vec4 = in16 - in23;
  ST_SH(vec4, input + 24);
  vec4 = in31 - in24;
  ST_SH(vec4, input + 96);
  vec4 = in30 - in25;
  ST_SH(vec4, input + 104);

  ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31);
  DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29);
  DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28);
  ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25);
  DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24);
  ADD2(in27, in26, in25, in24, in23, in20);
  DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec5, temp_ptr);
  ST_SH(vec4, temp_ptr + 960);

  SUB2(in27, in26, in25, in24, in22, in21);
  DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec5, temp_ptr + 448);
  ST_SH(vec4, temp_ptr + 512);

  SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20);
  DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25);
  SUB2(in26, in27, in24, in25, in23, in20);
  DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec4, temp_ptr + 704);
  ST_SH(vec5, temp_ptr + 256);

  ADD2(in26, in27, in24, in25, in22, in21);
  DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec4, temp_ptr + 192);
  ST_SH(vec5, temp_ptr + 768);

  LD_SH4(input + 16, 8, in22, in23, in20, in21);
  LD_SH4(input + 80, 8, in26, in27, in24, in25);
  in16 = in20;
  in17 = in21;
  DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27);
  DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26);
  SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31);
  DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30);
  ADD2(in28, in29, in31, in30, in16, in19);
  DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec5, temp_ptr + 832);
  ST_SH(vec4, temp_ptr + 128);

  SUB2(in28, in29, in31, in30, in17, in18);
  DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec5, temp_ptr + 320);
  ST_SH(vec4, temp_ptr + 640);
  ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19);
  DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31);
  SUB2(in29, in28, in30, in31, in16, in19);
  DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec5, temp_ptr + 576);
  ST_SH(vec4, temp_ptr + 384);

  ADD2(in29, in28, in30, in31, in17, in18);
  DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4);
  FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
  ST_SH(vec5, temp_ptr + 64);
  ST_SH(vec4, temp_ptr + 896);
}
Exemple #13
0
static void shDrawPaintMesh(VGContext *c, SHVector2 *min, SHVector2 *max,
                            VGPaintMode mode, GLenum texUnit)
{
  SHPaint *p;
  SHVector2 pmin, pmax;
  SHfloat K = 1.0f;
#ifdef ANDROIDVG
	SHColor *color;
	GLfloat v[6][2];
#endif
  
  /* Pick the right paint */
  if (mode == VG_FILL_PATH) {
    p = (c->fillPaint ? c->fillPaint : &c->defaultPaint);
  }else if (mode == VG_STROKE_PATH) {
    p = (c->strokePaint ? c->strokePaint : &c->defaultPaint);
    K = SH_CEIL(c->strokeMiterLimit * c->strokeLineWidth) + 1.0f;
  }
  
  /* We want to be sure to cover every pixel of this path so better
     take a pixel more than leave some out (multisampling is tricky). */
  SET2V(pmin, (*min)); SUB2(pmin, K,K);
  SET2V(pmax, (*max)); ADD2(pmax, K,K);

  /* Construct appropriate OpenGL primitives so as
     to fill the stencil mask with select paint */

  switch (p->type) {
  case VG_PAINT_TYPE_LINEAR_GRADIENT:
    shDrawLinearGradientMesh(p, min, max, mode, texUnit);
    break;

  case VG_PAINT_TYPE_RADIAL_GRADIENT:
    shDrawRadialGradientMesh(p, min, max, mode, texUnit);
    break;
    
  case VG_PAINT_TYPE_PATTERN:
    if (p->pattern != VG_INVALID_HANDLE) {
      shDrawPatternMesh(p, min, max, mode, texUnit);
      break;
    }/* else behave as a color paint */
  
  case VG_PAINT_TYPE_COLOR:
#ifdef ANDROIDVG
	v[0][0] = pmin.x; v[0][1] = pmin.y;
	v[1][0] = pmax.x; v[1][1] = pmin.y;
	v[2][0] = pmax.x; v[2][1] = pmax.y;
	v[3][0] = pmin.x; v[3][1] = pmin.y;
	v[4][0] = pmax.x; v[4][1] = pmax.y;
	v[5][0] = pmin.x; v[5][1] = pmax.y;
	color = &p->color;
	glColor4f(color->r, color->g, color->b, color->a);
	glEnableClientState(GL_VERTEX_ARRAY);
	glVertexPointer(2, GL_FLOAT, 0, v); 
	glDrawArrays(GL_TRIANGLES, 0, 6); 
	glDisableClientState(GL_VERTEX_ARRAY);
#else
	glColor4fv((GLfloat*)&p->color);
	glBegin(GL_QUADS);
	glVertex2f(pmin.x, pmin.y);
	glVertex2f(pmax.x, pmin.y);
	glVertex2f(pmax.x, pmax.y);
	glVertex2f(pmin.x, pmax.y);
	glEnd();
#endif
    break;
  }
}
Exemple #14
0
static void cave_model (Render* render, Cave* cave, int mode, float minimapoffset)
{

	for (int i = 0; i < SEGMENT_COUNT-1; ++i) {

		// aid bread-crumb track
		if (render->aidtrack  &&  mode == DISPLAYMODE_NORMAL && !(i&1)) {
			glDisable(GL_LIGHTING);
			glColor4f(0.5,0.5,1,1);
			glBegin(GL_LINE_STRIP);

			#define CRUMB_SIZE 0.1
			glVertex3f(cave->centers[i][0]-CRUMB_SIZE,cave->centers[i][1]-CRUMB_SIZE,cave->centers[i][2]);
			glVertex3f(cave->centers[i][0]+CRUMB_SIZE,cave->centers[i][1]+CRUMB_SIZE,cave->centers[i][2]);
			glVertex3fv(cave->centers[i]);
			glVertex3f(cave->centers[i][0]+CRUMB_SIZE,cave->centers[i][1]-CRUMB_SIZE,cave->centers[i][2]);
			glVertex3f(cave->centers[i][0]-CRUMB_SIZE,cave->centers[i][1]+CRUMB_SIZE,cave->centers[i][2]);

			glEnd();
		}

		if (render->lighting)
			glEnable(GL_LIGHTING);

		int i0 = (cave->i + i)%SEGMENT_COUNT;

		if (cave->dirty[i0]) {
			for (int mode = 0; mode < DISPLAYMODE_COUNT; ++mode) {
				if (glIsList (render->gl_list[mode][i0]))
					glDeleteLists (render->gl_list[mode][i0], 1);
				render->gl_list[mode][i0] = 0;
			}
			cave->dirty[i0] = false;
		}

		if (render->gl_list[mode][i0] == 0) {
			int id = render->gl_list[mode][i0] = i0 + render->list_start[mode];

			glNewList (id, GL_COMPILE);

			int i1 = (i0 + 1)%SEGMENT_COUNT;
			if (mode == DISPLAYMODE_NORMAL) {
				glBindTexture (GL_TEXTURE_2D,
#ifdef OUTSIDE_TEXTURE_FILE
						cave->segs[0][0][2] < ROOM_LEN/2
						? render->outside_texture_id : 
#endif
						render->wall_texture_id
				);

				glColor4f (1, 1, 1, 0.5);
			}

			glBegin (GL_QUAD_STRIP);
			for (int k = 0; k <= SECTOR_COUNT; ++k) {

				int k0 = k%SECTOR_COUNT;

				if (mode == DISPLAYMODE_NORMAL) {
					glTexCoord2f(
							cave->segs[i0][k0][2]/SEGMENT_LEN/SEGMENT_COUNT, 
							(float)k/SECTOR_COUNT);
				}

				GLfloat thenormal[] = {0, 0, 0};
				SUB2(thenormal, cave->centers[i0], cave->segs[i0][k0]);
				NORM(thenormal);
				glNormal3fv(thenormal);

				glVertex3fv(cave->segs[i0][k0]);

				if (mode == DISPLAYMODE_NORMAL) {
					glTexCoord2f(
							cave->segs[i1][k0][2]/SEGMENT_LEN/SEGMENT_COUNT, 
							(float)k/SECTOR_COUNT);
				}

				SUB2(thenormal, cave->centers[i1], cave->segs[i1][k0]);
				NORM(thenormal);
				glNormal3fv(thenormal);

				glVertex3fv(cave->segs[i1][k0]);
			}
			glEnd();

			glEndList();
		}

		if (mode == DISPLAYMODE_NORMAL) {
			glEnable  (GL_DEPTH_TEST);
			glDisable (GL_BLEND);
			glEnable  (GL_TEXTURE_2D);
		} else {
			glDisable (GL_DEPTH_TEST);
			glEnable  (GL_BLEND);
			glDisable (GL_TEXTURE_2D);
			glDisable(GL_LIGHTING);
		}

		if (mode == DISPLAYMODE_MINIMAP) {
// apparently pow is a lot more complicated than what we need here.
#define FASTPOW6(a) a * a * a * a * a * a
			float ioffset = minimapoffset - (int)(minimapoffset); // i wonder if this works at all?
			float alpha = .12f - .12f * FASTPOW6(((SEGMENT_COUNT / 2.0f - i + ioffset) / SEGMENT_COUNT * 2.0f));

			if(i > render->gauge * SEGMENT_COUNT)
				glColor4f (1, 1, 1, alpha);
			else
				glColor4f (
				           huemap[i][0],
				           huemap[i][1],
				           huemap[i][2], alpha);
		}
		glCallList (render->gl_list[mode][i0]);
	}

}
Exemple #15
0
double
SECTION
__ieee754_atan2 (double y, double x)
{
  int i, de, ux, dx, uy, dy;
  static const int pr[MM] = { 6, 8, 10, 20, 32 };
  double ax, ay, u, du, u9, ua, v, vv, dv, t1, t2, t3, t7, t8,
    z, zz, cor, s1, ss1, s2, ss2;
#ifndef DLA_FMS
  double t4, t5, t6;
#endif
  number num;

  static const int ep = 59768832,	/*  57*16**5   */
    em = -59768832;		/* -57*16**5   */

  /* x=NaN or y=NaN */
  num.d = x;
  ux = num.i[HIGH_HALF];
  dx = num.i[LOW_HALF];
  if ((ux & 0x7ff00000) == 0x7ff00000)
    {
      if (((ux & 0x000fffff) | dx) != 0x00000000)
	return x + x;
    }
  num.d = y;
  uy = num.i[HIGH_HALF];
  dy = num.i[LOW_HALF];
  if ((uy & 0x7ff00000) == 0x7ff00000)
    {
      if (((uy & 0x000fffff) | dy) != 0x00000000)
	return y + y;
    }

  /* y=+-0 */
  if (uy == 0x00000000)
    {
      if (dy == 0x00000000)
	{
	  if ((ux & 0x80000000) == 0x00000000)
	    return 0;
	  else
	    return opi.d;
	}
    }
  else if (uy == 0x80000000)
    {
      if (dy == 0x00000000)
	{
	  if ((ux & 0x80000000) == 0x00000000)
	    return -0.0;
	  else
	    return mopi.d;
	}
    }

  /* x=+-0 */
  if (x == 0)
    {
      if ((uy & 0x80000000) == 0x00000000)
	return hpi.d;
      else
	return mhpi.d;
    }

  /* x=+-INF */
  if (ux == 0x7ff00000)
    {
      if (dx == 0x00000000)
	{
	  if (uy == 0x7ff00000)
	    {
	      if (dy == 0x00000000)
		return qpi.d;
	    }
	  else if (uy == 0xfff00000)
	    {
	      if (dy == 0x00000000)
		return mqpi.d;
	    }
	  else
	    {
	      if ((uy & 0x80000000) == 0x00000000)
		return 0;
	      else
		return -0.0;
	    }
	}
    }
  else if (ux == 0xfff00000)
    {
      if (dx == 0x00000000)
	{
	  if (uy == 0x7ff00000)
	    {
	      if (dy == 0x00000000)
		return tqpi.d;
	    }
	  else if (uy == 0xfff00000)
	    {
	      if (dy == 0x00000000)
		return mtqpi.d;
	    }
	  else
	    {
	      if ((uy & 0x80000000) == 0x00000000)
		return opi.d;
	      else
		return mopi.d;
	    }
	}
    }

  /* y=+-INF */
  if (uy == 0x7ff00000)
    {
      if (dy == 0x00000000)
	return hpi.d;
    }
  else if (uy == 0xfff00000)
    {
      if (dy == 0x00000000)
	return mhpi.d;
    }

  /* either x/y or y/x is very close to zero */
  ax = (x < 0) ? -x : x;
  ay = (y < 0) ? -y : y;
  de = (uy & 0x7ff00000) - (ux & 0x7ff00000);
  if (de >= ep)
    {
      return ((y > 0) ? hpi.d : mhpi.d);
    }
  else if (de <= em)
    {
      if (x > 0)
	{
	  if ((z = ay / ax) < TWOM1022)
	    return normalized (ax, ay, y, z);
	  else
	    return signArctan2 (y, z);
	}
      else
	{
	  return ((y > 0) ? opi.d : mopi.d);
	}
    }

  /* if either x or y is extremely close to zero, scale abs(x), abs(y). */
  if (ax < twom500.d || ay < twom500.d)
    {
      ax *= two500.d;
      ay *= two500.d;
    }

  /* Likewise for large x and y.  */
  if (ax > two500.d || ay > two500.d)
    {
      ax *= twom500.d;
      ay *= twom500.d;
    }

  /* x,y which are neither special nor extreme */
  if (ay < ax)
    {
      u = ay / ax;
      EMULV (ax, u, v, vv, t1, t2, t3, t4, t5);
      du = ((ay - v) - vv) / ax;
    }
  else
    {
      u = ax / ay;
      EMULV (ay, u, v, vv, t1, t2, t3, t4, t5);
      du = ((ax - v) - vv) / ay;
    }

  if (x > 0)
    {
      /* (i)   x>0, abs(y)< abs(x):  atan(ay/ax) */
      if (ay < ax)
	{
	  if (u < inv16.d)
	    {
	      v = u * u;

	      zz = du + u * v * (d3.d
				 + v * (d5.d
					+ v * (d7.d
					       + v * (d9.d
						      + v * (d11.d
							     + v * d13.d)))));

	      if ((z = u + (zz - u1.d * u)) == u + (zz + u1.d * u))
		return signArctan2 (y, z);

	      MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
	      s1 = v * (f11.d + v * (f13.d
				     + v * (f15.d + v * (f17.d + v * f19.d))));
	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);

	      if ((z = s1 + (ss1 - u5.d * s1)) == s1 + (ss1 + u5.d * s1))
		return signArctan2 (y, z);

	      return atan2Mp (x, y, pr);
	    }

	  i = (TWO52 + TWO8 * u) - TWO52;
	  i -= 16;
	  t3 = u - cij[i][0].d;
	  EADD (t3, du, v, dv);
	  t1 = cij[i][1].d;
	  t2 = cij[i][2].d;
	  zz = v * t2 + (dv * t2
			 + v * v * (cij[i][3].d
				    + v * (cij[i][4].d
					   + v * (cij[i][5].d
						  + v * cij[i][6].d))));
	  if (i < 112)
	    {
	      if (i < 48)
		u9 = u91.d;	/* u < 1/4	*/
	      else
		u9 = u92.d;
	    }		/* 1/4 <= u < 1/2 */
	  else
	    {
	      if (i < 176)
		u9 = u93.d;	/* 1/2 <= u < 3/4 */
	      else
		u9 = u94.d;
	    }		/* 3/4 <= u <= 1  */
	  if ((z = t1 + (zz - u9 * t1)) == t1 + (zz + u9 * t1))
	    return signArctan2 (y, z);

	  t1 = u - hij[i][0].d;
	  EADD (t1, du, v, vv);
	  s1 = v * (hij[i][11].d
		    + v * (hij[i][12].d
			   +  v * (hij[i][13].d
				   + v * (hij[i][14].d
					  + v * hij[i][15].d))));
	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);

	  if ((z = s2 + (ss2 - ub.d * s2)) == s2 + (ss2 + ub.d * s2))
	    return signArctan2 (y, z);
	  return atan2Mp (x, y, pr);
	}

      /* (ii)  x>0, abs(x)<=abs(y):  pi/2-atan(ax/ay) */
      if (u < inv16.d)
	{
	  v = u * u;
	  zz = u * v * (d3.d
			+ v * (d5.d
			       + v * (d7.d
				      + v * (d9.d
					     + v * (d11.d
						    + v * d13.d)))));
	  ESUB (hpi.d, u, t2, cor);
	  t3 = ((hpi1.d + cor) - du) - zz;
	  if ((z = t2 + (t3 - u2.d)) == t2 + (t3 + u2.d))
	    return signArctan2 (y, z);

	  MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
	  s1 = v * (f11.d
		    + v * (f13.d
			   + v * (f15.d + v * (f17.d + v * f19.d))));
	  ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
	  SUB2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2);

	  if ((z = s2 + (ss2 - u6.d)) == s2 + (ss2 + u6.d))
	    return signArctan2 (y, z);
	  return atan2Mp (x, y, pr);
	}

      i = (TWO52 + TWO8 * u) - TWO52;
      i -= 16;
      v = (u - cij[i][0].d) + du;

      zz = hpi1.d - v * (cij[i][2].d
			 + v * (cij[i][3].d
				+ v * (cij[i][4].d
				       + v * (cij[i][5].d
					      + v * cij[i][6].d))));
      t1 = hpi.d - cij[i][1].d;
      if (i < 112)
	ua = ua1.d;	/* w <  1/2 */
      else
	ua = ua2.d;	/* w >= 1/2 */
      if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
	return signArctan2 (y, z);

      t1 = u - hij[i][0].d;
      EADD (t1, du, v, vv);

      s1 = v * (hij[i][11].d
		+ v * (hij[i][12].d
		       + v * (hij[i][13].d
			      + v * (hij[i][14].d
				     + v * hij[i][15].d))));

      ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
      SUB2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2);

      if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
	return signArctan2 (y, z);
      return atan2Mp (x, y, pr);
    }

  /* (iii) x<0, abs(x)< abs(y):  pi/2+atan(ax/ay) */
  if (ax < ay)
    {
      if (u < inv16.d)
	{
	  v = u * u;
	  zz = u * v * (d3.d
			+ v * (d5.d
			       + v * (d7.d
				      + v * (d9.d
					     + v * (d11.d + v * d13.d)))));
	  EADD (hpi.d, u, t2, cor);
	  t3 = ((hpi1.d + cor) + du) + zz;
	  if ((z = t2 + (t3 - u3.d)) == t2 + (t3 + u3.d))
	    return signArctan2 (y, z);

	  MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
	  s1 = v * (f11.d
		    + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d))));
	  ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
	  ADD2 (hpi.d, hpi1.d, s1, ss1, s2, ss2, t1, t2);

	  if ((z = s2 + (ss2 - u7.d)) == s2 + (ss2 + u7.d))
	    return signArctan2 (y, z);
	  return atan2Mp (x, y, pr);
	}

      i = (TWO52 + TWO8 * u) - TWO52;
      i -= 16;
      v = (u - cij[i][0].d) + du;
      zz = hpi1.d + v * (cij[i][2].d
			 + v * (cij[i][3].d
				+ v * (cij[i][4].d
				       + v * (cij[i][5].d
					      + v * cij[i][6].d))));
      t1 = hpi.d + cij[i][1].d;
      if (i < 112)
	ua = ua1.d;	/* w <  1/2 */
      else
	ua = ua2.d;	/* w >= 1/2 */
      if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
	return signArctan2 (y, z);

      t1 = u - hij[i][0].d;
      EADD (t1, du, v, vv);
      s1 = v * (hij[i][11].d
		+ v * (hij[i][12].d
		       + v * (hij[i][13].d
			      + v * (hij[i][14].d
				     + v * hij[i][15].d))));
      ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
      ADD2 (hpi.d, hpi1.d, s2, ss2, s1, ss1, t1, t2);

      if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
	return signArctan2 (y, z);
      return atan2Mp (x, y, pr);
    }

  /* (iv)  x<0, abs(y)<=abs(x):  pi-atan(ax/ay) */
  if (u < inv16.d)
    {
      v = u * u;
      zz = u * v * (d3.d
		    + v * (d5.d
			   + v * (d7.d
				  + v * (d9.d + v * (d11.d + v * d13.d)))));
      ESUB (opi.d, u, t2, cor);
      t3 = ((opi1.d + cor) - du) - zz;
      if ((z = t2 + (t3 - u4.d)) == t2 + (t3 + u4.d))
	return signArctan2 (y, z);

      MUL2 (u, du, u, du, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
      s1 = v * (f11.d + v * (f13.d + v * (f15.d + v * (f17.d + v * f19.d))));
      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
      MUL2 (u, du, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
      ADD2 (u, du, s2, ss2, s1, ss1, t1, t2);
      SUB2 (opi.d, opi1.d, s1, ss1, s2, ss2, t1, t2);

      if ((z = s2 + (ss2 - u8.d)) == s2 + (ss2 + u8.d))
	return signArctan2 (y, z);
      return atan2Mp (x, y, pr);
    }

  i = (TWO52 + TWO8 * u) - TWO52;
  i -= 16;
  v = (u - cij[i][0].d) + du;
  zz = opi1.d - v * (cij[i][2].d
		     + v * (cij[i][3].d
			    + v * (cij[i][4].d
				   + v * (cij[i][5].d + v * cij[i][6].d))));
  t1 = opi.d - cij[i][1].d;
  if (i < 112)
    ua = ua1.d;	/* w <  1/2 */
  else
    ua = ua2.d;	/* w >= 1/2 */
  if ((z = t1 + (zz - ua)) == t1 + (zz + ua))
    return signArctan2 (y, z);

  t1 = u - hij[i][0].d;

  EADD (t1, du, v, vv);

  s1 = v * (hij[i][11].d
	    + v * (hij[i][12].d
		   + v * (hij[i][13].d
			  + v * (hij[i][14].d + v * hij[i][15].d))));

  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
  MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
  SUB2 (opi.d, opi1.d, s2, ss2, s1, ss1, t1, t2);

  if ((z = s1 + (ss1 - uc.d)) == s1 + (ss1 + uc.d))
    return signArctan2 (y, z);
  return atan2Mp (x, y, pr);
}
static void fdct8x32_1d_row_even_4x(int16_t *input, int16_t *interm_ptr,
                                    int16_t *out) {
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
  v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
  v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
  v4i32 vec0_l, vec1_l, vec2_l, vec3_l, vec4_l, vec5_l, vec6_l, vec7_l;
  v4i32 vec0_r, vec1_r, vec2_r, vec3_r, vec4_r, vec5_r, vec6_r, vec7_r;
  v4i32 tmp0_w, tmp1_w, tmp2_w, tmp3_w;

  /* fdct32 even */
  /* stage 2 */
  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
  LD_SH8(input + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15);

  BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
               in8, in9, in10, in11, in12, in13, in14, in15,
               vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
               in8, in9, in10, in11, in12, in13, in14, in15);
  ST_SH8(vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, interm_ptr, 8);
  ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, interm_ptr + 64, 8);

  /* Stage 3 */
  UNPCK_SH_SW(vec0, vec0_l, vec0_r);
  UNPCK_SH_SW(vec1, vec1_l, vec1_r);
  UNPCK_SH_SW(vec2, vec2_l, vec2_r);
  UNPCK_SH_SW(vec3, vec3_l, vec3_r);
  UNPCK_SH_SW(vec4, vec4_l, vec4_r);
  UNPCK_SH_SW(vec5, vec5_l, vec5_r);
  UNPCK_SH_SW(vec6, vec6_l, vec6_r);
  UNPCK_SH_SW(vec7, vec7_l, vec7_r);
  ADD4(vec0_r, vec7_r, vec1_r, vec6_r, vec2_r, vec5_r, vec3_r, vec4_r,
       tmp0_w, tmp1_w, tmp2_w, tmp3_w);
  BUTTERFLY_4(tmp0_w, tmp1_w, tmp2_w, tmp3_w, vec4_r, vec6_r, vec7_r, vec5_r);
  ADD4(vec0_l, vec7_l, vec1_l, vec6_l, vec2_l, vec5_l, vec3_l, vec4_l,
       vec0_r, vec1_r, vec2_r, vec3_r);

  tmp3_w = vec0_r + vec3_r;
  vec0_r = vec0_r - vec3_r;
  vec3_r = vec1_r + vec2_r;
  vec1_r = vec1_r - vec2_r;

  DOTP_CONST_PAIR_W(vec4_r, vec6_r, tmp3_w, vec3_r, cospi_16_64,
                    cospi_16_64, vec4_r, tmp3_w, vec6_r, vec3_r);
  FDCT32_POSTPROC_NEG_W(vec4_r);
  FDCT32_POSTPROC_NEG_W(tmp3_w);
  FDCT32_POSTPROC_NEG_W(vec6_r);
  FDCT32_POSTPROC_NEG_W(vec3_r);
  PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5);
  ST_SH2(vec5, vec4, out, 8);

  DOTP_CONST_PAIR_W(vec5_r, vec7_r, vec0_r, vec1_r, cospi_24_64,
                    cospi_8_64, vec4_r, tmp3_w, vec6_r, vec3_r);
  FDCT32_POSTPROC_NEG_W(vec4_r);
  FDCT32_POSTPROC_NEG_W(tmp3_w);
  FDCT32_POSTPROC_NEG_W(vec6_r);
  FDCT32_POSTPROC_NEG_W(vec3_r);
  PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5);
  ST_SH2(vec5, vec4, out + 16, 8);

  LD_SH8(interm_ptr, 8, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7);
  SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7);
  DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6);
  ADD2(vec4, vec5, vec7, vec6, vec0, vec1);
  DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, in5, in4);
  FDCT_POSTPROC_2V_NEG_H(in4, in5);
  ST_SH(in4, out + 32);
  ST_SH(in5, out + 56);

  SUB2(vec4, vec5, vec7, vec6, vec4, vec7);
  DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, in5, in4);
  FDCT_POSTPROC_2V_NEG_H(in4, in5);
  ST_SH(in4, out + 40);
  ST_SH(in5, out + 48);

  LD_SH8(interm_ptr + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15);
  DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5);
  DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4);
  ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2);
  DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3);
  ADD2(in0, in1, in2, in3, vec0, vec7);
  DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, in5, in4);
  FDCT_POSTPROC_2V_NEG_H(in4, in5);
  ST_SH(in4, out + 64);
  ST_SH(in5, out + 120);

  SUB2(in0, in1, in2, in3, in0, in2);
  DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, in5, in4);
  FDCT_POSTPROC_2V_NEG_H(in4, in5);
  ST_SH(in4, out + 72);
  ST_SH(in5, out + 112);

  SUB2(in9, vec2, in14, vec5, vec2, vec5);
  DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1);
  SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5);
  DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, in5, in4);
  FDCT_POSTPROC_2V_NEG_H(in4, in5);
  ST_SH(in4, out + 80);
  ST_SH(in5, out + 104);

  ADD2(in3, in2, in0, in1, vec3, vec4);
  DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, in4, in5);
  FDCT_POSTPROC_2V_NEG_H(in4, in5);
  ST_SH(in4, out + 96);
  ST_SH(in5, out + 88);
}
static void fdct8x32_1d_column_even_store(int16_t *input, int16_t *temp) {
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
  v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
  v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
  v8i16 temp0, temp1;

  /* fdct even */
  LD_SH4(input, 8, in0, in1, in2, in3);
  LD_SH4(input + 96, 8, in12, in13, in14, in15);
  BUTTERFLY_8(in0, in1, in2, in3, in12, in13, in14, in15,
              vec0, vec1, vec2, vec3, in12, in13, in14, in15);
  LD_SH4(input + 32, 8, in4, in5, in6, in7);
  LD_SH4(input + 64, 8, in8, in9, in10, in11);
  BUTTERFLY_8(in4, in5, in6, in7, in8, in9, in10, in11,
              vec4, vec5, vec6, vec7, in8, in9, in10, in11);

  /* Stage 3 */
  ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3);
  BUTTERFLY_4(in0, in1, in2, in3, temp0, in4, in1, in0);
  DOTP_CONST_PAIR(temp0, in4, cospi_16_64, cospi_16_64, temp1, temp0);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp);
  ST_SH(temp1, temp + 512);

  DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp + 256);
  ST_SH(temp1, temp + 768);

  SUB4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, vec7, vec6, vec5, vec4);
  DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6);
  ADD2(vec4, vec5, vec7, vec6, vec0, vec1);
  DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp + 128);
  ST_SH(temp1, temp + 896);

  SUB2(vec4, vec5, vec7, vec6, vec4, vec7);
  DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp + 640);
  ST_SH(temp1, temp + 384);

  DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5);
  DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4);
  ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2);
  DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3);
  ADD2(in0, in1, in2, in3, vec0, vec7);
  DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp + 64);
  ST_SH(temp1, temp + 960);

  SUB2(in0, in1, in2, in3, in0, in2);
  DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp + 576);
  ST_SH(temp1, temp + 448);

  SUB2(in9, vec2, in14, vec5, vec2, vec5);
  DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1);
  SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5);
  DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp + 320);
  ST_SH(temp1, temp + 704);

  ADD2(in3, in2, in0, in1, vec3, vec4);
  DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1);
  FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
  ST_SH(temp0, temp + 192);
  ST_SH(temp1, temp + 832);
}
Exemple #18
0
void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr,
                        int32_t src_stride) {
  v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
  v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
  v8i16 stp21, stp22, stp23, stp24, stp25, stp26, stp30;
  v8i16 stp31, stp32, stp33, stp34, stp35, stp36, stp37;
  v8i16 vec0, vec1, vec2, vec3, vec4, vec5, cnst0, cnst1, cnst4, cnst5;
  v8i16 coeff = { cospi_16_64, -cospi_16_64, cospi_8_64,  cospi_24_64,
                  -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 };
  v8i16 coeff1 = { cospi_2_64,  cospi_30_64, cospi_14_64, cospi_18_64,
                   cospi_10_64, cospi_22_64, cospi_6_64,  cospi_26_64 };
  v8i16 coeff2 = {
    -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64, 0, 0, 0, 0
  };

  LD_SH16(input, src_stride, in0, in1, in2, in3, in4, in5, in6, in7, in8, in9,
          in10, in11, in12, in13, in14, in15);
  SLLI_4V(in0, in1, in2, in3, 2);
  SLLI_4V(in4, in5, in6, in7, 2);
  SLLI_4V(in8, in9, in10, in11, 2);
  SLLI_4V(in12, in13, in14, in15, 2);
  ADD4(in0, in15, in1, in14, in2, in13, in3, in12, tmp0, tmp1, tmp2, tmp3);
  ADD4(in4, in11, in5, in10, in6, in9, in7, in8, tmp4, tmp5, tmp6, tmp7);
  FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1,
                tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
  ST_SH8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp_ptr, 32);
  SUB4(in0, in15, in1, in14, in2, in13, in3, in12, in15, in14, in13, in12);
  SUB4(in4, in11, in5, in10, in6, in9, in7, in8, in11, in10, in9, in8);

  tmp_ptr += 16;

  /* stp 1 */
  ILVL_H2_SH(in10, in13, in11, in12, vec2, vec4);
  ILVR_H2_SH(in10, in13, in11, in12, vec3, vec5);

  cnst4 = __msa_splati_h(coeff, 0);
  stp25 = DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst4);

  cnst5 = __msa_splati_h(coeff, 1);
  cnst5 = __msa_ilvev_h(cnst5, cnst4);
  stp22 = DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst5);
  stp24 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst4);
  stp23 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst5);

  /* stp2 */
  BUTTERFLY_4(in8, in9, stp22, stp23, stp30, stp31, stp32, stp33);
  BUTTERFLY_4(in15, in14, stp25, stp24, stp37, stp36, stp35, stp34);
  ILVL_H2_SH(stp36, stp31, stp35, stp32, vec2, vec4);
  ILVR_H2_SH(stp36, stp31, stp35, stp32, vec3, vec5);
  SPLATI_H2_SH(coeff, 2, 3, cnst0, cnst1);
  cnst0 = __msa_ilvev_h(cnst0, cnst1);
  stp26 = DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst0);

  cnst0 = __msa_splati_h(coeff, 4);
  cnst1 = __msa_ilvev_h(cnst1, cnst0);
  stp21 = DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst1);

  BUTTERFLY_4(stp30, stp37, stp26, stp21, in8, in15, in14, in9);
  ILVRL_H2_SH(in15, in8, vec1, vec0);
  SPLATI_H2_SH(coeff1, 0, 1, cnst0, cnst1);
  cnst0 = __msa_ilvev_h(cnst0, cnst1);

  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
  ST_SH(in8, tmp_ptr);

  cnst0 = __msa_splati_h(coeff2, 0);
  cnst0 = __msa_ilvev_h(cnst1, cnst0);
  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
  ST_SH(in8, tmp_ptr + 224);

  ILVRL_H2_SH(in14, in9, vec1, vec0);
  SPLATI_H2_SH(coeff1, 2, 3, cnst0, cnst1);
  cnst1 = __msa_ilvev_h(cnst1, cnst0);

  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1);
  ST_SH(in8, tmp_ptr + 128);

  cnst1 = __msa_splati_h(coeff2, 2);
  cnst0 = __msa_ilvev_h(cnst0, cnst1);
  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
  ST_SH(in8, tmp_ptr + 96);

  SPLATI_H2_SH(coeff, 2, 5, cnst0, cnst1);
  cnst1 = __msa_ilvev_h(cnst1, cnst0);

  stp25 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1);

  cnst1 = __msa_splati_h(coeff, 3);
  cnst1 = __msa_ilvev_h(cnst0, cnst1);
  stp22 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1);

  /* stp4 */
  ADD2(stp34, stp25, stp33, stp22, in13, in10);

  ILVRL_H2_SH(in13, in10, vec1, vec0);
  SPLATI_H2_SH(coeff1, 4, 5, cnst0, cnst1);
  cnst0 = __msa_ilvev_h(cnst0, cnst1);
  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
  ST_SH(in8, tmp_ptr + 64);

  cnst0 = __msa_splati_h(coeff2, 1);
  cnst0 = __msa_ilvev_h(cnst1, cnst0);
  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
  ST_SH(in8, tmp_ptr + 160);

  SUB2(stp34, stp25, stp33, stp22, in12, in11);
  ILVRL_H2_SH(in12, in11, vec1, vec0);
  SPLATI_H2_SH(coeff1, 6, 7, cnst0, cnst1);
  cnst1 = __msa_ilvev_h(cnst1, cnst0);

  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1);
  ST_SH(in8, tmp_ptr + 192);

  cnst1 = __msa_splati_h(coeff2, 3);
  cnst0 = __msa_ilvev_h(cnst0, cnst1);
  in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
  ST_SH(in8, tmp_ptr + 32);
}
Exemple #19
0
/* routine computes the correctly rounded (to nearest) value of atan(x). */
double
atan (double x)
{
  double cor, s1, ss1, s2, ss2, t1, t2, t3, t7, t8, t9, t10, u, u2, u3,
	 v, vv, w, ww, y, yy, z, zz;
#ifndef DLA_FMS
  double t4, t5, t6;
#endif
  int i, ux, dx;
  static const int pr[M] = { 6, 8, 10, 32 };
  number num;

  num.d = x;
  ux = num.i[HIGH_HALF];
  dx = num.i[LOW_HALF];

  /* x=NaN */
  if (((ux & 0x7ff00000) == 0x7ff00000)
      && (((ux & 0x000fffff) | dx) != 0x00000000))
    return x + x;

  /* Regular values of x, including denormals +-0 and +-INF */
  SET_RESTORE_ROUND (FE_TONEAREST);
  u = (x < 0) ? -x : x;
  if (u < C)
    {
      if (u < B)
	{
	  if (u < A)
	    {
	      math_check_force_underflow_nonneg (u);
	      return x;
	    }
	  else
	    {			/* A <= u < B */
	      v = x * x;
	      yy = d11.d + v * d13.d;
	      yy = d9.d + v * yy;
	      yy = d7.d + v * yy;
	      yy = d5.d + v * yy;
	      yy = d3.d + v * yy;
	      yy *= x * v;

	      if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x))
		return y;

	      EMULV (x, x, v, vv, t1, t2, t3, t4, t5);	/* v+vv=x^2 */

	      s1 = f17.d + v * f19.d;
	      s1 = f15.d + v * s1;
	      s1 = f13.d + v * s1;
	      s1 = f11.d + v * s1;
	      s1 *= v;

	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7,
		    t8);
	      ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2);
	      if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1))
		return y;

	      return atanMp (x, pr);
	    }
	}
      else
	{			/* B <= u < C */
	  i = (TWO52 + TWO8 * u) - TWO52;
	  i -= 16;
	  z = u - cij[i][0].d;
	  yy = cij[i][5].d + z * cij[i][6].d;
	  yy = cij[i][4].d + z * yy;
	  yy = cij[i][3].d + z * yy;
	  yy = cij[i][2].d + z * yy;
	  yy *= z;

	  t1 = cij[i][1].d;
	  if (i < 112)
	    {
	      if (i < 48)
		u2 = U21;	/* u < 1/4        */
	      else
		u2 = U22;
	    }			/* 1/4 <= u < 1/2 */
	  else
	    {
	      if (i < 176)
		u2 = U23;	/* 1/2 <= u < 3/4 */
	      else
		u2 = U24;
	    }			/* 3/4 <= u <= 1  */
	  if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1))
	    return __signArctan (x, y);

	  z = u - hij[i][0].d;

	  s1 = hij[i][14].d + z * hij[i][15].d;
	  s1 = hij[i][13].d + z * s1;
	  s1 = hij[i][12].d + z * s1;
	  s1 = hij[i][11].d + z * s1;
	  s1 *= z;

	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
	  if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2))
	    return __signArctan (x, y);

	  return atanMp (x, pr);
	}
    }
  else
    {
      if (u < D)
	{			/* C <= u < D */
	  w = 1 / u;
	  EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
	  ww = w * ((1 - t1) - t2);
	  i = (TWO52 + TWO8 * w) - TWO52;
	  i -= 16;
	  z = (w - cij[i][0].d) + ww;

	  yy = cij[i][5].d + z * cij[i][6].d;
	  yy = cij[i][4].d + z * yy;
	  yy = cij[i][3].d + z * yy;
	  yy = cij[i][2].d + z * yy;
	  yy = HPI1 - z * yy;

	  t1 = HPI - cij[i][1].d;
	  if (i < 112)
	    u3 = U31;           /* w <  1/2 */
	  else
	    u3 = U32;           /* w >= 1/2 */
	  if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
	    return __signArctan (x, y);

	  DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
		t10);
	  t1 = w - hij[i][0].d;
	  EADD (t1, ww, z, zz);

	  s1 = hij[i][14].d + z * hij[i][15].d;
	  s1 = hij[i][13].d + z * s1;
	  s1 = hij[i][12].d + z * s1;
	  s1 = hij[i][11].d + z * s1;
	  s1 *= z;

	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
	  SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2);
	  if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7))
	    return __signArctan (x, y);

	  return atanMp (x, pr);
	}
      else
	{
	  if (u < E)
	    {                   /* D <= u < E */
	      w = 1 / u;
	      v = w * w;
	      EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);

	      yy = d11.d + v * d13.d;
	      yy = d9.d + v * yy;
	      yy = d7.d + v * yy;
	      yy = d5.d + v * yy;
	      yy = d3.d + v * yy;
	      yy *= w * v;

	      ww = w * ((1 - t1) - t2);
	      ESUB (HPI, w, t3, cor);
	      yy = ((HPI1 + cor) - ww) - yy;
	      if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
		return __signArctan (x, y);

	      DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8,
		    t9, t10);
	      MUL2 (w, ww, w, ww, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);

	      s1 = f17.d + v * f19.d;
	      s1 = f15.d + v * s1;
	      s1 = f13.d + v * s1;
	      s1 = f11.d + v * s1;
	      s1 *= v;

	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
	      MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
	      ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2);
	      SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2);

	      if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8))
		return __signArctan (x, y);

	      return atanMp (x, pr);
	    }
	  else
	    {
	      /* u >= E */
	      if (x > 0)
		return HPI;
	      else
		return MHPI;
	    }
	}
    }
}