Ejemplo n.º 1
0
void zHex2Dec(z *u, z *v)
{
	//convert u[] in hex to v[] in decimal by repeatedly dividing
	//u by 1e9 = 0x3b9aca00
	//the remainder of the ith division is the ith decimal digit.
	//when the quotient = 0, stop

	z a,b;
	fp_digit r = 0;
	int su = abs(u->size);
	int approx_words = (int)((double)su * 1.5);	
	//because decimal takes more room than hex to store

	zInit(&a);
	zInit(&b);

	if (v->alloc < approx_words)
		zGrow(v,approx_words);
	zClear(v);

	if (a.alloc < approx_words)
	{
		zGrow(&a,approx_words);
		zClear(&a);
	}

	if (b.alloc < approx_words)
	{
		zGrow(&b,approx_words);
		zClear(&b);
	}
	
	zCopy(u,&a);
	v->size = 1;
	do
	{
		r = zShortDiv(&a,MAX_DEC_WORD,&b);
		v->val[v->size - 1] = r;
		v->size++;
		zCopy(&b,&a);
	} while (zCompare(&a,&zZero) != 0);
	v->size--;

	if (u->size < 0)
		v->size *= -1;

	zFree(&a);
	zFree(&b);
	return;
}
Ejemplo n.º 2
0
void monty_init(z *n)
{
	//for a input modulus n, initialize constants for 
	//montogomery representation
	//this assumes that n is relatively prime to 2, i.e. is odd.
	z g, b, q, r;

	//global montyconst structure
	zInit(&montyconst.nhat);
	zInit(&montyconst.r);
	zInit(&montyconst.rhat);
	zInit(&montyconst.one);

	
	if (abs(n->size) <= 16) 
	{
		fp_montgomery_setup(n,&montyconst.nhat.val[0]);
		fp_montgomery_calc_normalization(&montyconst.r,n);
		montyconst.one.val[0] = 1;
		montyconst.one.size = 1;
		to_monty(&montyconst.one,n);
		TFM_MONTY = 1;
		return;
	}
	else
		TFM_MONTY = 0;

	zInit(&g);
	zInit(&b);
	zInit(&q);
	zInit(&r);

	b.val[1]=1; b.size=2;

	//find r = b^t > N, where b = 2 ^32
	if (montyconst.r.alloc < n->size + 1)
		zGrow(&montyconst.r,n->size + 1);

	zClear(&montyconst.r);
	montyconst.r.size = n->size + 1;
	montyconst.r.val[montyconst.r.size - 1] = 1;

	//find nhat = -n^-1 mod b
	//nhat = -(n^-1 mod b) mod b = b - n^-1 mod b
	//since b is 2^32, this can be simplified, and made faster.
	xGCD(n,&b,&montyconst.nhat,&montyconst.rhat,&g);
	zSub(&b,&montyconst.nhat,&q);
	zCopy(&q,&montyconst.nhat);

	zCopy(&zOne,&montyconst.one);
	to_monty(&montyconst.one,n);

	zFree(&g);
	zFree(&b);
	zFree(&q);
	zFree(&r);
	return;
}
Ejemplo n.º 3
0
void xGCD(z *a, z *b, z *x, z *y, z *g)
{
	//compute the extended GCD of a, b, returning g = GCD(a,b) and x, y 
	//such that ax + by = GCD(a,b) if a,b are coprime
	z t1,t2,t3,u,v,r,R,q,tmp;

//	int i;
	/*

	Step 1: 
	if a < b then 
	Set u=0, v=1, and r=b 
	Set U=1, V=0, and R=a 
	else 
	Set u=1, v=0, and r=a 
	Set U=0, V=1, and R=b 

	Step 2: 
	if R = 0 then return r (for the gcd) and no inverses exist. 
	if R = 1 then return R (for the gcd), V (for the inverse a(mod b)) and U (for the inverse of b(mod a)). 
	
	Step 3: 
	Calculate q = int(r/R) 
	Calculate t1 = u - U*q 
	Calculate t2 = v - V*q 
	Calculate t3 = r - R*q 
	set u=U, v=V, r=R 
	set U=t1, V=t2, R=t3 
	goto Step 2. 
	*/


	zInit(&tmp);
	zInit(&t1);
	zInit(&t2);
	zInit(&t3);
	zInit(&q);
	zInit(&r);
	zInit(&R);
	zInit(&u);
	zInit(&v);

	//need to check for temp allocation

	zClear(x);
	zClear(y);


	if (zCompare(a,b) < 0)
	{
		u.val[0]=0;
		v.val[0]=1;
		zCopy(b,&r);
		x->val[0]=1;
		y->val[0]=0;
		zCopy(a,&R);
	}
	else
	{
		u.val[0]=1;
		v.val[0]=0;
		zCopy(a,&r);
		x->val[0]=0;
		y->val[0]=1;
		zCopy(b,&R);
	}

	while (1)
	{
		if (zCompare(&zZero,&R) == 0)
		{
			zCopy(&r,g);
			zCopy(&zZero,x);
			zCopy(&zZero,y);
			break;
		}

		if (zCompare(&zOne,&R) == 0)
		{
			zCopy(&R,g);
			break;
		}

		zCopy(&r,&tmp);
		zDiv(&tmp,&R,&q,&t3);		//q = int(r/R), t3 = r % R

		zMul(&q,x,&tmp);			//t1 = u - U*q
		zSub(&u,&tmp,&t1);

		zMul(&q,y,&tmp);			//t2 = v - V*q
		zSub(&v,&tmp,&t2);

		zCopy(x,&u);
		zCopy(y,&v);
		zCopy(&R,&r);

		zCopy(&t1,x);
		zCopy(&t2,y);
		zCopy(&t3,&R);
	}

	if (x->size < 0)
	{
		x->size *= -1;
		zSub(b,x,x);
	}

	if (y->size < 0)
	{
		y->size *= -1;
		zSub(a,y,y);
	}

	zFree(&tmp);
	zFree(&t1);
	zFree(&t2);
	zFree(&t3);
	zFree(&q);
	zFree(&r);
	zFree(&R);
	zFree(&u);
	zFree(&v);
	x->type = UNKNOWN;
	y->type = UNKNOWN;
	g->type = UNKNOWN;
	return;
}
Ejemplo n.º 4
0
void zDec2Hex(z *u, z *v)
{
	//convert u[] in dec to v[] in hex by multiplying the ith digit by (1e9)*i
	//and adding to the previous digits

	z a,b,vv;
	int i,su = abs(u->size);

	zInit(&a);
	zInit(&b);
	zInit(&vv);

	if (v->alloc < su)
		zGrow(v,su);

	if (a.alloc < su)
	{
		zGrow(&a,su);
		zClear(&a);
	}

	if (b.alloc < su)
	{
		zGrow(&b,su);
		zClear(&b);
	}

	if (vv.alloc < su)
	{
		zGrow(&vv,su);
		zClear(&vv);
	}
	vv.size = su;

	//a holds the value of (1e9)*i
	a.size = 1;
	a.val[0] = 1;
	for (i=0;i<su;i++)
	{
		zShortMul(&a,u->val[i],&b);
		zAdd(&vv,&b,&vv);
		zShortMul(&a,MAX_DEC_WORD,&a);
	}

	//v may have unused high order limbs
	for (i=su-1;i>=0;i--)
	{
		if (vv.val[i] != 0)
			break;
	}
	vv.size = i+1;

	if (u->size < 0)
		vv.size *= -1;

	if (vv.size == 0)
		vv.size = 1;

	zCopy(&vv,v);

	zFree(&vv);
	zFree(&a);
	zFree(&b);
	return;
}
Ejemplo n.º 5
0
static void DrawPart(int IsModel, struct L3PartS *PartPtr, int CurColor, float m[4][4])
{
	float          r[4], m1[4][4];
	int            i, Color;
	struct L3LineS *LinePtr;
	vector3d       v3d[4];

#ifdef USE_OPENGL
	float mm[4][4];
	float det = 0;

	if ((ldraw_commandline_opts.F & TYPE_F_STUDLINE_MODE) != 0)
	  if (PartPtr->IsStud)
	  {
	    DrawPartLine(PartPtr, CurColor, m);
	    //DrawPartBox(PartPtr, CurColor, m, 1);
	    return;
	  }

	// Draw only bounding boxes of top level parts if in fast spin mode.
	if (ldraw_commandline_opts.F & TYPE_F_BBOX_MODE) 
	  if (PartPtr->FromPARTS) // (!IsModel)
	  {
	    if (ldraw_commandline_opts.F & TYPE_F_NO_POLYGONS) 
	      DrawPartBox(PartPtr, CurColor, m, 1);
	    else
	      DrawPartBox(PartPtr, CurColor, m, 0);
	    return;
	  }	

	  if (PartPtr->IsStud)
	    det = M3Det(m); // Check the determinant of m to fix mirrored studs.
#endif

	for (LinePtr = PartPtr->FirstLine; LinePtr; LinePtr = LinePtr->NextLine)
	{
#ifdef USE_OPENGL
                char *s;

	        if (Skip1Line(IsModel,LinePtr))
		  continue;
#endif
		hardcolor = 0; // Assume color 16 or 24.
		switch (LinePtr->Color)
		{
		case 16:
			Color = CurColor;
			break;
		case 24:
#ifdef SIXTEEN_EDGE_COLORS
			if (0 <= CurColor  &&  CurColor <= 15)
				Color = edge_color(CurColor);
			else
				Color = 0;
#else
			Color = edge_color(CurColor);
#endif
			break;
		default:
			Color = LinePtr->Color;
#if 0
			if ((LinePtr->LineType == 3) || 
			    (LinePtr->LineType == 4))
			  // Hardcoded color = printed.  Blend me!
			  hardcolor = 1; 
#else
			if (LinePtr->LineType != 1)
			  // Hardcoded color = printed.  Blend me!
			  hardcolor = 1; 
#endif
			break;
		}
		switch (LinePtr->LineType)
		{
		case 0:
#ifdef USE_OPENGL
		        // Skip whitespace
		        for (s = LinePtr->Comment; *s != 0; s++)
		        {
			  if ((*s != ' ') && (*s != '\t'))
			    break;
			}
			if (strnicmp(s,"STEP",4) == 0)
			{
			  // if (include_stack_ptr <= ldraw_commandline_opts.output_depth )
			  {
			    zStep(stepcount,1);
			    stepcount++;
			  }
			}
			else if (strncmp(s,"CLEAR",5) == 0)
			{
			  // if (include_stack_ptr <= ldraw_commandline_opts.output_depth )
			  {
			    zClear();
			  }
			}
			// Experiment with MLCAD extensions
			// Based on info provided on lugnet.
			else if (strncmp(s,"BUFEXCHG A STORE",16) == 0)
			{
			  int k;

			  if (IsModel)
			  {
			    k = BufA1Store(IsModel,LinePtr);
			    printf("BUFEXCHG A STORE %d\n", k);
			  }
			}
			else if (strncmp(s,"BUFEXCHG A RETRIEVE",19) == 0)
			{
			  int j,n, opts, dcp, cp;
			  extern int curpiece;
			  extern int DrawToCurPiece;
			  void DrawModel(void);
			   
			  if (IsModel)
			  {
			    n = BufA1Retrieve(IsModel,LinePtr);
			    printf("BUFEXCHG A RETRIEVE %d\n", n);
			    // clear and redraw model up to saved point.
#if 0		    
			    opts = ldraw_commandline_opts.M;
			    ldraw_commandline_opts.M = 'C';
			    dcp = DrawToCurPiece;
			    DrawToCurPiece = 1;
			    cp = curpiece;
			    zClear();
			    curpiece = n;
			    Init1LineCounter();
			    BufA1Store(IsModel,LinePtr);
			    DrawModel();
			    //for(j = 0; j < n; j++)
			    //  Draw1Part(j, -1);
			    DrawToCurPiece = dcp;
			    curpiece = cp;
			    ldraw_commandline_opts.M = opts;
#endif
			  }
			}
			else if (strncmp(s,"GHOST",5) == 0)
			{
			  if (IsModel)
			  {
			    // Parse the rest of the line as a .DAT line.
			  }
			}
			if (ldraw_commandline_opts.M != 'C')
			{
			  // Non-continuous output stop after each step.
			}

			// Parse global color change meta-commands
			// Should?? be done by ReadMetaLine() in L3Input.cpp			
			// Should SAVE old colors, restore after the for loop.
			// To save space, build a linked list of saved colors.
			// Do NOT resave a color if its already saved.
			if ((strncmp(s,"COLOR",5) == 0) ||
			    (strncmp(s,"COLOUR",6) == 0))
			{
			  if (ldraw_commandline_opts.debug_level == 1)
			    printf("%s\n", s);
			  //0 COLOR 4 red 0 196 0 38 255 196 0 38 255
			  char colorstr[256];
			  char name[256];
			  int n, inverse_index;

			  n = sscanf(s, "%s %d %s %d %f %f %f %f %f %f %f %f",
				     colorstr, &i, name, &inverse_index,
				     &m1[0][0], &m1[0][1], &m1[0][2], &m1[0][3],
				     &m1[1][0], &m1[1][1], &m1[1][2], &m1[1][3]);
			  if (n != 12)
			  {
			    if (ldraw_commandline_opts.debug_level == 1)
			      printf("Illegal COLOR syntax %d\n",n);
			    break;
			  }
			  zcolor_modify(i,name,inverse_index,
					(int)m1[0][0], (int)m1[0][1], (int)m1[0][2], (int)m1[0][3],
					(int)m1[1][0], (int)m1[1][1], (int)m1[1][2], (int)m1[1][3]);
			}

			// Intercept the ldconfig.ldr !COLOUR meta command.
			if (ldlite_parse_colour_meta(s))
			  break;
#else
			if (strncmp(LinePtr->Comment,"STEP",4) == 0)
			{
				// STEP encountered, you may set some flags to enable/disable e.g. drawing
				// (Note that I have not tested this, but this is the way it is supposed to work :-)
			}
#endif
			break;
		case 1:
#ifdef USE_OPENGL
    // NOTE:  I could achieve L3Lab speeds if I delay rendering studs till last
    // then do occlusion tests on the bounding boxes before rendering.
    // Unfortunately GL_OCCLUSION_TEST_HP is an extension (SUN, HP, ???)
    // 
    // Other ideas include substituting GLU cylinder primitives for studs.  
		        if (LinePtr->PartPtr->IsStud)
			{
                          if ((ldraw_commandline_opts.F & TYPE_F_STUDLESS_MODE) != 0)
                            break;
#ifdef USE_OPENGL_OCCLUSION
                          if ((ldraw_commandline_opts.F & TYPE_F_STUDONLY_MODE) != 0)
			  {
			    M4M4Mul(m1,m,LinePtr->v);
			    if (Occluded(LinePtr->PartPtr, Color, m1))
			      break;
			  }
#endif
			}	    
#endif
			M4M4Mul(m1,m,LinePtr->v);
#ifdef USE_OPENGL
			// Negative determinant means its a mirrored stud.
			if (det < 0)
			{
			  // For now, we only support Paul Easters logo.dat texture.
			  // For display speed, we really should precalculate an IsLogo flag
			  // and use that here rather than IsStud.
			  if  (LinePtr->PartPtr && LinePtr->PartPtr->DatName &&
			       !stricmp(LinePtr->PartPtr->DatName, "logo.dat"))
			  {
			    float mirror[4][4] = {
			      {1.0,0.0,0.0,0.0},
			      {0.0,1.0,0.0,0.0},
			      {0.0,0.0,-1.0,0.0},
			      {0.0,0.0,0.0,1.0}
			    };
			    M4M4Mul(mm,m1,mirror);
			    memcpy(m1,mm,sizeof(m1));
			  }
			}
			  
			// implement nesting level counter.
			include_stack_ptr++;
			DrawPart(0,LinePtr->PartPtr,Color,m1);
			include_stack_ptr--;	
			// Do zStep() after the model, not toplevel parts.
#else
			DrawPart(0,LinePtr->PartPtr,Color,m1);
			if (IsModel) zStep(0,0);
#endif
			break;
		case 2:
#ifdef USE_OPENGL_OCCLUSION
		        if ((ldraw_commandline_opts.F & TYPE_F_STUDONLY_MODE) &&
			    !(PartPtr->IsStud))
                            break;
#endif
			for (i=0; i<LinePtr->LineType; i++)
			{
				M4V3Mul(r,m,LinePtr->v[i]);
				v3d[i].x=r[0];
				v3d[i].y=r[1];
				v3d[i].z=r[2];
			}
			render_line(&v3d[0],&v3d[1],Color);
			break;
		case 3:
#ifdef USE_OPENGL_OCCLUSION
		        if ((ldraw_commandline_opts.F & TYPE_F_STUDONLY_MODE) &&
			    !(PartPtr->IsStud))
                            break;
#endif
			for (i=0; i<LinePtr->LineType; i++)
			{
				M4V3Mul(r,m,LinePtr->v[i]);
				v3d[i].x=r[0];
				v3d[i].y=r[1];
				v3d[i].z=r[2];
			}
#ifdef USE_OPENGL
			// Try to render solid Primitives visibly when in XOR wire mode.
			if (ldraw_commandline_opts.F & TYPE_F_XOR_PRIMITIVE)
			{
			    render_line(&v3d[0],&v3d[1],Color);
			    render_line(&v3d[1],&v3d[2],Color);
			    render_line(&v3d[2],&v3d[0],Color);
			    break;
			}
#endif
			render_triangle(&v3d[0],&v3d[1],&v3d[2],Color);
			break;
		case 4:
#ifdef USE_OPENGL_OCCLUSION
		        if ((ldraw_commandline_opts.F & TYPE_F_STUDONLY_MODE) &&
			    !(PartPtr->IsStud))
                            break;
#endif
			for (i=0; i<LinePtr->LineType; i++)
			{
				M4V3Mul(r,m,LinePtr->v[i]);
				v3d[i].x=r[0];
				v3d[i].y=r[1];
				v3d[i].z=r[2];
			}
#ifdef USE_OPENGL
			// Try to render solid Primitives visibly when in XOR wire mode.
			if (ldraw_commandline_opts.F & TYPE_F_XOR_PRIMITIVE)
			{
			    render_line(&v3d[0],&v3d[1],Color);
			    render_line(&v3d[1],&v3d[2],Color);
			    render_line(&v3d[2],&v3d[3],Color);
			    render_line(&v3d[3],&v3d[0],Color);
			    break;
			}
#endif
			render_quad(&v3d[0],&v3d[1],&v3d[2],&v3d[3],Color);
			break;
		case 5:
#ifdef USE_OPENGL_OCCLUSION
		        if ((ldraw_commandline_opts.F & TYPE_F_STUDONLY_MODE) &&
			    !(PartPtr->IsStud))
                            break;
#endif
			for (i=0; i<4; i++)
			{
				M4V3Mul(r,m,LinePtr->v[i]);
				v3d[i].x=r[0];
				v3d[i].y=r[1];
				v3d[i].z=r[2];
			}
			render_five(&v3d[0],&v3d[1],&v3d[2],&v3d[3],Color);
			break;
		}
	}
	if (((zDetailLevel == TYPE_PART) && (PartPtr->FromPARTS)) ||
	    ((zDetailLevel == TYPE_P) && (PartPtr->FromP)))
	  zStep(-1, 0);
}
Ejemplo n.º 6
0
void monty_mul_interleaved(z *a, z *b, z *c, z *n)
{

	fp_digit nhat = montyconst.nhat.val[0], u;
	int i,j,t=n->size;
	int szb = abs(b->size);
	fp_digit k;
	z *t1,*t2;
	z s1,s2;

	zInit(&s1);
	zInit(&s2);
	t1 = &s1;
	t2 = &s2;
	zClear(t1);
	zClear(t2);

	for (i=0;i<t;i++)
	{
		u = (t1->val[0] + a->val[i] * b->val[0]) * nhat;	//truncation will provide mod b
		
		/****** short mul of b with ai, simultaneous with addition of A (in t1) ********/
		for (j=t1->size;j<szb;j++)
			t1->val[j] = 0;		//zero any unused words up to size of b, so we can add
		//mul and add up to size of b
		k=0;
		for (j=0;j<szb ;j++)
			spMulAdd(b->val[j],a->val[i],t1->val[j],k,t2->val + j,&k);
		//continue with add if A has more words
		for (;j<t1->size;j++)
			spAdd(t1->val[j],k,t2->val+j,&k);

		//adjust size
		if (t1->size > szb)
			t2->size = t1->size;
		else
			t2->size = szb;

		//account for carry
		if (k)
		{
			t2->val[t2->size]=k;
			t2->size++;
			j++;
		}
		/****** short mul of b with ai, simultaneous with addition of A (in t1) ********/


		/****** short mul of n with u, simultaneous with add. of prev step (in t2) 
		 and with right shift of one word                                       ********/

		for (;j<t;j++)
			t2->val[j] = 0;		//zero any unused words up to size of n, so we can add
		//mul and add up to size of n, store into one word previous
		k=0;
		//needs first mul to get k set right, answer gets shifted to oblivion
		spMulAdd(n->val[0],u,t2->val[0],k,t1->val,&k);
		for (j=1;j<t;j++)
			spMulAdd(n->val[j],u,t2->val[j],k,t1->val + j - 1,&k);
		//continue if t2 is bigger than n
		for (;j<t2->size;j++)
			spAdd(t2->val[j],k,t1->val+j-1,&k);

		//adjust size
		if (t2->size > t)
			t1->size = t2->size - 1;
		else
			t1->size = t - 1;

		//account for carry
		if (k)
		{
			t1->val[t1->size]=k;
			t1->size++;
		}
		/****** short mul of n with u, simultaneous with add. of prev step (in t2) 
		 and with right shift of one word                                       ********/

	}

	//almost done
	if (zCompare(t1,n) >= 0)
		zSub(t1,n,c);
	else
		zCopy(t1,c);

	zFree(&s1);
	zFree(&s2);
	return;
}
Ejemplo n.º 7
0
void fp_mul_comba(z *A, z *B, z *C)
{
   int       ix, iy, iz, tx, ty, pa, sA, sB;
   fp_digit  c0, c1, c2, *tmpx, *tmpy;
   z    *dst;
   z loc;

   COMBA_START;
   COMBA_CLEAR;
   
   /* get size of output and trim */
   sA = abs(A->size);
   sB = abs(B->size);
   pa = sA + sB;

   if (A == C || B == C) {
	   zInit(&loc);
      //dst = &atmp1;
	   dst = &loc;
   } else {
      dst = C;
   }

	if (dst->alloc < pa)
		zGrow(dst,pa + LIMB_BLKSZ);
	zClear(dst);

   for (ix = 0; ix < pa; ix++) {
      /* get offsets into the two bignums */
      ty = MIN(ix, sB-1);
      tx = ix - ty;

      /* setup temp aliases */
      tmpx = A->val + tx;
      tmpy = B->val + ty;

      /* this is the number of times the loop will iterrate, essentially its 
         while (tx++ < a->used && ty-- >= 0) { ... }
       */
      iy = MIN(sA-tx, ty+1);

      /* execute loop */
      COMBA_FORWARD;
      for (iz = 0; iz < iy; ++iz) {
		  MULADD(*tmpx++, *tmpy--);
      }

      /* store term */
      COMBA_STORE(dst->val[ix]);
  }
  COMBA_FINI;

  dst->size = pa;
  if ((A->size * B->size) < 0)
	  dst->size *= -1;

  fp_clamp(dst);
  if (dst != C) {
	zCopy(dst, C);
	zFree(&loc);
  }
}
void fp_sqr_comba(z *A, z *B)
{
  int       pa, ix, iz, sA;
  fp_digit  c0, c1, c2;
  z    *dst;
  z loc;
#ifdef TFM_ISO
  uint64   tt;
#endif    

  /* get size of output and trim */
  sA = abs(A->size);
   pa = sA + sA;

  /* number of output digits to produce */
  COMBA_START;
  CLEAR_CARRY;

  if (A == B) {
     //zClear(&atmp1);
	  zInit(&loc);
     //dst = &atmp1;
	  dst = &loc;
  } else {
     zClear(B);
     dst = B;
  }

	if (dst->alloc < pa)
	{
		zGrow(dst,pa + LIMB_BLKSZ);
	}
	zClear(dst);
	
  for (ix = 0; ix < pa; ix++) { 
      int      tx, ty, iy;
      fp_digit *tmpy, *tmpx;

      /* get offsets into the two bignums */
      ty = MIN(sA-1, ix);
      tx = ix - ty;

      /* setup temp aliases */
      tmpx = A->val + tx;
      tmpy = A->val + ty;

      /* this is the number of times the loop will iterrate,
         while (tx++ < a->used && ty-- >= 0) { ... }
       */
      iy = MIN(sA-tx, ty+1);

      /* now for squaring tx can never equal ty 
       * we halve the distance since they approach 
       * at a rate of 2x and we have to round because 
       * odd cases need to be executed
       */
      iy = MIN(iy, (ty-tx+1)>>1);

      /* forward carries */
      CARRY_FORWARD;

      /* execute loop */
      for (iz = 0; iz < iy; iz++) {
		  SQRADD2(*tmpx++, *tmpy--);
      }

      /* even columns have the square term in them */
      if ((ix&1) == 0) {
		  SQRADD(A->val[ix>>1],A->val[ix>>1]);
      }

      /* store it */
      COMBA_STORE(dst->val[ix]);
  }