Пример #1
0
/* Square the n-limb operand {up, n} and store the result at rp.

   The operand is handled as four pieces a0..a3 of sn = ceil(n/4) limbs
   each, except the top piece a3 which is given n - 3*sn limbs.  Seven
   values are squared, one per evaluation point (1, -1, 1/2, -1/2, 2,
   infinity and 0), and mpn_toom4_interpolate combines them into rp.
   rp must have room for 2*n limbs: any limbs between the length reported
   by the interpolation and 2*n are zeroed before returning.

   NOTE(review): a0..a3, u2..u5, r1..r7, the TC4_* / SQR_TC4* macros and
   the tc4_* helpers are defined outside this function.  The arithmetic
   described in the comments below is read off their names and argument
   patterns; confirm against their definitions.  */
void
mpn_toom4_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t len1, ind;
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, rpn, t4;

  /* NOTE(review): len1 is only touched by MPN_NORMALIZE below and cy is
     never referenced in the visible code; both look unused unless one of
     the macros refers to them by name.  */
  len1 = n;
  ASSERT (n >= 1);

  MPN_NORMALIZE(up, len1);
  
  /* sn = ceil(n/4): limb count of each of the three low pieces.  */
  sn = (n - 1) / 4 + 1;

  /* a0 - a3 are defined in mpn_toom4_mul_n above */
  /* NOTE(review): for small n (e.g. n = 1 gives sn = 1) the length
     n - 3*sn passed for a3 is not positive; presumably callers only use
     this routine above some size threshold -- confirm.  */
  
   TC4_NORM(a0, a0n, sn);
	TC4_NORM(a1, a1n, sn);
	TC4_NORM(a2, a2n, sn);
	TC4_NORM(a3, a3n, n - 3*sn); 

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   /* Scratch space; released with the same limb count at the end.  */
   tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));

   /* Odd and even halves first, then their sum and difference:
        u5 = a3 + a1,  u4 = a2 + a0,
        u2 = u4 + u5   (value at  1),
        u3 = u4 - u5   (value at -1).  */
   tc4_add_unsigned(u5, &n5, a3, a3n, a1, a1n); 
   tc4_add_unsigned(u4, &n4, a2, a2n, a0, a0n); 
	tc4_add_unsigned(u2, &n2, u4, n4, u5, n5); 
   tc4_sub(u3, &n3, u4, n4, u5, n5);

	/* Order matters: n3 is read as the length of u3 by the first squaring
	   and only then overwritten with the length of r3 by the second.
	   Likewise n4 stops being the length of u4 and becomes that of r4.  */
	SQR_TC4(r4, n4, u3, n3);
   SQR_TC4_UNSIGNED(r3, n3, u2, n2);
	
	/* Scaled evaluations at +1/2 and -1/2:
	     r1 = 8*a0 + 2*a2,  r2 = 4*a1 + a3,
	     u4 = r1 + r2,      u5 = r1 - r2.  */
	tc4_lshift(r1, &n1, a0, a0n, 3);
	tc4_addlsh1_unsigned(r1, &n1, a2, a2n);
 	tc4_lshift(r2, &n8, a1, a1n, 2);
   tc4_add(r2, &n8, r2, n8, a3, a3n);
   tc4_add(u4, &n9, r1, n1, r2, n8);
   tc4_sub(u5, &n5, r1, n1, r2, n8);
   
	/* Keep copies of the two low limbs of r3 across the next two
	   squarings.  r3[1] is put back afterwards and r30 is handed to the
	   interpolation; presumably r5 overlaps the low end of r3 (see the
	   layout diagram below) -- confirm.  When n3 is 0 the saved low limb
	   is forced to 0.  */
	r30 = r3[0];
	if (!n3) r30 = CNST_LIMB(0);
   r31 = r3[1];
	/* n5 is the length of u5 for the first squaring and is then reused
	   as the length of r5 by the second.  */
	SQR_TC4(r6, n6, u5, n5);
   SQR_TC4_UNSIGNED(r5, n5, u4, n9);
   r3[1] = r31;

   /* Evaluation at 2: u2 = 8*a3 + 4*a2 + 2*a1 + a0.  */
   tc4_lshift(u2, &n8, a3, a3n, 3);
   tc4_addmul_1(u2, &n8, a2, a2n, 4);
	tc4_addlsh1_unsigned(u2, &n8, a1, a1n);
	tc4_add(u2, &n8, u2, n8, a0, a0n);
   
	/* Squares at 2, at infinity (top piece) and at 0 (bottom piece).  */
	SQR_TC4_UNSIGNED(r2, n2, u2, n8);
   SQR_TC4_UNSIGNED(r1, n1, a3, a3n);
   SQR_TC4_UNSIGNED(r7, n7, a0, a0n);

	/* Presumably converts each result from (pointer, length) form into a
	   fixed-width field of the given limb count -- confirm against the
	   TC4_DENORM definition.  */
	TC4_DENORM(r1, n1,  t4 - 1);
   TC4_DENORM(r2, n2,  t4 - 1);
   if (n3)
     TC4_DENORM(r3, n3,  t4 - 1);
   else {
     /* r3 is zero: clear limbs 1 .. t4-2 by hand.  Limb 0 is not written
        here; r30, which is what gets passed to the interpolation, was
        already forced to 0 above.  */
     /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
     for (ind = 1 ; ind < t4 - 1; ind++)
        (r3)[ind] = CNST_LIMB(0);
   }
   TC4_DENORM(r4, n4,  t4 - 1);
   TC4_DENORM(r5, n5,  t4 - 1);
   TC4_DENORM(r6, n6,  t4 - 1);
   TC4_DENORM(r7, n7,  t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

	/* Combine the seven squares; rpn receives the limb count produced.  */
	mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

	/* Zero-fill so that exactly 2*n limbs of rp are defined.  */
	if (rpn != 2*n) 
	{
		MPN_ZERO((rp + rpn), 2*n - rpn);
	}

   __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1));
}
Пример #2
0
/* Multiply {up, un} by {vp, vn} and write the result to
   {rp, un + vn} assuming vn > 2*ceil(un/5).

   Note that rp gets un + vn limbs stored, even if the actual 
   result only needs un + vn - 1.

   With sn = ceil(un/5), the first operand is split into five pieces
   a0..a4 and the second into three pieces b0..b2, each sn limbs long
   except the top pieces a4 (un - 4*sn limbs) and b2 (vn - 2*sn limbs).
   Both are evaluated at 1, -1, 1/2, -1/2, 2, infinity and 0; the seven
   pointwise products are combined by mpn_toom4_interpolate.

   NOTE(review): r1..r7, the TC4_* / MUL_TC4* macros and the tc4_*
   helpers are defined outside this function.  The arithmetic described
   in the comments below is read off their names and argument patterns;
   confirm against their definitions.
*/
void
mpn_toom53_mul (mp_ptr rp, mp_srcptr up, mp_size_t un,
		          mp_srcptr vp, mp_size_t vn)
{
  mp_size_t ind;
  /* NOTE(review): cy is never referenced in the visible code; it looks
     unused unless one of the macros refers to it by name.  */
  mp_limb_t cy, r30, r31;
  mp_ptr tp;
  mp_size_t a0n, a1n, a2n, a3n, a4n, b0n, b1n, b2n, sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, rpn, t4;

  /* sn = ceil(un/5): limb count of every piece except the top ones.  */
  sn = (un + 4) / 5;

  ASSERT (vn > 2*sn);
  
/* Pieces of the two operands, lowest first.  NOTE(review): these macros
   (and u2..u5 below) are not #undef'd anywhere in the visible code.  */
#define a0 (up)
#define a1 (up + sn)
#define a2 (up + 2*sn)
#define a3 (up + 3*sn)
#define a4 (up + 4*sn)
#define b0 (vp)
#define b1 (vp + sn)
#define b2 (vp + 2*sn)

   /* NOTE(review): nothing here asserts un > 4*sn, so the length passed
      for a4 is not checked to be positive; presumably guaranteed by the
      caller -- confirm.  */
   TC4_NORM(a0, a0n, sn);
   TC4_NORM(a1, a1n, sn);
   TC4_NORM(a2, a2n, sn);
   TC4_NORM(a3, a3n, sn);
   TC4_NORM(a4, a4n, un - 4*sn); 
   TC4_NORM(b0, b0n, sn);
   TC4_NORM(b1, b1n, sn);
   TC4_NORM(b2, b2n, vn - 2*sn); 

   t4 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs

   /* Scratch space; released with the same limb count at the end.  */
   tp = __GMP_ALLOCATE_FUNC_LIMBS(4*t4 + 4*(sn + 1));

/* Four (sn+1)-limb temporaries placed after the first 4*t4 limbs of tp.  */
#define u2 (tp + 4*t4)
#define u3 (tp + 4*t4 + (sn+1))
#define u4 (tp + 4*t4 + 2*(sn+1))
#define u5 (tp + 4*t4 + 3*(sn+1))

   /* First operand at +1 and -1:
        u2 = a3 + a1,  u5 = a4 + a2 + a0,
        u3 = u5 + u2,  u4 = u5 - u2.  */
   tc4_add_unsigned(u2, &n2, a3, a3n, a1, a1n); 
   tc4_add_unsigned(u5, &n5, a2, a2n, a0, a0n); 
   tc4_add_unsigned(u5, &n5, u5, n5, a4, a4n); 
   tc4_add_unsigned(u3, &n3, u5, n5, u2, n2); 
   tc4_sub(u4, &n4, u5, n5, u2, n2);

   /* Second operand at +1 and -1 (u5 is reused):
        r2 = (b2 + b0) + b1,  u5 = (b2 + b0) - b1.  */
   tc4_add_unsigned(u5, &n5, b2, b2n, b0, b0n);
   tc4_add_unsigned(r2, &n8, u5, n5, b1, b1n); 
   tc4_sub(u5, &n5, u5, n5, b1, b1n);

   /* n3 and n4 go in as the lengths of u3 and u4 and come out as the
      lengths of the products r3 and r4.  */
   MUL_TC4_UNSIGNED(r3, n3, u3, n3, r2, n8); /* 1 */
   MUL_TC4(r4, n4, u4, n4, u5, n5); /* -1 */
   
   /* First operand at +1/2 and -1/2, scaled by 16:
        r1 = 16*a0 + 4*a2 + a4,  r2 = 8*a1 + 2*a3,
        u5 = r1 + r2,            u3 = r1 - r2.  */
   tc4_lshift(r1, &n1, a0, a0n, 4);
   tc4_lshift(u3, &n9, a2, a2n, 2);
   tc4_add_unsigned(r1, &n1, r1, n1, u3, n9);
   tc4_add_unsigned(r1, &n1, r1, n1, a4, a4n);
   tc4_lshift(r2, &n8, a1, a1n, 3);
   tc4_addlsh1_unsigned(r2, &n8, a3, a3n);
   tc4_add_unsigned(u5, &n5, r1, n1, r2, n8);
   tc4_sub(u3, &n9, r1, n1, r2, n8);

   /* Second operand at +1/2 and -1/2, scaled by 4:
        r1 = 4*b0 + b2,  u4 = 2*b1,
        u2 = r1 + u4,    r2 = r1 - u4.  */
   tc4_lshift(r1, &n1, b0, b0n, 2);
   tc4_add_unsigned(r1, &n1, r1, n1, b2, b2n);
   tc4_lshift(u4, &n10, b1, b1n, 1);
   tc4_add_unsigned(u2, &n2, r1, n1, u4, n10);
   tc4_sub(r2, &n8, r1, n1, u4, n10);
   
   /* Keep copies of the two low limbs of r3 across the next two
      products.  r3[1] is put back afterwards and r30 is handed to the
      interpolation; presumably r5 overlaps the low end of r3 (see the
      layout diagram below) -- confirm.  When n3 is 0 the saved low limb
      is forced to 0.  */
   r30 = r3[0];
   if (!n3) r30 = CNST_LIMB(0);
   r31 = r3[1];
   MUL_TC4_UNSIGNED(r5, n5, u5, n5, u2, n2); /* 1/2 */
   MUL_TC4(r6, n6, u3, n9, r2, n8); /* -1/2 */
   r3[1] = r31;

   /* First operand at 2: u2 = 16*a4 + 8*a3 + 4*a2 + 2*a1 + a0.  */
   tc4_lshift(u2, &n2, a4, a4n, 4);
   tc4_addmul_1(u2, &n2, a3, a3n, 8);
   tc4_addmul_1(u2, &n2, a2, a2n, 4);
   tc4_addlsh1_unsigned(u2, &n2, a1, a1n);
   tc4_add(u2, &n2, u2, n2, a0, a0n);

   /* Second operand at 2: r1 = 4*b2 + 2*b1 + b0.  */
   tc4_lshift(r1, &n1, b2, b2n, 2);
   tc4_addlsh1_unsigned(r1, &n1, b1, b1n);
   tc4_add(r1, &n1, r1, n1, b0, b0n);
   
   /* r1 is consumed as an input here before being overwritten by the
      product at infinity just below.  */
   MUL_TC4_UNSIGNED(r2, n2, u2, n2, r1, n1); /* 2 */

   MUL_TC4_UNSIGNED(r1, n1, a4, a4n, b2, b2n); /* oo */
   MUL_TC4_UNSIGNED(r7, n7, a0, a0n, b0, b0n); /* 0 */

   /* Presumably converts each product from (pointer, length) form into a
      fixed-width field of the given limb count -- confirm against the
      TC4_DENORM definition.  */
   TC4_DENORM(r1, n1,  t4 - 1);
   TC4_DENORM(r2, n2,  t4 - 1);
   if (n3)
     TC4_DENORM(r3, n3,  t4 - 1); 
   else {
     /* r3 is zero: clear limbs 1 .. t4-2 by hand.  Limb 0 is not written
        here; r30, which is what gets passed to the interpolation, was
        already forced to 0 above.  */
     /* MPN_ZERO defeats gcc 4.1.2 here, hence the explicit for loop */
     for (ind = 1 ; ind < t4 - 1; ind++) 
        (r3)[ind] = CNST_LIMB(0); 
   }
   TC4_DENORM(r4, n4,  t4 - 1);
   TC4_DENORM(r5, n5,  t4 - 1);
   TC4_DENORM(r6, n6,  t4 - 1);
   TC4_DENORM(r7, n7,  t4 - 2); // we treat r7 differently (it cannot exceed t4-2 in length)

/*	rp        rp1          rp2           rp3          rp4           rp5         rp6           rp7
<----------- r7-----------><------------r5-------------->            
                                                       <-------------r3------------->

              <-------------r6------------->                        < -----------r2------------>{           }
                                         <-------------r4-------------->         <--------------r1---->
*/

   /* Combine the seven products; rpn receives the limb count produced.  */
   mpn_toom4_interpolate(rp, &rpn, sn, tp, t4 - 1, n4, n6, r30);

   /* Zero-fill so that exactly un + vn limbs of rp are defined.  */
   if (rpn != un + vn) 
   {
	  MPN_ZERO((rp + rpn), un + vn - rpn);
   }

   __GMP_FREE_FUNC_LIMBS (tp, 4*t4 + 4*(sn+1));
}