Ejemplo n.º 1
0
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num)
	{
	int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num);
	int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num);

	if (sizeof(size_t)==4)
		{
#if 1 || (defined(__APPLE__) && defined(__MACH__))
		if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
			return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
#else
		/* boundary of 32 was experimentally determined on
		   Linux 2.6.22, might have to be adjusted on AIX... */
		if (num>=32 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
			{
			sigset_t oset;
			int ret;

			sigprocmask(SIG_SETMASK,&all_masked,&oset);
			ret=bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
			sigprocmask(SIG_SETMASK,&oset,NULL);

			return ret;
			}
#endif
		}
	else if ((OPENSSL_ppccap_P&PPC_FPU64))
		/* this is a "must" on POWER6, but run-time detection
		 * is not implemented yet... */
		return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);

	return bn_mul_mont_int(rp,ap,bp,np,n0,num);
	}
Ejemplo n.º 2
0
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
	{
	int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
	int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);

	if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
		(SPARCV9_PREFER_FPU|SPARCV9_VIS1))
		return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
	else
		return bn_mul_mont_int(rp,ap,bp,np,n0,num);
	}
Ejemplo n.º 3
0
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num)
{
    int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                         const BN_ULONG *np, const BN_ULONG *n0, int num);
    int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num);
    int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num);

    if (!(num & 1) && num >= 6) {
        if ((num & 15) == 0 && num <= 64 &&
                (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
                (CFR_MONTMUL | CFR_MONTSQR)) {
            typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
                                          const BN_ULONG *bp,
                                          const BN_ULONG *np,
                                          const BN_ULONG *n0);
            int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap,
                                 const BN_ULONG *bp, const BN_ULONG *np,
                                 const BN_ULONG *n0);
            int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
                                  const BN_ULONG *bp, const BN_ULONG *np,
                                  const BN_ULONG *n0);
            int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
                                  const BN_ULONG *bp, const BN_ULONG *np,
                                  const BN_ULONG *n0);
            int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
                                  const BN_ULONG *bp, const BN_ULONG *np,
                                  const BN_ULONG *n0);
            static const bn_mul_mont_f funcs[4] = {
                bn_mul_mont_t4_8, bn_mul_mont_t4_16,
                bn_mul_mont_t4_24, bn_mul_mont_t4_32
            };
            bn_mul_mont_f worker = funcs[num / 16 - 1];

            if ((*worker) (rp, ap, bp, np, n0))
                return 1;
            /* retry once and fall back */
            if ((*worker) (rp, ap, bp, np, n0))
                return 1;
            return bn_mul_mont_vis3(rp, ap, bp, np, n0, num);
        }
        if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3))
            return bn_mul_mont_vis3(rp, ap, bp, np, n0, num);
        else if (num >= 8 &&
                 (OPENSSL_sparcv9cap_P[0] &
                  (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) ==
                 (SPARCV9_PREFER_FPU | SPARCV9_VIS1))
            return bn_mul_mont_fpu(rp, ap, bp, np, n0, num);
    }
    return bn_mul_mont_int(rp, ap, bp, np, n0, num);
}
Ejemplo n.º 4
0
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num)
{
    int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num);
    int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                          const BN_ULONG *np, const BN_ULONG *n0, int num);

    if (num < 4)
        return 0;

    if ((num & 3) == 0)
        return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);

    /*
     * There used to be [optional] call to bn_mul_mont_fpu64 here,
     * but above subroutine is faster on contemporary processors.
     * Formulation means that there might be old processors where
     * FPU code path would be faster, POWER6 perhaps, but there was
     * no opportunity to figure it out...
     */

    return bn_mul_mont_int(rp, ap, bp, np, n0, num);
}
Ejemplo n.º 5
0
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num)
{
    int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                         const BN_ULONG *np, const BN_ULONG *n0, int num);
    int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num);
    int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num);

    if (!(num & 1) && num >= 6) {
        if ((num & 15) == 0 && num <= 64 &&
            (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
            (CFR_MONTMUL | CFR_MONTSQR)) {
            typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
                                          const BN_ULONG *bp,
                                          const BN_ULONG *np,
                                          const BN_ULONG *n0);
            int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap,
                                 const BN_ULONG *bp, const BN_ULONG *np,
                                 const BN_ULONG *n0);
            int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
                                  const BN_ULONG *bp, const BN_ULONG *np,
                                  const BN_ULONG *n0);
            int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
                                  const BN_ULONG *bp, const BN_ULONG *np,
                                  const BN_ULONG *n0);
            int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
                                  const BN_ULONG *bp, const BN_ULONG *np,
                                  const BN_ULONG *n0);
            static const bn_mul_mont_f funcs[4] = {
                bn_mul_mont_t4_8, bn_mul_mont_t4_16,
                bn_mul_mont_t4_24, bn_mul_mont_t4_32
            };
            bn_mul_mont_f worker = funcs[num / 16 - 1];

            if ((*worker) (rp, ap, bp, np, n0))
                return 1;
            /* retry once and fall back */
            if ((*worker) (rp, ap, bp, np, n0))
                return 1;
            return bn_mul_mont_vis3(rp, ap, bp, np, n0, num);
        }
        if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3))
            return bn_mul_mont_vis3(rp, ap, bp, np, n0, num);
        else if (num >= 8 &&
                 /*
                  * bn_mul_mont_fpu doesn't use FMADD, we just use the
                  * flag to detect when FPU path is preferable in cases
                  * when current heuristics is unreliable. [it works
                  * out because FMADD-capable processors where FPU
                  * code path is undesirable are also VIS3-capable and
                  * VIS3 code path takes precedence.]
                  */
                 ( (OPENSSL_sparcv9cap_P[0] & SPARCV9_FMADD) ||
                   (OPENSSL_sparcv9cap_P[0] &
                    (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) ==
                   (SPARCV9_PREFER_FPU | SPARCV9_VIS1) ))
            return bn_mul_mont_fpu(rp, ap, bp, np, n0, num);
    }
    return bn_mul_mont_int(rp, ap, bp, np, n0, num);
}