// perform convolution with the taps FINL vcs Convolute ( const vcs& input, // The input data const vcs *Taps, vcs *temp, // The Temp Table for next convolution int Tap_num ) { register vcs ans[4]; register vcs * pTap = (vcs*)Taps; register vcs * pTemp1 = temp; register vcs * pTemp2 = temp; for ( int i=0; i<4; i ++ ) { ans[i] = add((vcs)(mul_high((vs)(input),(vs)(*pTap))),*pTemp1); pTap ++; pTemp1 ++; } vcs ret = hadd4 (ans[0], ans[1], ans[2], ans[3] ); for (int i = 4; i < Tap_num; i++) { *pTemp2 = add((vcs)(mul_high((vs)(input),(vs)(*pTap))), *pTemp1); pTemp1 ++; pTap ++; pTemp2++; } return ret; }
/*
 * Cross-check of asm_mulm192 against a portable three-limb computation.
 *
 * The product is first obtained from asm_mulm192 into a local buffer. The
 * same product is then recomputed limb by limb in C, using the
 * montgomery_inv_n / montgomery_modulo_n globals, and stored in c. If the
 * two results differ, the operand addresses are printed and the operand,
 * result, modulus and inverse values are passed to complain().
 *
 * c: receives the three-limb result of the C computation.
 * a, b: three-limb operands; their limbs are snapshotted on entry so they
 *       can still be reported in the failure message.
 *
 * NOTE(review): the carry handling assumes each ulong limb is one machine
 * word whose high product half is returned by mul_high - confirm.
 */
void xasm_mulm192(ulong *c, ulong *a, ulong *b)
{
  enum { XASM_MULM192_LIMBS = 3 };

  ulong acc[XASM_MULM192_LIMBS + 1] = { 0 };  /* running sum plus one overflow limb */
  ulong expect[XASM_MULM192_LIMBS];
  ulong row, col, factor, q, lo, cy;
  ulong a0 = a[0], a1 = a[1], a2 = a[2];
  ulong b0 = b[0], b1 = b[1], b2 = b[2];

  asm_mulm192(expect, a, b);

  for (row = 0; row < XASM_MULM192_LIMBS; row++) {
    /* acc += a[row] * b */
    factor = a[row];
    cy = 0;
    for (col = 0; col < XASM_MULM192_LIMBS; col++) {
      acc[col] += cy;
      cy = (acc[col] < cy) ? 1 : 0;
      lo = factor * b[col];
      acc[col] += lo;
      cy += (acc[col] < lo);
      cy += mul_high(factor, b[col]);
    }
    acc[XASM_MULM192_LIMBS] += cy;  /* no carry possible */

    /* acc += q * modulus, with q derived from the lowest limb */
    q = acc[0] * montgomery_inv_n;
    cy = 0;
    for (col = 0; col < XASM_MULM192_LIMBS; col++) {
      acc[col] += cy;
      cy = (acc[col] < cy) ? 1 : 0;
      lo = q * montgomery_modulo_n[col];
      acc[col] += lo;
      cy += (acc[col] < lo);
      cy += mul_high(q, montgomery_modulo_n[col]);
    }
    acc[XASM_MULM192_LIMBS] += cy;

    /* Drop the lowest limb by shifting everything down one position. */
    for (col = 0; col < XASM_MULM192_LIMBS; col++) {
      acc[col] = acc[col + 1];
    }
    /* The old top limb now sits one slot lower; if it is smaller than the
       carry just added, that addition wrapped, so record the overflow. */
    acc[XASM_MULM192_LIMBS] = (acc[XASM_MULM192_LIMBS - 1] < cy) ? 1 : 0;
  }

  if (acc[XASM_MULM192_LIMBS]) {
    asm_sub_n192(acc, montgomery_modulo_n);
  }
  asm_copy192(c, acc);
  /* NOTE(review): adding 0 presumably normalises c into the range that
     asm_mulm192 produces - confirm against asm_add192_ui. */
  asm_add192_ui(c, 0);

  if (c[0] == expect[0] && c[1] == expect[1] && c[2] == expect[2]) {
    return;
  }

  printf("mul failed: %lu %lu %lu\n", (ulong)a, (ulong)b, (ulong)c);
  complain("%lu %lu %lu * %lu %lu %lu\n%lu %lu %lu!= %lu %lu %lu\n%lu %lu %lu %lu\n",
           a0, a1, a2, b0, b1, b2,
           c[0], c[1], c[2],
           expect[0], expect[1], expect[2],
           montgomery_modulo_n[0], montgomery_modulo_n[1], montgomery_modulo_n[2],
           montgomery_inv_n);
}
/*
 * Cross-check of asm_mulm64 against a portable single-limb computation.
 *
 * The product is first obtained from asm_mulm64 into a local variable. The
 * same product is then recomputed in C, using the montgomery_inv_n /
 * montgomery_modulo_n globals, and stored in c. If the two results differ,
 * the operand addresses are printed and the operand, result, modulus and
 * inverse values are passed to complain().
 *
 * c: receives the one-limb result of the C computation.
 * a, b: one-limb operands; their values are snapshotted on entry so they
 *       can still be reported in the failure message.
 *
 * NOTE(review): the carry handling assumes each ulong limb is one machine
 * word whose high product half is returned by mul_high - confirm.
 */
void casm_mulm64(ulong *c, ulong *a, ulong *b)
{
  enum { CASM_MULM64_LIMBS = 1 };

  ulong acc[CASM_MULM64_LIMBS + 1] = { 0 };  /* running sum plus one overflow limb */
  ulong expect;
  ulong row, col, factor, q, lo, cy;
  ulong a1 = a[0];
  ulong b1 = b[0];

  asm_mulm64(&expect, a, b);

  /* The loops are kept even though the limb count is 1. */
  for (row = 0; row < CASM_MULM64_LIMBS; row++) {
    /* acc += a[row] * b */
    factor = a[row];
    cy = 0;
    for (col = 0; col < CASM_MULM64_LIMBS; col++) {
      acc[col] += cy;
      cy = (acc[col] < cy) ? 1 : 0;
      lo = factor * b[col];
      acc[col] += lo;
      cy += (acc[col] < lo);
      cy += mul_high(factor, b[col]);
    }
    acc[CASM_MULM64_LIMBS] += cy;  /* no carry possible */

    /* acc += q * modulus, with q derived from the lowest limb */
    q = acc[0] * montgomery_inv_n;
    cy = 0;
    for (col = 0; col < CASM_MULM64_LIMBS; col++) {
      acc[col] += cy;
      cy = (acc[col] < cy) ? 1 : 0;
      lo = q * montgomery_modulo_n[col];
      acc[col] += lo;
      cy += (acc[col] < lo);
      cy += mul_high(q, montgomery_modulo_n[col]);
    }
    acc[CASM_MULM64_LIMBS] += cy;

    /* Drop the lowest limb by shifting everything down one position. */
    for (col = 0; col < CASM_MULM64_LIMBS; col++) {
      acc[col] = acc[col + 1];
    }
    /* The old top limb now sits one slot lower; if it is smaller than the
       carry just added, that addition wrapped, so record the overflow. */
    acc[CASM_MULM64_LIMBS] = (acc[CASM_MULM64_LIMBS - 1] < cy) ? 1 : 0;
  }

  if (acc[CASM_MULM64_LIMBS]) {
    asm_sub_n64(acc, montgomery_modulo_n);
  }
  asm_copy64(c, acc);
  /* NOTE(review): adding 0 presumably normalises c into the range that
     asm_mulm64 produces - confirm against asm_add64_ui. */
  asm_add64_ui(c, 0);

  if (*c == expect) {
    return;
  }

  printf("mul failed: %lu %lu %lu\n", (ulong)a, (ulong)b, (ulong)c);
  complain("%lu %lu %lu %lu\n%lu %lu\n",
           a1, b1, c[0], expect, montgomery_modulo_n[0], montgomery_inv_n);
}