DEM read_dem () { int c; DEM f, a, b, d, x; int i; char buf[200]; DEM s; DEM d1; int flags1; DEM used1; extern DEM used; loop: do c = readchar (); while (c==' ' || c=='\t' || c=='\n' || c==0); switch (c) { case 'I': return I; case 'K': return K; case 'S': return S; case 'E': return E; case 'F': return If; case 'O': return Ord; case '-': f = read_dem (); a = read_dem (); return ap (f, a); case '/': a = read_dem (); b = read_dem (); return transym (a, b); case 'T': a = read_dem (); b = read_dem (); return trans (a, b); case 'X': a = read_dem (); return sym (a); case '#': a = read_dem (); b = read_dem (); return Axm (a, b); case 'i': a = read_dem (); return defI (a); case 'k': a = read_dem (); b = read_dem (); return defK (a, b); case 's': a = read_dem (); b = read_dem (); d = read_dem (); return defS (a, b, d); case ')': a = read_dem (); b = read_dem (); return IfNode (a, b); case '1': return Ext1; case '2': return Ext2; case '3': return Ext3; case '4': return Ext4; case '5': return Ext5; case '6': return Ext6; case 'e': return AE; case 'f': return EA0; /* a = read_dem (); return EA (a); */ case 'm': return MP; case 'a': return AI; case 'b': return AK; case 'c': return AS; case 'r': return RPA; case '0': return ZeroIsOrd; case '+': return SucIsOrd; case 'w': return LimIsOrd; case 'p': return PredIsOrd; case 'n': return StepIsOrd; case 'W': return TfI; case '<': a = read_dem (); return left (a); case '>': a = read_dem (); return right (a); case '\'': a = read_dem (); return rep(a); case '%': /*printf ("*1*");*/ a = read_dem (); /*printf ("*2*");*/ trace_dem ("read", a); /*printf ("*3*");*/ b = red (a); /*printf ("*4*");*/ trace_dem ("red", b); return b; /* return red (a); */ case 'R': a = read_dem (); return red1 (a, 0); case '@': a = read_dem (); return reduc (a, 1); case '~': a = read_dem (); return reduc (a, 0); case '$': a = read_dem (); return redu (a); case 'x': a = read_dem (); b = read_dem (); return ext (a, b); case '\\': a = read_dem (); b = read_dem (); 
trace_dem ("^(0)", a); trace_dem ("^(1)", b); d = exten (a, b); trace_dem ("^(r)", d); return d; case ']': a = read_dem (); b = read_dem (); d = dbextens (a, b); return d; case 'l': a = read_dem (); b = read_dem (); return Ext (a, b); /* return Lambda (a, b); */ case 'L': a = read_dem (); b = read_dem (); return Lambda (a, b); case '.': a = read_dem (); return DBLambda (a); case '!': a = read_dem (); b = read_dem (); return DB_lambda (a, b); /* return DBLambda (DBname (0, a, b)); */ case '?': a = read_dem (); b = read_dem (); return DB_Subst (a, b); case '_': a = read_dem (); b = read_dem (); d = read_dem (); return Subst (a, b, d); case ':': a = read_dem (); b = read_dem (); d = read_dem (); return ap (exten(a,d) ,b); case 'V': x = read_dem (); d = read_dem (); a = mk_dem (node(d), 0, NULL, DB_lambda (x, subdem(0,d)), DB_lambda (x, subdem(1,d)), subdem(2,d) == NULL ? NULL : DB_lambda (x, subdem(2,d)), NULL, NULL, NULL); return a; case 'A': x = read_dem (); d = read_dem (); a = mk_dem (node(d), 0, NULL, ap (x, subdem(0,d)), ap (x, subdem(1,d)), subdem(2,d) == NULL ? 
NULL : ap (x, subdem(2,d)), NULL, NULL, NULL); return a; case '"': a = read_dem (); /* return NoRed (a); */ no_red[nnr++] = a; return a; case '|': a = read_dem (); no_red[nnr++] = a; b = read_dem (); return b; case 'u': used1 = used; used = read_dem (); a = read_dem (); used = used1; return a; case '(': flags1 = flags; i = 0; for (;;) { c = readchar (); if (c == ')') break; buf[i++] = c; } buf[i] = 0; sscanf (buf, "%x", &flags); a = read_dem (); if ((flags & FLAG_PERM) == 0) flags = flags1; return a; case ',': a = read_dem (); return step (a); case '*': a = read_dem (); return rstep (a); case '`': a = read_dem (); return list_ap (a, nil); case '&': c = readchar (); switch (c) { case '/': return itransym; case 'i': return idefI; case 'k': return idefK; case 's': return idefS; case '<': return ileft; case '>': return iright; case '=': return ieq; case '#': return inode; case '0': return isubdem0; case '1': return isubdem1; case '2': return isubdem2; case '%': return ired; case '$': return iredu; case '\\': return iext; case ',': return istep; case '*': return irstep; default: fprintf (stderr, "Undefined &%c.\n", c); return I; } break; case '[': /* trace_dem ("read symbol", I); */ for (i=0; i<sizeof(buf); i++) { c = readchar(); if (c == ']') { buf[i] = 0; #ifdef TRACE1 printf ("buf=<%s>\n", buf); #endif if (buf[0] >= '0' && buf[0] <= '9') { #ifdef TRACE printf ("\nDBVar <%s>", buf); #endif d1 = DBVar (atoi(buf)); trace_dem ("", d); return d1; } s = Sym(buf); #ifdef TRACE1 trace_dem ("read symbol", s); #endif if (subdem(0,s) == NULL) { #ifdef TRACE1 trace_dem ("return symbol", s); #endif return s; } else { #ifdef TRACE trace_dem ("return value of", s); #endif return subdem(0,s); } } buf[i] = c; } fprintf (stderr, "Symbol too long\n"); return Sym(buf); default: return defined_dems[(unsigned char)c]; /* printf ("Illegal character 0x%02X\n", c); goto loop; */ } }
// Gaussian elimination on the augmented matrix [A | I] over GF2E.
//
// d : receives the product of the pivots (reduced mod p), or zero when some
//     column has no nonzero pivot candidate.
// X : on success, resized to n x n and filled by back-substitution from the
//     right half of the eliminated matrix (intended to be the inverse of A).
//     When d is cleared because no pivot was found, X is left untouched.
// A : must be square, otherwise LogicError is raised.
//
// For n == 0, d is set to one and X becomes a 0 x 0 matrix.
void inv(GF2E& d, mat_GF2E& X, const mat_GF2E& A)
{
   long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("inv: nonsquare matrix");

   if (n == 0) {
      set(d);
      X.SetDims(0, 0);
      return;
   }

   long i, j, k, pos;
   GF2X t1, t2;
   GF2X *x, *y;

   const GF2XModulus& p = GF2E::modulus();

   // M holds [A | I] as GF2X polynomials; every entry is sized with
   // 2*GF2E::WordLength() so that products can be accumulated before they
   // are reduced mod p.
   vec_GF2XVec M;

   M.SetLength(n);
   for (i = 0; i < n; i++) {
      M[i].SetSize(2*n, 2*GF2E::WordLength());
      for (j = 0; j < n; j++) {
         M[i][j] = rep(A[i][j]);
         clear(M[i][n+j]);
      }
      set(M[i][n+i]);
   }

   GF2X det;
   set(det);

   for (k = 0; k < n; k++) {
      // Reduce column k from row k downwards and remember the first row
      // whose reduced entry is nonzero.
      pos = -1;
      for (i = k; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1)) {
            pos = i;
         }
      }

      if (pos != -1) {
         if (k != pos) {
            swap(M[pos], M[k]);
         }

         MulMod(det, det, M[k][k], p);

         // make M[k, k] == -1 mod p, and make row k reduced
         // (the rest of row k is multiplied by the inverse of the pivot;
         // no negation is applied in this routine)

         InvMod(t1, M[k][k], p);
         for (j = k+1; j < 2*n; j++) {
            rem(t2, M[k][j], p);
            MulMod(M[k][j], t2, t1, p);
         }

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] + M[k]*M[i,k]

            t1 = M[i][k];   // this is already reduced

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            for (j = k+1; j < 2*n; j++, x++, y++) {
               // *x = *x + (*y)*t1
               // (the sum is left unreduced here)

               mul(t2, *y, t1);
               add(*x, *x, t2);
            }
         }
      }
      else {
         // No pivot in column k: report a zero determinant.
         clear(d);
         return;
      }
   }

   // Back-substitution, one column of X at a time, from the last row up.
   X.SetDims(n, n);
   for (k = 0; k < n; k++) {
      for (i = n-1; i >= 0; i--) {
         clear(t1);
         for (j = i+1; j < n; j++) {
            mul(t2, rep(X[j][k]), M[i][j]);
            add(t1, t1, t2);
         }
         add(t1, t1, M[i][n+k]);
         conv(X[i][k], t1);
      }
   }

   conv(d, det);
}
// Fills X with (m - r) row vectors of length m, where m is the number of
// rows of A and r is the value returned by gauss() on the transpose of A.
//
// Each row of X has a one in a distinct non-pivot position, zeros in the
// other non-pivot positions, and its pivot positions are obtained by
// back-substitution through the eliminated matrix (presumably yielding a
// basis of the left kernel of A).
void kernel(mat_GF2E& X, const mat_GF2E& A)
{
   const long m = A.NumRows();

   mat_GF2E M;
   transpose(M, A);
   const long rank = gauss(M);

   X.SetDims(m-rank, m);

   // pivot_row[c] is the row of M whose leading nonzero entry is in
   // column c, or -1 when column c holds no pivot.
   vec_long pivot_row;
   pivot_row.SetLength(m);
   for (long c = 0; c < m; c++)
      pivot_row[c] = -1;

   // pivot_inv[c] is the inverse of the pivot found in column c.
   vec_GF2E pivot_inv;
   pivot_inv.SetLength(m);

   long col = -1;
   for (long row = 0; row < rank; row++) {
      // The pivot of each row lies strictly to the right of the previous one.
      ++col;
      while (IsZero(M[row][col]))
         ++col;

      pivot_row[col] = row;
      inv(pivot_inv[col], M[row][col]);
   }

   GF2X acc, prod;
   GF2E scaled;

   for (long k = 0; k < m-rank; k++) {
      vec_GF2E& v = X[k];
      long free_seen = 0;   // non-pivot columns met so far, right to left

      for (long c = m-1; c >= 0; c--) {
         const long row = pivot_row[c];

         if (row == -1) {
            // Non-pivot column: the k-th one gets a one, the others zero.
            if (free_seen == k)
               set(v[c]);
            else
               clear(v[c]);
            free_seen++;
            continue;
         }

         // Pivot column: combine the entries already fixed to its right.
         clear(acc);
         for (long s = c+1; s < m; s++) {
            mul(prod, rep(v[s]), rep(M[row][s]));
            add(acc, acc, prod);
         }

         conv(scaled, acc);
         mul(scaled, scaled, pivot_inv[c]);
         v[c] = scaled;
      }
   }
}
// Builds the prefix tables for s with modulus _m and base b:
//   p[i] = b^i mod m
//   h[i] = (h[i-1] * b + s[i-1]) mod m, with h[0] = 0
hasher(string s, int _m) : m(_m), h(size(s)+1), p(size(s)+1) {
	h[0] = 0;
	p[0] = 1;
	// The two recurrences are independent, so both tables are filled in
	// the same pass.
	rep(i,0,size(s)) {
		p[i+1] = (ll)p[i] * b % m;
		h[i+1] = ((ll)h[i] * b + s[i]) % m;
	}
}
// Computes into d the determinant of the square matrix M_in over GF2E by
// Gaussian elimination on a working copy.
//
// d is the product of the pivots reduced mod p, or zero as soon as a column
// has no nonzero pivot candidate.  For a 0 x 0 matrix d is set to one.
// LogicError is raised when M_in is not square.
void determinant(GF2E& d, const mat_GF2E& M_in)
{
   long k, n;
   long i, j;
   long pos;
   GF2X t1, t2;
   GF2X *x, *y;

   const GF2XModulus& p = GF2E::modulus();

   n = M_in.NumRows();

   if (M_in.NumCols() != n)
      LogicError("determinant: nonsquare matrix");

   if (n == 0) {
      set(d);
      return;
   }

   // Working copy as GF2X polynomials; every entry is sized with
   // 2*GF2E::WordLength() so that products can be accumulated before they
   // are reduced mod p.
   vec_GF2XVec M;

   M.SetLength(n);
   for (i = 0; i < n; i++) {
      M[i].SetSize(n, 2*GF2E::WordLength());
      for (j = 0; j < n; j++)
         M[i][j] = rep(M_in[i][j]);
   }

   GF2X det;
   set(det);

   for (k = 0; k < n; k++) {
      // Reduce column k from row k downwards and remember the first row
      // whose reduced entry is nonzero.
      pos = -1;
      for (i = k; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1))
            pos = i;
      }

      if (pos != -1) {
         if (k != pos) {
            swap(M[pos], M[k]);
         }

         MulMod(det, det, M[k][k], p);

         // make M[k, k] == -1 mod p, and make row k reduced
         // (the rest of row k is multiplied by the inverse of the pivot;
         // no negation is applied in this routine)

         InvMod(t1, M[k][k], p);
         for (j = k+1; j < n; j++) {
            rem(t2, M[k][j], p);
            MulMod(M[k][j], t2, t1, p);
         }

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] + M[k]*M[i,k]

            t1 = M[i][k];   // this is already reduced

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            for (j = k+1; j < n; j++, x++, y++) {
               // *x = *x + (*y)*t1
               // (the sum is left unreduced here)

               mul(t2, *y, t1);
               add(*x, *x, t2);
            }
         }
      }
      else {
         // No pivot in column k: the determinant is zero.
         clear(d);
         return;
      }
   }

   conv(d, det);
}
// Returns the height_ field of this display's representation object.
Coord Display::a_height() const {
    return rep()->height_;
}
// Destroys the representation object returned by rep().
Display::~Display() {
    //Resource::unref_deferred(rep()->style_);
    delete rep();
}
// In-place Gaussian elimination on M, restricted to the first w columns.
//
// For each of these columns the first row (at or below the current pivot
// row) with a nonzero entry becomes the pivot row, and the entries below
// the pivot are eliminated.  Returns the number of pivots found.
// LogicError is raised unless 0 <= w <= M.NumCols().
long gauss(mat_zz_p& M, long w)
{
   const long n = M.NumRows();
   const long m = M.NumCols();

   if (w < 0 || w > m)
      LogicError("gauss: bad args");

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, neg_pivot_inv;

   long rank = 0;
   for (long col = 0; col < w && rank < n; col++) {
      // Look for the first row at or below `rank' with a nonzero entry.
      long pivot = rank;
      while (pivot < n && IsZero(M[pivot][col]))
         pivot++;

      if (pivot == n)
         continue;   // nothing to eliminate in this column

      swap(M[pivot], M[rank]);

      inv(neg_pivot_inv, M[rank][col]);
      negate(neg_pivot_inv, neg_pivot_inv);

      for (long row = rank+1; row < n; row++) {
         // M[row] = M[row] + M[rank]*M[row,col]*neg_pivot_inv
         mul(factor, M[row][col], neg_pivot_inv);
         const long f = rep(factor);
         const mulmod_precon_t fpinv = PrepMulModPrecon(f, p, pinv);

         clear(M[row][col]);

         zz_p *dst = M[row].elts();
         const zz_p *src = M[rank].elts();

         for (long j = col+1; j < m; j++) {
            // dst[j] = dst[j] + src[j]*factor
            const long prod = MulModPrecon(rep(src[j]), f, p, fpinv);
            dst[j].LoopHole() = AddMod(prod, rep(dst[j]), p);
         }
      }

      rank++;
   }

   return rank;
}
// Gaussian elimination on an augmented system built from A and b over ZZ_p.
//
// The working matrix takes its row i from column i of A (M[i][j] = A[j][i]),
// with b[i] appended as the last entry, so the system solved is the one
// for the transpose of A (presumably x*A = b -- confirm against callers).
//
// d : receives the determinant (product of pivots, negated on each row
//     swap), or zero when some column has no nonzero pivot candidate; in
//     that case X is left untouched.
// X : on success, resized to length n and filled by back-substitution.
//
// LogicError is raised when A is not square or b has the wrong length.
// For n == 0, d is set to one and X is given length zero.
void solve(ZZ_p& d, vec_ZZ_p& X,
           const mat_ZZ_p& A, const vec_ZZ_p& b)
{
   long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("solve: nonsquare matrix");

   if (b.length() != n)
      LogicError("solve: dimension mismatch");

   if (n == 0) {
      set(d);
      X.SetLength(0);
      return;
   }

   long i, j, k, pos;
   ZZ t1, t2;
   ZZ *x, *y;

   const ZZ& p = ZZ_p::modulus();

   vec_ZZVec M;
   // t1 = n * p^2; its size is used below as the preallocated size of
   // every entry of M, leaving room for sums of unreduced products.
   sqr(t1, p);
   mul(t1, t1, n);

   M.SetLength(n);

   for (i = 0; i < n; i++) {
      M[i].SetSize(n+1, t1.size());
      for (j = 0; j < n; j++)
         M[i][j] = rep(A[j][i]);
      M[i][n] = rep(b[i]);
   }

   ZZ det;
   set(det);

   for (k = 0; k < n; k++) {
      // Reduce column k from row k downwards and remember the first row
      // whose reduced entry is nonzero.
      pos = -1;
      for (i = k; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1)) {
            pos = i;
         }
      }

      if (pos != -1) {
         if (k != pos) {
            // A row swap flips the sign of the determinant.
            swap(M[pos], M[k]);
            NegateMod(det, det, p);
         }

         MulMod(det, det, M[k][k], p);

         // make M[k, k] == -1 mod p, and make row k reduced

         InvMod(t1, M[k][k], p);
         NegateMod(t1, t1, p);
         for (j = k+1; j <= n; j++) {
            rem(t2, M[k][j], p);
            MulMod(M[k][j], t2, t1, p);
         }

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] + M[k]*M[i,k]

            t1 = M[i][k];   // this is already reduced

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            for (j = k+1; j <= n; j++, x++, y++) {
               // *x = *x + (*y)*t1
               // (the sum is left unreduced here)

               mul(t2, *y, t1);
               add(*x, *x, t2);
            }
         }
      }
      else {
         // No pivot in column k: report a zero determinant.
         clear(d);
         return;
      }
   }

   // Back-substitution from the last row up.  Row k was scaled by the
   // negated inverse of its pivot, hence the subtraction of M[i][n].
   X.SetLength(n);
   for (i = n-1; i >= 0; i--) {
      clear(t1);
      for (j = i+1; j < n; j++) {
         mul(t2, rep(X[j]), M[i][j]);
         add(t1, t1, t2);
      }
      sub(t1, t1, M[i][n]);
      conv(X[i], t1);
   }

   conv(d, det);
}
// Matrix product X = A * B over zz_p.
//
// X is resized to A.NumRows() x B.NumCols() before any entry of A or B is
// read.  LogicError is raised when the inner dimensions differ.
// With more than one column in B, each nonzero entry of A is preconditioned
// once and reused across a whole row of B; otherwise the plain triple loop
// is used.
static
void mul_aux(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B)
{
   const long n = A.NumRows();
   const long l = A.NumCols();
   const long m = B.NumCols();

   if (l != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   X.SetDims(n, m);

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   if (m <= 1) {
      // just use the old code, w/o preconditioning (1-based indexing)
      for (long i = 1; i <= n; i++) {
         for (long j = 1; j <= m; j++) {
            long sum = 0;
            for (long k = 1; k <= l; k++) {
               const long prod = MulMod(rep(A(i,k)), rep(B(k,j)), p, pinv);
               sum = AddMod(sum, prod, p);
            }
            X(i,j).LoopHole() = sum;
         }
      }
      return;
   }

   // new preconditioning code: accumulate one row of X in a scratch vector
   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(m);
   long *row_acc = mul_aux_vec.elts();

   for (long i = 0; i < n; i++) {
      const zz_p *a_row = A[i].elts();

      for (long j = 0; j < m; j++)
         row_acc[j] = 0;

      for (long k = 0; k < l; k++) {
         const long a_ik = rep(a_row[k]);
         if (a_ik == 0)
            continue;   // contributes nothing to this row

         const zz_p *b_row = B[k].elts();
         const mulmod_precon_t a_ik_pinv = PrepMulModPrecon(a_ik, p, pinv);

         for (long j = 0; j < m; j++) {
            const long prod = MulModPrecon(rep(b_row[j]), a_ik, p, a_ik_pinv);
            row_acc[j] = AddMod(row_acc[j], prod, p);
         }
      }

      zz_p *x_row = X[i].elts();
      for (long j = 0; j < m; j++)
         x_row[j].LoopHole() = row_acc[j];
   }
}
// Gaussian elimination on the augmented matrix [A | I] over zz_p.
//
// d : receives the determinant (product of pivots, negated on each row
//     swap), or zero when some column has no nonzero entry at or below the
//     diagonal; in that case X is left untouched.
// X : on success, resized to n x n and filled by back-substitution
//     (intended to be the inverse of A).
//
// LogicError is raised when A is not square.  For n == 0, d is set to one
// and X becomes a 0 x 0 matrix.
void inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A)
{
   const long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("inv: nonsquare matrix");

   if (n == 0) {
      set(d);
      X.SetDims(0, 0);
      return;
   }

   const long width = 2*n;

   // aug = [A | I]
   mat_zz_p aug;
   aug.SetDims(n, width);
   for (long row = 0; row < n; row++) {
      for (long col = 0; col < n; col++) {
         aug[row][col] = A[row][col];
         clear(aug[row][n+col]);
      }
      set(aug[row][n+row]);
   }

   zz_p det;
   set(det);

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, pivot_inverse;

   for (long k = 0; k < n; k++) {
      // First row at or below the diagonal with a nonzero entry in column k.
      long pivot = k;
      while (pivot < n && IsZero(aug[pivot][k]))
         pivot++;

      if (pivot == n) {
         clear(d);
         return;
      }

      if (pivot != k) {
         swap(aug[pivot], aug[k]);
         negate(det, det);
      }

      mul(det, det, aug[k][k]);

      // The diagonal keeps the inverse of the pivot; it is multiplied back
      // in during back-substitution.
      inv(pivot_inverse, aug[k][k]);
      aug[k][k] = pivot_inverse;

      for (long i = k+1; i < n; i++) {
         // aug[i] = aug[i] - aug[k]*aug[i,k]*pivot_inverse
         mul(factor, aug[i][k], pivot_inverse);
         negate(factor, factor);

         const long f = rep(factor);
         const mulmod_precon_t fpinv = PrepMulModPrecon(f, p, pinv);

         zz_p *dst = aug[i].elts();
         const zz_p *src = aug[k].elts();

         for (long j = k+1; j < width; j++) {
            // dst[j] = dst[j] + src[j]*factor
            const long prod = MulModPrecon(rep(src[j]), f, p, fpinv);
            dst[j].LoopHole() = AddMod(rep(dst[j]), prod, p);
         }
      }
   }

   zz_p acc, term;

   X.SetDims(n, n);
   for (long k = 0; k < n; k++) {
      for (long i = n-1; i >= 0; i--) {
         clear(acc);
         for (long j = i+1; j < n; j++) {
            mul(term, X[j][k], aug[i][j]);
            add(acc, acc, term);
         }
         sub(acc, aug[i][n+k], acc);
         mul(X[i][k], acc, aug[i][i]);
      }
   }

   d = det;
}
// Computes into d the determinant of the square matrix M_in over zz_p by
// Gaussian elimination on a copy.
//
// d is the product of the pivots, negated on each row swap, or zero as soon
// as a column has no nonzero entry at or below the diagonal.  For a 0 x 0
// matrix d is set to one.  LogicError is raised when M_in is not square.
void determinant(zz_p& d, const mat_zz_p& M_in)
{
   mat_zz_p work;
   work = M_in;

   const long n = work.NumRows();

   if (work.NumCols() != n)
      LogicError("determinant: nonsquare matrix");

   if (n == 0) {
      set(d);
      return;
   }

   zz_p det;
   set(det);

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, pivot_inverse;

   for (long k = 0; k < n; k++) {
      // First row at or below the diagonal with a nonzero entry in column k.
      long pivot = k;
      while (pivot < n && IsZero(work[pivot][k]))
         pivot++;

      if (pivot == n) {
         clear(d);
         return;
      }

      if (pivot != k) {
         swap(work[pivot], work[k]);
         negate(det, det);
      }

      mul(det, det, work[k][k]);

      inv(pivot_inverse, work[k][k]);

      for (long i = k+1; i < n; i++) {
         // work[i] = work[i] - work[k]*work[i,k]*pivot_inverse
         mul(factor, work[i][k], pivot_inverse);
         negate(factor, factor);

         const long f = rep(factor);
         const mulmod_precon_t fpinv = PrepMulModPrecon(f, p, pinv);

         zz_p *dst = work[i].elts();
         const zz_p *src = work[k].elts();

         for (long j = k+1; j < n; j++) {
            // dst[j] = dst[j] + src[j]*factor
            const long prod = MulModPrecon(rep(src[j]), f, p, fpinv);
            dst[j].LoopHole() = AddMod(rep(dst[j]), prod, p);
         }
      }
   }

   d = det;
}
// Vector-matrix product x = a * B over zz_p.
//
// LogicError is raised when a.length() differs from B.NumRows().  x is
// resized only after all products have been accumulated.
void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B)
{
   const long l = a.length();
   const long m = B.NumCols();

   if (l != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   if (m == 0) {
      x.SetLength(0);
      return;
   }

   const long p = zz_p::modulus();
   const mulmod_t pinv = zz_p::ModulusInverse();

   if (m == 1) {
      // Single column: plain dot product (1-based indexing).
      long sum = 0;
      for (long k = 1; k <= l; k++) {
         const long prod = MulMod(rep(a(k)), rep(B(k,1)), p, pinv);
         sum = AddMod(sum, prod, p);
      }

      x.SetLength(1);
      x(1).LoopHole() = sum;
      return;
   }

   // m > 1.  precondition: each nonzero entry of a is prepared once and
   // reused across the matching row of B.
   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(m);
   long *row_acc = mul_aux_vec.elts();

   const zz_p *a_elts = a.elts();

   for (long j = 0; j < m; j++)
      row_acc[j] = 0;

   for (long k = 0; k < l; k++) {
      const long a_k = rep(a_elts[k]);
      if (a_k == 0)
         continue;   // contributes nothing

      const zz_p *b_row = B[k].elts();
      const mulmod_precon_t a_k_pinv = PrepMulModPrecon(a_k, p, pinv);

      for (long j = 0; j < m; j++) {
         const long prod = MulModPrecon(rep(b_row[j]), a_k, p, a_k_pinv);
         row_acc[j] = AddMod(row_acc[j], prod, p);
      }
   }

   x.SetLength(m);
   zz_p *x_elts = x.elts();
   for (long j = 0; j < m; j++)
      x_elts[j].LoopHole() = row_acc[j];
}
// Passes every entry of malFunctionTable, in table order, to rep() together
// with env.
static void installFunctions(malEnvPtr env)
{
    for (auto &entry : malFunctionTable) {
        rep(entry, env);
    }
}
// Returns the pheight_ field of this display's representation object.
PixelCoord Display::pheight() const {
    DisplayRep& d = *rep();
    return d.pheight_;
}
// Prints the dimensions n and m on one line, then the n x m array a, one
// row per line with a space after every entry.
void debug() {
    printf("%d %d\n", n, m);
    rep(i,n) {
        rep(j,m) cout << a[i][j] << " ";
        puts("");
    }
}   // this closing brace was missing, leaving the function body unterminated
// Returns the width_ field of this display's representation object.
Coord Display::a_width() const {
    return rep()->width_;
}
// Copies the representative rep(from[i]) of every element of `from` into
// `to`, which is first resized to the length of `from`.
inline void conv(NTL::vec_long& to, NTL::vec_zz_p& from)
{
    const long len = from.length();
    to.SetLength(len);
    for (long idx = 0; idx < len; ++idx) {
        to[idx] = rep(from[idx]);
    }
}
// Returns the style_ field of this display's representation object.
Style* Display::style() const {
    DisplayRep& d = *rep();
    return d.style_;
}
// Sets in the 64-bit mask M, for every i in [0, 64), the bit at position
// 63 - ((i*i) mod 64): one bit per residue mod 64 that a square can take.
void init_is_square() {
    rep(i,0,64) {
        const auto residue = (i*i)%64;
        M |= 1ULL << (63-residue);
    }
}
//------------------------------------------- // i/o operation (octet string) //------------------------------------------- void bn254_fp2_to_mpz(mpz_t a, const Element x) { mpz_set(a, rep(rep0(x))); // a = rep0 mpz_addmul(a, rep(rep1(x)), field(x)->base->order); //a = a + rep1*p }
// Translates the LLVM module M into declarations appended to `program`:
//   1. declarations for every global variable;
//   2. for every function, its global declarations and procedures, plus a
//      generated body for each procedure when the function has one;
//   3. auxiliary declarations, the init-function declaration and the prelude;
//   4. every procedure whose name contains Naming::CONTRACT_EXPR is replaced
//      by the declaration produced by Decl::code.
void SmackModuleGenerator::generateProgram(llvm::Module& M) {

  Naming naming;
  SmackRep rep(&M.getDataLayout(), naming, program, getAnalysis<Regions>());
  std::list<Decl*>& decls = program.getDeclarations();

  DEBUG(errs() << "Analyzing globals...\n");

  for (auto& global : M.globals()) {
    auto globalDecls = rep.globalDecl(&global);
    decls.insert(decls.end(), globalDecls.begin(), globalDecls.end());
  }

  DEBUG(errs() << "Analyzing functions...\n");

  for (auto& F : M) {

    // Per-function names start from fresh counters.
    naming.reset();

    DEBUG(errs() << "Analyzing function: " << naming.get(F) << "\n");

    auto funcDecls = rep.globalDecl(&F);
    decls.insert(decls.end(), funcDecls.begin(), funcDecls.end());

    auto procs = rep.procedure(&F);
    assert(procs.size() > 0);

    if (naming.get(F) != Naming::DECLARATIONS_PROC)
      decls.insert(decls.end(), procs.begin(), procs.end());

    if (F.isDeclaration())
      continue;

    // Functions without any instruction get no body.
    // (MODIFIES clauses are handled after memory splitting is determined.)
    if (F.empty() || F.getEntryBlock().empty())
      continue;

    DEBUG(errs() << "Analyzing function body: " << naming.get(F) << "\n");

    for (auto proc : procs) {
      SmackInstGenerator igen(getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo(), rep, *proc, naming);
      DEBUG(errs() << "Generating body for " << naming.get(F) << "\n");
      igen.visit(F);
      DEBUG(errs() << "\n");

      // Static initializers are executed first, in the main procedure.
      if (F.hasName() && SmackOptions::isEntryPoint(F.getName())) {
        proc->insert(Stmt::call(Naming::INITIALIZE_PROC));
      } else if (naming.get(F).find(Naming::INIT_FUNC_PREFIX) == 0) {
        rep.addInitFunc(&F);
      }
    }

    DEBUG(errs() << "Finished analyzing function: " << naming.get(F) << "\n\n");
  }

  auto auxDecls = rep.auxiliaryDeclarations();
  decls.insert(decls.end(), auxDecls.begin(), auxDecls.end());
  decls.insert(decls.end(), rep.getInitFuncs());

  // NOTE this must come after instruction generation, since the number of
  // regions to declare is not known before.
  program.appendPrelude(rep.getPrelude());

  // Collect the contract procedures first; they are removed in a second
  // pass, after the traversal of the program is over.
  std::list<Decl*> contractProcs;
  for (auto D : program) {
    auto P = dyn_cast<ProcDecl>(D);
    if (!P)
      continue;
    if (D->getName().find(Naming::CONTRACT_EXPR) == std::string::npos)
      continue;
    decls.insert(decls.end(), Decl::code(P));
    contractProcs.push_back(P);
  }

  for (auto D : contractProcs)
    decls.remove(D);
}
// Executes one instruction of the ED-prefixed opcode group.
//
// opcode is the byte selecting the instruction; each case below carries the
// mnemonic it implements.  Opcodes without a case are silently ignored
// (the switch has no default branch).
// state is not referenced by name in this function; presumably the register
// and helper macros (rA, rBC, S(), t_states(), rep(), ...) expand to
// accesses through it -- confirm against their definitions.
//
// The block-repeat instructions (0xB0-0xBB) perform one iteration and, while
// their repeat condition holds, charge 5 extra t-states and call rep().
void Z80_Exec_ED(Z80_State *state, uint8_t opcode)
{
    uint16_t addr;   // operand address for the 16-bit load/store forms
    uint8_t tmp;     // byte in transit for in/out, rrd/rld and block ops
    //uint16_t tmp_16;

    switch (opcode) {
    case 0x40: // in b, (c)
        rB = in(rBC); in_f(rB); break;
    case 0x41: // out (c), b
        out(rBC, rB); break;
    case 0x42: // sbc hl, bc
        t_states(7); sbc_16(rHL, rBC); break;
    case 0x43: // ld (**), bc
        addr = arg_16(); write_16(addr, rBC); break;
    case 0x44: // neg
        neg(); break;
    case 0x45: // retn
        S(IFF1) = S(IFF2); rPC = pop_16(); break;
    case 0x46: // im 0
        S(IM) = 0; break;
    case 0x47: // ld i, a
        t_states(1); rI = rA; break;
    case 0x48: // in c, (c)
        rC = in(rBC); in_f(rC); break;
    case 0x49: // out (c), c
        out(rBC, rC); break;
    case 0x4A: // adc hl, bc
        t_states(7); adc_16(rHL, rBC); break;
    case 0x4B: // ld bc, (**)
        addr = arg_16(); rBC = read_16(addr); break;
    case 0x4C: // neg
        neg(); break;
    case 0x4D: // reti
        // unlike the retn cases, IFF1 is not reloaded from IFF2 here
        rPC = pop_16(); break;
    case 0x4E: // im 0/1
        S(IM) = 0; break;
    case 0x4F: // ld r, a
        t_states(1); rR = rA; break;

    case 0x50: // in d, (c)
        rD = in(rBC); in_f(rD); break;
    case 0x51: // out (c), d
        out(rBC, rD); break;
    case 0x52: // sbc hl, de
        t_states(7); sbc_16(rHL, rDE); break;
    case 0x53: // ld (**), de
        addr = arg_16(); write_16(addr, rDE); break;
    case 0x54: // neg
        neg(); break;
    case 0x55: // retn
        S(IFF1) = S(IFF2); rPC = pop_16(); break;
    case 0x56: // im 1
        S(IM) = 1; break;
    case 0x57: // ld a, i
        t_states(1); rA = rI; ld_f(rA); break;
    case 0x58: // in e, (c)
        rE = in(rBC); in_f(rE); break;
    case 0x59: // out (c), e
        out(rBC, rE); break;
    case 0x5A: // adc hl, de
        t_states(7); adc_16(rHL, rDE); break;
    case 0x5B: // ld de, (**)
        addr = arg_16(); rDE = read_16(addr); break;
    case 0x5C: // neg
        neg(); break;
    case 0x5D: // retn
        S(IFF1) = S(IFF2); rPC = pop_16(); break;
    case 0x5E: // im 2
        S(IM) = 2; break;
    case 0x5F: // ld a, r
        t_states(1); rA = rR; ld_f(rA); break;

    case 0x60: // in h, (c)
        rH = in(rBC); in_f(rH); break;
    case 0x61: // out (c), h
        out(rBC, rH); break;
    case 0x62: // sbc hl, hl
        t_states(7); sbc_16(rHL, rHL); break;
    case 0x63: // ld (**), hl
        addr = arg_16(); write_16(addr, rHL); break;
    case 0x64: // neg
        neg(); break;
    case 0x65: // retn
        S(IFF1) = S(IFF2); rPC = pop_16(); break;
    case 0x66: // im 0
        S(IM) = 0; break;
    case 0x67: // rrd
        // read-modify-write of the byte at (hl)
        tmp = read(rHL); t_states(4); rrd(tmp); write(rHL, tmp); break;
    case 0x68: // in l, (c)
        rL = in(rBC); in_f(rL); break;
    case 0x69: // out (c), l
        out(rBC, rL); break;
    case 0x6A: // adc hl, hl
        t_states(7); adc_16(rHL, rHL); break;
    case 0x6B: // ld hl, (**)
        addr = arg_16(); rHL = read_16(addr); break;
    case 0x6C: // neg
        neg(); break;
    case 0x6D: // retn
        S(IFF1) = S(IFF2); rPC = pop_16(); break;
    case 0x6E: // im 0/1
        S(IM) = 0; break;
    case 0x6F: // rld
        // read-modify-write of the byte at (hl)
        tmp = read(rHL); t_states(4); rld(tmp); write(rHL, tmp); break;

    case 0x70: // in (c)
        // the value read only feeds the flags; no register is written
        tmp = in(rBC); in_f(tmp); break;
    case 0x71: // out (c), 0
        out(rBC, 0); break;
    case 0x72: // sbc hl, sp
        t_states(7); sbc_16(rHL, rSP); break;
    case 0x73: // ld (**), sp
        addr = arg_16(); write_16(addr, rSP); break;
    case 0x74: // neg
        neg(); break;
    case 0x75: // retn
        S(IFF1) = S(IFF2); rPC = pop_16(); break;
    case 0x76: // im 1
        S(IM) = 1; break;
    case 0x78: // in a, (c)
        rA = in(rBC); in_f(rA); break;
    case 0x79: // out (c), a
        out(rBC, rA); break;
    case 0x7A: // adc hl, sp
        t_states(7); adc_16(rHL, rSP); break;
    case 0x7B: // ld sp, (**)
        addr = arg_16(); rSP = read_16(addr); break;
    case 0x7C: // neg
        neg(); break;
    case 0x7D: // retn
        S(IFF1) = S(IFF2); rPC = pop_16(); break;
    case 0x7E: // im 2
        S(IM) = 2; break;

    // Single-step block instructions.
    case 0xA0: // ldi
        tmp = read(rHL++); write(rDE++, tmp); t_states(2); rBC--; ldr_f(tmp); break;
    case 0xA1: // cpi
        tmp = read(rHL++); t_states(5); rBC--; cpr_f(tmp); break;
    case 0xA2: // ini
        t_states(1); tmp = in(rBC); write(rHL++, tmp); rB--; inir_f(tmp); break;
    case 0xA3: // outi
        // b is decremented before bc is used as the port address
        t_states(1); tmp = read(rHL++); rB--; out(rBC, tmp); outr_f(tmp); break;
    case 0xA8: // ldd
        tmp = read(rHL--); write(rDE--, tmp); t_states(2); rBC--; ldr_f(tmp); break;
    case 0xA9: // cpd
        tmp = read(rHL--); t_states(5); rBC--; cpr_f(tmp); break;
    case 0xAA: // ind
        t_states(1); tmp = in(rBC); write(rHL--, tmp); rB--; indr_f(tmp); break;
    case 0xAB: // outd
        // b is decremented before bc is used as the port address
        t_states(1); tmp = read(rHL--); rB--; out(rBC, tmp); outr_f(tmp); break;

    // Repeating block instructions: same step as above, then repeat while
    // the counter (and, for the compares, the lack of a match) allows it.
    case 0xB0: // ldir
        tmp = read(rHL++); write(rDE++, tmp); t_states(2); rBC--; ldr_f(tmp);
        if (rBC) { t_states(5); rep(); }
        break;
    case 0xB1: // cpir
        tmp = read(rHL++); t_states(5); rBC--; cpr_f(tmp);
        if (rBC && !(rF & fZ)) { t_states(5); rep(); }
        break;
    case 0xB2: // inir
        t_states(1); tmp = in(rBC); write(rHL++, tmp); rB--; inir_f(tmp);
        if (rB) { t_states(5); rep(); }
        break;
    case 0xB3: // otir
        t_states(1); tmp = read(rHL++); rB--; out(rBC, tmp); outr_f(tmp);
        if (rB) { t_states(5); rep(); }
        break;
    case 0xB8: // lddr
        tmp = read(rHL--); write(rDE--, tmp); t_states(2); rBC--; ldr_f(tmp);
        if (rBC) { t_states(5); rep(); }
        break;
    case 0xB9: // cpdr
        tmp = read(rHL--); t_states(5); rBC--; cpr_f(tmp);
        if (rBC && !(rF & fZ)) { t_states(5); rep(); }
        break;
    case 0xBA: // indr
        t_states(1); tmp = in(rBC); write(rHL--, tmp); rB--; indr_f(tmp);
        if (rB) { t_states(5); rep(); }
        break;
    case 0xBB: // otdr
        t_states(1); tmp = read(rHL--); rB--; out(rBC, tmp); outr_f(tmp);
        if (rB) { t_states(5); rep(); }
        break;
    }
}
// Returns the width_ field of this display's representation object.
Coord Display::width() const {
    DisplayRep& d = *rep();
    return d.width_;
}
// Gaussian elimination on an augmented system built from A and b over GF2E.
//
// trans : when true, row i of the working matrix is column i of A
//         (M[i][j] = A[j][i]); when false, A is used as is.
// d     : receives the product of the pivots (reduced mod p), or zero when
//         some column has no nonzero pivot candidate; in that case X is
//         left untouched.
// X     : on success, resized to length n and filled by back-substitution.
//
// LogicError is raised when A is not square or b has the wrong length.
// For n == 0, d is set to one and X is given length zero.
static
void solve_impl(GF2E& d, vec_GF2E& X, const mat_GF2E& A, const vec_GF2E& b, bool trans)
{
   long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("solve: nonsquare matrix");

   if (b.length() != n)
      LogicError("solve: dimension mismatch");

   if (n == 0) {
      set(d);
      X.SetLength(0);
      return;
   }

   long i, j, k, pos;
   GF2X t1, t2;
   GF2X *x, *y;

   const GF2XModulus& p = GF2E::modulus();

   // Working matrix with n+1 columns: the (possibly transposed) entries of
   // A followed by b.  Every entry is sized with 2*GF2E::WordLength() so
   // that products can be accumulated before they are reduced mod p.
   vec_GF2XVec M;

   M.SetLength(n);

   for (i = 0; i < n; i++) {
      M[i].SetSize(n+1, 2*GF2E::WordLength());

      if (trans)
         for (j = 0; j < n; j++) M[i][j] = rep(A[j][i]);
      else
         for (j = 0; j < n; j++) M[i][j] = rep(A[i][j]);

      M[i][n] = rep(b[i]);
   }

   GF2X det;
   set(det);

   for (k = 0; k < n; k++) {
      // Reduce column k from row k downwards and remember the first row
      // whose reduced entry is nonzero.
      pos = -1;
      for (i = k; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1)) {
            pos = i;
         }
      }

      if (pos != -1) {
         if (k != pos) {
            swap(M[pos], M[k]);
         }

         MulMod(det, det, M[k][k], p);

         // make M[k, k] == -1 mod p, and make row k reduced
         // (the rest of row k is multiplied by the inverse of the pivot;
         // no negation is applied in this routine)

         InvMod(t1, M[k][k], p);
         for (j = k+1; j <= n; j++) {
            rem(t2, M[k][j], p);
            MulMod(M[k][j], t2, t1, p);
         }

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] + M[k]*M[i,k]

            t1 = M[i][k];   // this is already reduced

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            for (j = k+1; j <= n; j++, x++, y++) {
               // *x = *x + (*y)*t1
               // (the sum is left unreduced here)

               mul(t2, *y, t1);
               add(*x, *x, t2);
            }
         }
      }
      else {
         // No pivot in column k: report a zero determinant.
         clear(d);
         return;
      }
   }

   // Back-substitution from the last row up.
   X.SetLength(n);
   for (i = n-1; i >= 0; i--) {
      clear(t1);
      for (j = i+1; j < n; j++) {
         mul(t2, rep(X[j]), M[i][j]);
         add(t1, t1, t2);
      }
      add(t1, t1, M[i][n]);
      conv(X[i], t1);
   }

   conv(d, det);
}
// Returns the height_ field of this display's representation object.
Coord Display::height() const {
    DisplayRep& d = *rep();
    return d.height_;
}
// Gaussian elimination on M_in over GF2E, restricted to the first w columns.
//
// The elimination runs on a copy held as GF2X polynomials and the result is
// converted back into M_in at the end.  Returns the number of pivots found.
// LogicError is raised unless 0 <= w <= M_in.NumCols().
long gauss(mat_GF2E& M_in, long w)
{
   long k, l;
   long i, j;
   long pos;
   GF2X t1, t2, t3;
   GF2X *x, *y;

   long n = M_in.NumRows();
   long m = M_in.NumCols();

   if (w < 0 || w > m)
      LogicError("gauss: bad args");

   const GF2XModulus& p = GF2E::modulus();

   // Working copy; every entry is sized with 2*GF2E::WordLength() so that
   // products can be accumulated before they are reduced mod p.
   vec_GF2XVec M;

   M.SetLength(n);
   for (i = 0; i < n; i++) {
      M[i].SetSize(m, 2*GF2E::WordLength());
      for (j = 0; j < m; j++) {
         M[i][j] = rep(M_in[i][j]);
      }
   }

   l = 0;   // index of the next pivot row, i.e. number of pivots so far
   for (k = 0; k < w && l < n; k++) {
      // Reduce column k from row l downwards and remember the first row
      // whose reduced entry is nonzero.
      pos = -1;
      for (i = l; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1)) {
            pos = i;
         }
      }

      if (pos != -1) {
         swap(M[pos], M[l]);

         InvMod(t3, M[l][k], p);

         // Reduce the rest of the pivot row before it is used below.
         for (j = k+1; j < m; j++) {
            rem(M[l][j], M[l][j], p);
         }

         for (i = l+1; i < n; i++) {
            // M[i] = M[i] + M[l]*M[i,k]*t3

            MulMod(t1, M[i][k], t3, p);

            clear(M[i][k]);

            x = M[i].elts() + (k+1);
            y = M[l].elts() + (k+1);

            for (j = k+1; j < m; j++, x++, y++) {
               // *x = *x + (*y)*t1
               // (the sum is left unreduced here)

               mul(t2, *y, t1);
               add(t2, t2, *x);
               *x = t2;
            }
         }

         l++;
      }
   }

   // Convert the working copy back into M_in.
   for (i = 0; i < n; i++)
      for (j = 0; j < m; j++)
         conv(M_in[i][j], M[i][j]);

   return l;
}
// Returns the pwidth_ field of this display's representation object.
PixelCoord Display::pwidth() const {
    DisplayRep& d = *rep();
    return d.pwidth_;
}
// The main method
//
// Initializes this RecryptData object from the given context:
//   - records mvec_, consFlag and build_cache_;
//   - chooses alpha, e and ePrime through setAlphaE, first with rho (unless
//     the conservative setting was requested) and then, if requested or if
//     p^e exceeds the bound, with rho/p;
//   - derives skHwt from the two lower bounds;
//   - allocates alMod, ea, p2dConv, firstMap and secondMap with `new`, and
//     fills unpackSlotEncoding.
// A second call (alMod already set) only prints a warning and returns.
// t <= 0 selects the default weight defSkHwt+1.
void RecryptData::init(const FHEcontext& context, const Vec<long>& mvec_,
		       long t, bool consFlag, bool build_cache_, bool minimal)
{
  if (alMod != NULL) { // were we called for a second time?
    cerr << "@Warning: multiple calls to RecryptData::init\n";
    return;
  }
  assert(computeProd(mvec_) == (long)context.zMStar.getM()); // sanity check

  // Record the arguments to this function
  mvec = mvec_;
  conservative = consFlag;
  build_cache = build_cache_;

  if (t <= 0) t = defSkHwt+1; // recryption key Hwt
  hwt = t;
  long p = context.zMStar.getP();
  long phim = context.zMStar.getPhiM();
  long r = context.alMod.getR();
  long p2r = context.alMod.getPPowR();
  double logp = log((double)p);

  double noise = p2r * sqrt((t+1)*phim/3.0);
  double gamma = 2*(t+noise)/((t+1)*p2r); // ratio between numerators

  long logT = ceil(log((double)(t+2))/logp); // ceil(log_p(t+2))
  double rho = (t+1)/pow(p,logT);

  if (!conservative) {   // try alpha, e with this "aggressive" setting
    setAlphaE(alpha, e, rho, gamma, noise, logp, p2r, t);
    ePrime = e -r +1 -logT;

    // If e is too large, try again with rho/p instead of rho
    long bound = (1L << (context.bitsPerLevel-1)); // halfSizePrime/2
    if (pow(p,e) > bound) { // try the conservative setting instead
      cerr << "* p^e="<<pow(p,e)<<" is too big (bound="<<bound<<")\n";
      conservative = true;
    }
  }
  if (conservative) { // set alpha, e with a "conservative" rho/p
    setAlphaE(alpha, e, rho/p, gamma, noise, logp, p2r, t);
    ePrime = e -r -logT;
  }

  // Compute highest key-Hamming-weight that still works (not more than 256)
  // The scan starts 10 below the requested weight; the loop body is empty,
  // t is advanced until one of the bounds fails or t reaches 257.
  double qOver4 = (pow(p,e)+1)/4;
  for (t-=10; qOver4>=lowerBound2(p,r,ePrime,t,alpha)
	 &&  qOver4>=lowerBound1(p,r,ePrime,t,alpha,noise) && t<257; t++);
  skHwt = t-1;

  // First part of Bootstrapping works wrt plaintext space p^{r'}
  alMod = new PAlgebraMod(context.zMStar, e-ePrime+r);
  ea = new EncryptedArray(context, *alMod);
         // Polynomial defaults to F0, PAlgebraMod explicitly given

  p2dConv = new PowerfulDCRT(context, mvec);

  // Initialize the linear polynomial for unpacking the slots
  // (the zz_p context is saved in bak before switching to the one of ea)
  zz_pBak bak; bak.save(); ea->getAlMod().restoreContext();
  long nslots = ea->size();
  long d = ea->getDegree();

  const Mat<zz_p>& CBi=ea->getDerived(PA_zz_p()).getNormalBasisMatrixInverse();

  vector<ZZX> LM;
  LM.resize(d);
  for (long i = 0; i < d; i++) // prepare the linear polynomial
    LM[i] = rep(CBi[i][0]);

  vector<ZZX> C;
  ea->buildLinPolyCoeffs(C, LM); // "build" the linear polynomial

  unpackSlotEncoding.resize(d);  // encode the coefficients
  for (long j = 0; j < d; j++) {
    // The same coefficient C[j] is placed in every slot.
    vector<ZZX> v(nslots);
    for (long k = 0; k < nslots; k++) v[k] = C[j];
    ea->encode(unpackSlotEncoding[j], v);
  }
  firstMap = new EvalMap(*ea, minimal, mvec, true, build_cache);
  secondMap = new EvalMap(*context.ea, minimal, mvec, false, build_cache);
}
/*
 * ATL_USERMM: hand-scheduled single-precision matrix-multiply kernel written
 * with vec_* register macros and reg0..reg7 names that are defined outside
 * this view.
 *
 * NOTE(review): the semantics of vec_mov_mr / vec_mov_rm / vec_mul_* /
 * vec_add_* / vec_acc_rr / vec_splat, of rep() / align() / alignN(), and of
 * ATL_pfl1R / ATL_pfl1W are not visible here.  Comments below describe only
 * what is passed to them; any remark on what they do is an assumption to be
 * confirmed against their definitions.
 *
 * Arguments as used by this body:
 *   A        - first input; walked through pA0 (rows at 0*KB .. 5*KB).
 *   B        - second input; walked through pB0 and pB1.
 *   C, ldc   - output and its leading dimension; incCn = ldc - NB is added
 *              to pC0 after every complete M-loop.
 *   beta     - referenced only when BETAX is defined.
 *   M, N, K, alpha, lda, ldb
 *            - not referenced in this body; the loop bounds come from the
 *              compile-time constants NB, KB and PA0_INC instead.
 *
 * The #error inside the M-loop rejects any build where NB != 90.
 *
 * The exact order of the macro invocations is part of the design; do not
 * reorder statements when editing.
 */
void ATL_USERMM (const int M, const int N, const int K, const float alpha,
                 const float *A, const int lda, const float *B, const int ldb,
                 const float beta, float *C, const int ldc)
{

   const float *stM;   /* end sentinel for pA0 (M-loop) */
   const float *stN;   /* end sentinel for pB0 (N-loop) */

   const int incCn = (ldc) - NB;

   float *pC0;
   const float *pA0;
   const float *pB0;
   const float *pB1;

#ifdef BETAX
   static vector locbeta;
#endif

/*
 * First12As(i, ib, ic): opening group of a stage.  Touches A at offsets
 * i+0 and i+2 for the six rows 0*KB .. 5*KB, pairs them with pBX[ib+0]
 * (through reg7) and pBX[ib+2] (through reg3), and finishes by taking
 * pBX[ib+4] into reg7 for the Do6As that follows.  Also passes
 * &pC0[6*ic + 4] to ATL_pfl1W (presumably a prefetch of the C destination
 * - confirm).
 */
#define First12As(i, ib, ic) \
        align(); \
        vec_mov_mr(&(pBX[ ib + 0 - BBIASX]), reg7); \
        rep(); vec_mov_mr(&(pA0[0*KB + 0 + i - ABIAS]), reg4); \
        vec_mul_rr(reg7, reg4); \
        \
        rep(); vec_mov_mr(&(pA0[1*KB + 0 + i - ABIAS]), reg5); \
        vec_mul_rr(reg7, reg5); \
        vec_acc_rr(reg5, reg4); \
        \
        rep(); vec_mov_mr(&(pA0[2*KB + 0 + i - ABIAS]), reg5); \
        vec_mul_rr(reg7, reg5); \
        vec_mov_mr(&(pBX[ ib + 2 - BBIASX]), reg3); \
        \
        rep(); vec_mov_mr(&(pA0[3*KB + 0 + i - ABIAS]), reg6); \
        vec_mul_rr(reg7, reg6); \
        vec_acc_rr(reg6, reg5); \
        \
        rep(); vec_mov_mr(&(pA0[4*KB + 0 + i - ABIAS]), reg6); \
        vec_mul_rr(reg7, reg6); \
        ATL_pfl1W(&(pC0[6*(ic) + 4 - CBIAS])); \
        \
        vec_mul_mr(&(pA0[5*KB + 0 + i - ABIAS]), reg7); \
        rep(); vec_mov_mr(&(pA0[0*KB + 2 + i - ABIAS]), reg0); \
        \
        vec_acc_rr(reg7, reg6); \
        vec_mul_rr(reg3, reg0); \
        rep(); vec_mov_mr(&(pA0[1*KB + 2 + i - ABIAS]), reg7); \
        \
        vec_mul_rr(reg3, reg7); \
        vec_acc_rr(reg7, reg0); \
        rep(); vec_mov_mr(&(pA0[2*KB + 2 + i - ABIAS]), reg1); \
        \
        vec_mul_rr(reg3, reg1); \
        rep(); vec_mov_mr(&(pA0[3*KB + 2 + i - ABIAS]), reg7); \
        vec_mul_rr(reg3, reg7); \
        \
        vec_acc_rr(reg7, reg1); \
        rep(); vec_mov_mr(&(pA0[4*KB + 2 + i - ABIAS]), reg2); \
        vec_mul_rr(reg3, reg2); \
        \
        vec_mul_mr(&(pA0[5*KB + 2 + i - ABIAS]), reg3); \
        vec_acc_rr(reg3, reg2); \
        vec_mov_mr(&(pBX[ ib + 4 - BBIASX]), reg7); \
        \
        ;

/*
 * Do6As(i, ib): steady-state group.  Issues vec_add_rr of reg4/reg5/reg6
 * into reg0/reg1/reg2 (interleaved with the loads), touches A at offset i
 * for the six rows 0*KB .. 5*KB using reg7, and ends by taking pBX[ib+2]
 * into reg7 for the next group.  reg3 is used as scratch.
 */
#define Do6As(i, ib) \
        align(); \
        vec_add_rr(reg4, reg0); \
        rep(); vec_mov_mr(&(pA0[0*KB + i - ABIAS]), reg4); \
        vec_mul_rr(reg7, reg4); \
        \
        rep(); vec_mov_mr(&(pA0[1*KB + i - ABIAS]), reg3); \
        vec_mul_rr(reg7, reg3); \
        vec_acc_rr(reg3, reg4); \
        \
        vec_add_rr(reg5, reg1); \
        rep(); vec_mov_mr(&(pA0[2*KB + i - ABIAS]), reg5); \
        vec_mul_rr(reg7, reg5); \
        \
        rep(); vec_mov_mr(&(pA0[3*KB + i - ABIAS]), reg3); \
        vec_mul_rr(reg7, reg3); \
        vec_acc_rr(reg3, reg5); \
        \
        vec_add_rr(reg6, reg2); \
        rep(); vec_mov_mr(&(pA0[4*KB + i - ABIAS]), reg6); \
        vec_mul_rr(reg7, reg6); \
        \
        vec_mul_mr(&(pA0[5*KB + i - ABIAS]), reg7); \
        vec_acc_rr(reg7, reg6); \
        vec_mov_mr(&(pBX[ ib + 2 - BBIASX]), reg7); \
        \
        ;

/*
 * Last6As(i, ib): closing group of a stage.  Same sequence as Do6As except
 * for the final statement: instead of taking the next pBX element into reg7
 * it issues vec_add_rr(reg4, reg0).  The matching operations on reg5/reg1
 * and reg6/reg2 are issued by StoreResults.
 */
#define Last6As(i, ib) \
        align(); \
        vec_add_rr(reg4, reg0); \
        rep(); vec_mov_mr(&(pA0[0*KB + i - ABIAS]), reg4); \
        vec_mul_rr(reg7, reg4); \
        \
        rep(); vec_mov_mr(&(pA0[1*KB + i - ABIAS]), reg3); \
        vec_mul_rr(reg7, reg3); \
        vec_acc_rr(reg3, reg4); \
        \
        vec_add_rr(reg5, reg1); \
        rep(); vec_mov_mr(&(pA0[2*KB + i - ABIAS]), reg5); \
        vec_mul_rr(reg7, reg5); \
        \
        rep(); vec_mov_mr(&(pA0[3*KB + i - ABIAS]), reg3); \
        vec_mul_rr(reg7, reg3); \
        vec_acc_rr(reg3, reg5); \
        \
        vec_add_rr(reg6, reg2); \
        rep(); vec_mov_mr(&(pA0[4*KB + i - ABIAS]), reg6); \
        vec_mul_rr(reg7, reg6); \
        \
        vec_mul_mr(&(pA0[5*KB + i - ABIAS]), reg7); \
        vec_acc_rr(reg7, reg6); \
        vec_add_rr(reg4, reg0); \
        \
        ;

/*
 * StoreResults(ic): writes reg0, reg1, reg2 to pC0[6*ic + 0], [+2], [+4].
 * Three variants, selected at compile time:
 *   BETA0  - only the pending reg5/reg6 adds, then the stores; C is not
 *            passed to any vec_*_mr macro first.
 *   BETA1  - the existing C entries are passed to vec_add_mr before storing.
 *   else   - uses locbeta (declared only under BETAX, so this branch
 *            presumably corresponds to BETAX - confirm): the C entries are
 *            combined with locbeta through vec_mul_* and then added in.
 */
#ifdef BETA0
#define StoreResults(ic) \
        align(); \
        vec_add_rr(reg5, reg1); \
        vec_add_rr(reg6, reg2); \
        vec_mov_rm(reg0, &(pC0[6*(ic) + 0 - CBIAS])); \
        vec_mov_rm(reg1, &(pC0[6*(ic) + 2 - CBIAS])); \
        \
        vec_mov_rm(reg2, &(pC0[6*(ic) + 4 - CBIAS])); \
        ;
#elif defined(BETA1)
#define StoreResults(ic) \
        align(); \
        rep(); vec_add_mr(&(pC0[6*(ic) + 0 - CBIAS]), reg0); \
        rep(); vec_add_mr(&(pC0[6*(ic) + 2 - CBIAS]), reg1); \
        vec_add_rr(reg5, reg1); \
        \
        rep(); vec_add_mr(&(pC0[6*(ic) + 4 - CBIAS]), reg2); \
        rep(); vec_add_rr(reg6, reg2); \
        rep(); vec_mov_rm(reg0, &(pC0[6*(ic) + 0 - CBIAS])); \
        \
        vec_mov_rm(reg1, &(pC0[6*(ic) + 2 - CBIAS])); \
        vec_mov_rm(reg2, &(pC0[6*(ic) + 4 - CBIAS])); \
        ;
#else
#define StoreResults(ic) \
        align(); \
        rep(); vec_mov_mr(locbeta, reg3); \
        vec_mov_mr(&(pC0[6*(ic) + 0 - CBIAS]), reg7); \
        vec_mov_mr(&(pC0[6*(ic) + 2 - CBIAS]), reg4); \
        \
        rep(); vec_mul_rr(reg3, reg7); \
        rep(); vec_mul_rr(reg3, reg4); \
        rep(); vec_mul_mr(&(pC0[6*(ic) + 4 - CBIAS]), reg3); \
        \
        vec_add_rr(reg7, reg0); \
        vec_add_rr(reg5, reg1); \
        vec_add_rr(reg6, reg2); \
        vec_add_rr(reg4, reg1); \
        \
        vec_add_rr(reg3, reg2); \
        vec_mov_rm(reg0, &(pC0[6*(ic) + 0 - CBIAS])); \
        vec_mov_rm(reg1, &(pC0[6*(ic) + 2 - CBIAS])); \
        vec_mov_rm(reg2, &(pC0[6*(ic) + 4 - CBIAS])); \
        \
        ;
#endif

/*
 * FirstSteps / Steps / LastSteps(i, ib, stage): each covers six consecutive
 * offsets ending at i (i-6, i-4, i-2; First12As handles i-6 and i-4 in one
 * go).  `stage` shifts the A offset by stage * PA0_INC and, in FirstSteps,
 * is also forwarded to First12As as its `ic` argument.
 */
#define FirstSteps(i, ib, stage) \
        First12As((((i) - 6) + ((stage - 0) * PA0_INC)), ((ib) - 6), stage) \
        Do6As((((i) - 2) + ((stage - 0) * PA0_INC)), ((ib) - 2)) \
        ;

#define Steps(i, ib, stage) \
        Do6As((((i) - 6) + ((stage - 0) * PA0_INC)), ((ib) - 6)) \
        Do6As((((i) - 4) + ((stage - 0) * PA0_INC)), ((ib) - 4)) \
        Do6As((((i) - 2) + ((stage - 0) * PA0_INC)), ((ib) - 2)) \
        ;

#define LastSteps(i, ib, stage) \
        Do6As((((i) - 6) + ((stage - 0) * PA0_INC)), ((ib) - 6)) \
        Do6As((((i) - 4) + ((stage - 0) * PA0_INC)), ((ib) - 4)) \
        Last6As((((i) - 2) + ((stage - 0) * PA0_INC)), ((ib) - 2)) \
        ;

   vec_enter();

#ifdef BETAX
   /* Stash beta in locbeta (via reg3) for the generic StoreResults variant. */
   vec_splat(&beta, reg3);
   vec_mov_rm(reg3, locbeta);
#endif

   { /* block "prefetch" the A submatrix */
      /* Walks A as long long, 16 elements per iteration, touching elements
         0 and 8 of each step, until 506*8 elements have been covered.  The
         values taken into reg0/reg1 are not used afterwards. */
      register const long long *pAd=(const long long *) A;
      register const long long *pAe=pAd + (506*8);
      align();
      do {
         rep(); vec_mov_mr(&(pAd[ 0]), reg0);
         vec_mov_mr(&(pAd[ 8]), reg1);
         rep(); pAd += 8*2;
      } while (pAd != pAe);
   }

   /* Loop sentinels and biased working pointers.  pB0 and pB1 both start at
      B, offset by different biases (BBIAS0 / BBIAS1). */
   stM = A + NB*NB + ABIAS;
   stN = B + NB*NB + BBIAS0;

   pC0=C + CBIAS;
   pA0=A + ABIAS;
   pB0=B + BBIAS0;
   pB1=B + BBIAS1;

   /* block "prefetch" some of the B submatrix */
   /* Touches B at offsets 0, 16, ..., 192; the biases cancel in each index. */
   align();
   vec_mov_mr(&(pB0[ 0 - BBIAS0]), reg0);
   vec_mov_mr(&(pB0[ 16 - BBIAS0]), reg1);
   rep(); vec_mov_mr(&(pB1[ 32 - BBIAS1]), reg2);
   rep(); vec_mov_mr(&(pB1[ 48 - BBIAS1]), reg3);
   vec_mov_mr(&(pB1[ 64 - BBIAS1]), reg4);
   vec_mov_mr(&(pB1[ 80 - BBIAS1]), reg5);
   vec_mov_mr(&(pB1[ 96 - BBIAS1]), reg6);
   vec_mov_mr(&(pB1[112 - BBIAS1]), reg7);
   rep(); vec_mov_mr(&(pB0[128 - BBIAS0]), reg0);
   rep(); vec_mov_mr(&(pB1[144 - BBIAS1]), reg1);
   rep(); vec_mov_mr(&(pB1[160 - BBIAS1]), reg2);
   rep(); vec_mov_mr(&(pB1[176 - BBIAS1]), reg3);
   rep(); vec_mov_mr(&(pB1[192 - BBIAS1]), reg4);

   align();
   do { /* N-loop */
      /* ATL_pfl1R on B at 2*KB + {0,16} here and {32,48,64,80,88} inside
         the M-loop: addresses 2*KB past the current pB0 position. */
      rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 0]));
      rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 16]));

      align();
      do { /* M-loop */
         /* Three stages (0, 1, 2) per iteration.  Within each stage the
            offsets 0..59 are addressed through pB0 and 60..89 through pB1
            by re-pointing the pBX / BBIASX macros. */
#undef pBX
#undef BBIASX
#define pBX pB0
#define BBIASX BBIAS0
         FirstSteps(6, 6, 0);
         Steps(12, 12, 0);
         Steps(18, 18, 0);
         Steps(24, 24, 0);
         Steps(30, 30, 0);
         Steps(36, 36, 0);
         Steps(42, 42, 0);
         Steps(48, 48, 0);
         Steps(54, 54, 0);
         Steps(60, 60, 0);
#undef pBX
#undef BBIASX
#define pBX pB1
#define BBIASX BBIAS1
         Steps(66, 66, 0);
         Steps(72, 72, 0);
         Steps(78, 78, 0);
         Steps(84, 84, 0);
         LastSteps(90, 90, 0);
         StoreResults(0);

         alignN("8");
         rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 32]));
#undef pBX
#undef BBIASX
#define pBX pB0
#define BBIASX BBIAS0
         FirstSteps(6, 6, 1);
         Steps(12, 12, 1);
         Steps(18, 18, 1);
         Steps(24, 24, 1);
         Steps(30, 30, 1);
         Steps(36, 36, 1);
         Steps(42, 42, 1);
         Steps(48, 48, 1);
         Steps(54, 54, 1);
         Steps(60, 60, 1);
#undef pBX
#undef BBIASX
#define pBX pB1
#define BBIASX BBIAS1
         Steps(66, 66, 1);
         Steps(72, 72, 1);
         Steps(78, 78, 1);
         Steps(84, 84, 1);
         LastSteps(90, 90, 1);
         StoreResults(1);

         alignN("8");
         rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 48]));
#undef pBX
#undef BBIASX
#define pBX pB0
#define BBIASX BBIAS0
         FirstSteps(6, 6, 2);
         Steps(12, 12, 2);
         Steps(18, 18, 2);
         Steps(24, 24, 2);
         Steps(30, 30, 2);
         Steps(36, 36, 2);
         Steps(42, 42, 2);
         Steps(48, 48, 2);
         Steps(54, 54, 2);
         Steps(60, 60, 2);
#undef pBX
#undef BBIASX
#define pBX pB1
#define BBIASX BBIAS1
         Steps(66, 66, 2);
         Steps(72, 72, 2);
         Steps(78, 78, 2);
         Steps(84, 84, 2);
         LastSteps(90, 90, 2);

         rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 64]));
         rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 80]));
         /* pA0 is advanced BEFORE StoreResults(2); the store only uses pC0,
            so the third stage's results still land at 6*2 + {0,2,4}. */
         pA0 += PA0_INC*3;
         rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 88]));
         StoreResults(2);
         pC0 += 6*3;

         /* The step lists above are written out for exactly this size. */
#if (NB != 90)
#error "NB must be 90"
#endif

      } while(pA0 != stM);

      /* Next pass: advance both B pointers by NB, move pC0 on by incCn, and
         rewind pA0 by NB*NB (the distance from its start to stM). */
      pB0 += NB;
      pB1 += NB;
      pC0 += incCn;
      pA0 -= NB*NB;

   } while(pB0 != stN);

   vec_exit();
}