示例#1
0
文件: READ.CPP 项目: jbailhache/log
/*
 * Read one expression ("dem") written in prefix notation from the input
 * delivered by readchar() and return it.
 *
 * Blanks, tabs, newlines and NUL characters in front of the expression are
 * skipped.  The next character selects the construct; constructs that take
 * operands read them with recursive calls, strictly left to right.  Any
 * character without a dedicated case is looked up in defined_dems[].
 *
 * Side effects visible in this function:
 *   '"' and '|'  append the operand to no_red[] (nnr is incremented);
 *   '('          parses a hexadecimal value into the global flags, which is
 *                restored after the operand unless FLAG_PERM is set in it;
 *   'u'          replaces the global used while its second operand is read.
 *
 * Fixes relative to the previous version:
 *   - the "[digits]" path traced the uninitialized local d instead of the
 *     freshly built d1;
 *   - the "(hex)" reader could write past the end of buf;
 *   - an over-long "[symbol" passed an unterminated buf to Sym().
 */
DEM read_dem ()
{
    int c;
    DEM f, a, b, d, x;
    int i;
    char buf[200];
    DEM s;
    DEM d1;
    int flags1;
    DEM used1;
    extern DEM used;

    /* Skip white space and NUL characters. */
    do c = readchar ();
    while (c==' ' || c=='\t' || c=='\n' || c==0);

    switch (c)
    {
        /* Constants. */
        case 'I': return I;
        case 'K': return K;
        case 'S': return S;
        case 'E': return E;
        case 'F': return If;
        case 'O': return Ord;

        case '-':
            f = read_dem ();
            a = read_dem ();
            return ap (f, a);

        case '/':
            a = read_dem ();
            b = read_dem ();
            return transym (a, b);

        case 'T':
            a = read_dem ();
            b = read_dem ();
            return trans (a, b);

        case 'X':
            a = read_dem ();
            return sym (a);

        case '#':
            a = read_dem ();
            b = read_dem ();
            return Axm (a, b);

        case 'i':
            a = read_dem ();
            return defI (a);

        case 'k':
            a = read_dem ();
            b = read_dem ();
            return defK (a, b);

        case 's':
            a = read_dem ();
            b = read_dem ();
            d = read_dem ();
            return defS (a, b, d);

        case ')':
            a = read_dem ();
            b = read_dem ();
            return IfNode (a, b);

        case '1': return Ext1;
        case '2': return Ext2;
        case '3': return Ext3;
        case '4': return Ext4;
        case '5': return Ext5;
        case '6': return Ext6;

        case 'e': return AE;
        case 'f': return EA0;
        case 'm': return MP;
        case 'a': return AI;
        case 'b': return AK;
        case 'c': return AS;
        case 'r': return RPA;

        case '0': return ZeroIsOrd;
        case '+': return SucIsOrd;
        case 'w': return LimIsOrd;
        case 'p': return PredIsOrd;
        case 'n': return StepIsOrd;
        case 'W': return TfI;

        case '<':
            a = read_dem ();
            return left (a);

        case '>':
            a = read_dem ();
            return right (a);

        case '\'':
            a = read_dem ();
            return rep(a);

        case '%':
            /* Reduce the operand, tracing both the input and the result. */
            a = read_dem ();
            trace_dem ("read", a);
            b = red (a);
            trace_dem ("red", b);
            return b;

        case 'R':
            a = read_dem ();
            return red1 (a, 0);

        case '@':
            a = read_dem ();
            return reduc (a, 1);

        case '~':
            a = read_dem ();
            return reduc (a, 0);

        case '$':
            a = read_dem ();
            return redu (a);

        case 'x':
            a = read_dem ();
            b = read_dem ();
            return ext (a, b);

        case '\\':
            a = read_dem ();
            b = read_dem ();
            trace_dem ("^(0)", a);
            trace_dem ("^(1)", b);
            d = exten (a, b);
            trace_dem ("^(r)", d);
            return d;

        case ']':
            a = read_dem ();
            b = read_dem ();
            d = dbextens (a, b);
            return d;

        case 'l':
            a = read_dem ();
            b = read_dem ();
            return Ext (a, b);

        case 'L':
            a = read_dem ();
            b = read_dem ();
            return Lambda (a, b);

        case '.':
            a = read_dem ();
            return DBLambda (a);

        case '!':
            a = read_dem ();
            b = read_dem ();
            return DB_lambda (a, b);

        case '?':
            a = read_dem ();
            b = read_dem ();
            return DB_Subst (a, b);

        case '_':
            a = read_dem ();
            b = read_dem ();
            d = read_dem ();
            return Subst (a, b, d);

        case ':':
            /* Operands are read in the order a, b, d but combined as
               ap (exten (a, d), b). */
            a = read_dem ();
            b = read_dem ();
            d = read_dem ();
            return ap (exten(a,d) ,b);

        case 'V':
            /* Rebuild d's node with DB_lambda (x, .) applied to each of its
               sub-dems 0..2; a NULL third sub-dem stays NULL. */
            x = read_dem ();
            d = read_dem ();
            a = mk_dem (node(d), 0, NULL,
                    DB_lambda (x, subdem(0,d)),
                    DB_lambda (x, subdem(1,d)),
                    subdem(2,d) == NULL ? NULL :
                    DB_lambda (x, subdem(2,d)),
                    NULL, NULL, NULL);
            return a;

        case 'A':
            /* Same shape as 'V', with ap (x, .) instead of DB_lambda. */
            x = read_dem ();
            d = read_dem ();
            a = mk_dem (node(d), 0, NULL,
                    ap (x, subdem(0,d)),
                    ap (x, subdem(1,d)),
                    subdem(2,d) == NULL ? NULL :
                    ap (x, subdem(2,d)),
                    NULL, NULL, NULL);
            return a;

        case '"':
            /* Record the operand in no_red[] and return it. */
            a = read_dem ();
            no_red[nnr++] = a;
            return a;

        case '|':
            /* Record the first operand in no_red[], return the second. */
            a = read_dem ();
            no_red[nnr++] = a;
            b = read_dem ();
            return b;

        case 'u':
            /* The first operand becomes the global used while the second one
               is read; the previous value is restored afterwards. */
            used1 = used;
            used = read_dem ();
            a = read_dem ();
            used = used1;
            return a;

        case '(':
            /* "(hex)" prefix: everything up to ')' is parsed as a
               hexadecimal value for the global flags. */
            flags1 = flags;
            i = 0;
            for (;;)
            {
                c = readchar ();
                if (c == ')')
                    break;
                /* Keep room for the terminator: characters that do not fit
                   are dropped instead of overrunning buf. */
                if (i < (int)sizeof(buf) - 1)
                    buf[i++] = c;
            }
            buf[i] = 0;
            sscanf (buf, "%x", &flags);
            a = read_dem ();
            if ((flags & FLAG_PERM) == 0)
                flags = flags1;
            return a;

        case ',':
            a = read_dem ();
            return step (a);

        case '*':
            a = read_dem ();
            return rstep (a);

        case '`':
            a = read_dem ();
            return list_ap (a, nil);

        case '&':
            /* Two-character constants: '&' followed by a selector. */
            c = readchar ();
            switch (c)
            {
                case '/': return itransym;
                case 'i': return idefI;
                case 'k': return idefK;
                case 's': return idefS;
                case '<': return ileft;
                case '>': return iright;
                case '=': return ieq;
                case '#': return inode;
                case '0': return isubdem0;
                case '1': return isubdem1;
                case '2': return isubdem2;
                case '%': return ired;
                case '$': return iredu;
                case '\\': return iext;
                case ',': return istep;
                case '*': return irstep;
                default:
                    fprintf (stderr, "Undefined &%c.\n",
                             c);
                    return I;
            }
            break;

        case '[':
            /* "[text]": text starting with a digit is a DBVar index,
               anything else is a symbol name. */
            for (i=0; i<(int)sizeof(buf); i++)
            {
                c = readchar();
                if (c == ']')
                {
                    buf[i] = 0;
#ifdef TRACE1
                    printf ("buf=<%s>\n", buf);
#endif
                    if (buf[0] >= '0' && buf[0] <= '9')
                    {
#ifdef TRACE
                        printf ("\nDBVar <%s>", buf);
#endif
                        d1 = DBVar (atoi(buf));
                        /* Trace the value just built (d is not assigned on
                           this path). */
                        trace_dem ("", d1);
                        return d1;
                    }
                    s = Sym(buf);
#ifdef TRACE1
                    trace_dem ("read symbol", s);
#endif
                    if (subdem(0,s) == NULL)
                    {
#ifdef TRACE1
                        trace_dem ("return symbol", s);
#endif
                        return s;
                    }
                    else
                    {
                        /* The symbol has a first sub-dem: return that
                           instead of the symbol itself. */
#ifdef TRACE
                        trace_dem ("return value of", s);
#endif
                        return subdem(0,s);
                    }

                }
                buf[i] = c;
            }
            /* No ']' within sizeof(buf) characters: buf is full and has no
               terminator yet, so truncate before handing it to Sym(). */
            buf[sizeof(buf) - 1] = 0;
            fprintf (stderr, "Symbol too long\n");
            return Sym(buf);

        default:
            /* Every other character indexes the table of defined dems. */
            return defined_dems[(unsigned char)c];
    }

}
// Computes the determinant d of the square matrix A over GF2E and, when every
// column yields a nonzero pivot, the inverse X.
//
// The work is done on an augmented matrix M = [A | I] whose entries are kept
// as GF2X polynomials.  Outcomes visible in the code:
//   - A not square: reported through LogicError.
//   - n == 0: set(d) is called and X becomes 0 x 0.
//   - some column has no nonzero pivot: d is cleared and the function returns
//     without assigning X.
//   - otherwise: X is n x n and d receives the product of the pivots.
void inv(GF2E& d, mat_GF2E& X, const mat_GF2E& A)
{
   long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("inv: nonsquare matrix");

   if (n == 0) {
      set(d);
      X.SetDims(0, 0);
      return;
   }

   long i, j, k, pos;
   GF2X t1, t2;
   GF2X *x, *y;

   const GF2XModulus& p = GF2E::modulus();

   vec_GF2XVec M;

   M.SetLength(n);

   // Build M = [A | I].  Each entry is sized with 2*GF2E::WordLength()
   // (presumably so an unreduced product of two residues fits -- confirm
   // against the GF2XVec documentation).
   for (i = 0; i < n; i++) {
      M[i].SetSize(2*n, 2*GF2E::WordLength());
      for (j = 0; j < n; j++) {
         M[i][j] = rep(A[i][j]);
         clear(M[i][n+j]);
      }
      set(M[i][n+i]);
   }

   GF2X det;
   set(det);

   for (k = 0; k < n; k++) {
      // Reduce column k of the remaining rows modulo p and remember the first
      // row with a nonzero entry.
      pos = -1;
      for (i = k; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1)) {
            pos = i;
         }
      }

      if (pos != -1) {
         // No sign adjustment of det accompanies the row swap in this
         // routine.
         if (k != pos) {
            swap(M[pos], M[k]);
         }

         MulMod(det, det, M[k][k], p);

         // make M[k, k] == -1 mod p, and make row k reduced
         // (here the row is multiplied by the plain inverse of the pivot;
         // no negation is applied)

         InvMod(t1, M[k][k], p);
         for (j = k+1; j < 2*n; j++) {
            rem(t2, M[k][j], p);
            MulMod(M[k][j], t2, t1, p);
         }

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] + M[k]*M[i,k]

            t1 = M[i][k];   // this is already reduced

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            // The products are accumulated with mul/add, i.e. without a
            // reduction modulo p at this point.
            for (j = k+1; j < 2*n; j++, x++, y++) {
               // *x = *x + (*y)*t1

               mul(t2, *y, t1);
               add(*x, *x, t2);
            }
         }
      }
      else {
         // No pivot in column k: report determinant zero, X is left as is.
         clear(d);
         return;
      }
   }

   // Back-substitution, one column of X at a time, from the last row up.
   // conv() turns each accumulated polynomial into a GF2E element.
   X.SetDims(n, n);
   for (k = 0; k < n; k++) {
      for (i = n-1; i >= 0; i--) {
         clear(t1);
         for (j = i+1; j < n; j++) {
            mul(t2, rep(X[j][k]), M[i][j]);
            add(t1, t1, t2);
         }
         add(t1, t1, M[i][n+k]);
         conv(X[i][k], t1);
      }
   }

   conv(d, det);
}
// Fills X with (m - r) row vectors of length m, where m = A.NumRows() and r
// is the value gauss() returns for the transpose of A.
//
// Each row of X is produced by walking the columns of the reduced transpose
// from right to left: a column that holds no pivot receives 1 for exactly one
// of the output rows and 0 for the others; a pivot column receives the
// combination of the already-filled entries to its right, scaled by the
// inverse of the pivot.
void kernel(mat_GF2E& X, const mat_GF2E& A)
{
   const long m = A.NumRows();

   mat_GF2E M;
   transpose(M, A);
   const long r = gauss(M);

   X.SetDims(m-r, m);

   GF2X acc, prod;
   GF2E entry;

   // pivotRow[c] is the row of M whose pivot sits in column c, or -1.
   vec_long pivotRow;
   pivotRow.SetLength(m);
   for (long c = 0; c < m; c++)
      pivotRow[c] = -1;

   // pivotInv[c] holds the inverse of that pivot (only for pivot columns).
   vec_GF2E pivotInv;
   pivotInv.SetLength(m);

   long col = -1;
   for (long row = 0; row < r; row++) {
      // Advance to the first nonzero entry of this row, strictly to the
      // right of the previous pivot column.
      for (col++; IsZero(M[row][col]); col++) {
      }

      pivotRow[col] = row;
      inv(pivotInv[col], M[row][col]);
   }

   for (long k = 0; k < m-r; k++) {
      vec_GF2E& v = X[k];
      long freeSeen = 0;   // pivot-free columns met so far in this pass

      for (long j = m-1; j >= 0; j--) {
         const long row = pivotRow[j];

         if (row == -1) {
            if (freeSeen == k)
               set(v[j]);
            else
               clear(v[j]);
            freeSeen++;
            continue;
         }

         clear(acc);
         for (long s = j+1; s < m; s++) {
            mul(prod, rep(v[s]), rep(M[row][s]));
            add(acc, acc, prod);
         }

         conv(entry, acc);
         mul(entry, entry, pivotInv[j]);
         v[j] = entry;
      }
   }
}
 // Constructor: stores the modulus _m in m and sizes h and p to size(s)+1
 // entries, then fills them with
 //     p[0] = 1,  p[i+1] = p[i] * b % m
 //     h[0] = 0,  h[i+1] = (h[i] * b + s[i]) % m
 // The products are widened to ll before the modulo is taken.
 // NOTE(review): rep, size, ll and b are defined outside this view;
 // presumably rep(i,0,size(s)) runs i over 0 .. size(s)-1 and b is the hash
 // base -- confirm against their definitions.
 hasher(string s, int _m) : m(_m), h(size(s)+1), p(size(s)+1) {
     p[0] = 1; h[0] = 0;
     rep(i,0,size(s)) p[i+1] = (ll)p[i] * b % m;
     rep(i,0,size(s)) h[i+1] = ((ll)h[i] * b + s[i]) % m; }
// Computes the determinant d of the square matrix M_in over GF2E.
//
// M_in is copied into a working matrix of GF2X polynomials and eliminated
// column by column; d receives the product of the pivots.  Outcomes visible
// in the code:
//   - M_in not square: reported through LogicError.
//   - n == 0: set(d) is called.
//   - some column has no nonzero pivot: d is cleared.
void determinant(GF2E& d, const mat_GF2E& M_in)
{
   long k, n;
   long i, j;
   long pos;
   GF2X t1, t2;
   GF2X *x, *y;

   const GF2XModulus& p = GF2E::modulus();

   n = M_in.NumRows();

   if (M_in.NumCols() != n)
      LogicError("determinant: nonsquare matrix");

   if (n == 0) {
      set(d);
      return;
   }

   vec_GF2XVec M;

   // Working copy of M_in.  Each entry is sized with 2*GF2E::WordLength()
   // (presumably so an unreduced product of two residues fits -- confirm
   // against the GF2XVec documentation).
   M.SetLength(n);
   for (i = 0; i < n; i++) {
      M[i].SetSize(n, 2*GF2E::WordLength());
      for (j = 0; j < n; j++)
         M[i][j] = rep(M_in[i][j]);
   }

   GF2X det;
   set(det);

   for (k = 0; k < n; k++) {
      // Reduce column k of the remaining rows modulo p and remember the first
      // row with a nonzero entry.
      pos = -1;
      for (i = k; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1))
            pos = i;
      }

      if (pos != -1) {
         // No sign adjustment of det accompanies the row swap in this
         // routine.
         if (k != pos) {
            swap(M[pos], M[k]);
         }

         MulMod(det, det, M[k][k], p);

         // make M[k, k] == -1 mod p, and make row k reduced
         // (here the row is multiplied by the plain inverse of the pivot;
         // no negation is applied)

         InvMod(t1, M[k][k], p);
         for (j = k+1; j < n; j++) {
            rem(t2, M[k][j], p);
            MulMod(M[k][j], t2, t1, p);
         }

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] + M[k]*M[i,k]

            t1 = M[i][k];   // this is already reduced

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            // The products are accumulated with mul/add, i.e. without a
            // reduction modulo p at this point.
            for (j = k+1; j < n; j++, x++, y++) {
               // *x = *x + (*y)*t1

               mul(t2, *y, t1);
               add(*x, *x, t2);
            }
         }
      }
      else {
         // No pivot in column k: the determinant is reported as zero.
         clear(d);
         return;
      }
   }

   conv(d, det);
}
示例#6
0
文件: display.cpp 项目: PNCG/neuron
// Returns the height_ member of this display's DisplayRep.
Coord Display::a_height() const {
    return rep()->height_;
}
示例#7
0
文件: display.cpp 项目: PNCG/neuron
// Destroys the display by deleting its DisplayRep.
// The deferred unref of the rep's style_ is disabled (it was already
// commented out), so only the rep itself is released here.
Display::~Display()
{
    delete rep();
}
示例#8
0
// Row-reduces M in place over zz_p, looking for pivots only in the first w
// columns, and returns the number of pivots found.
//
// w outside [0, M.NumCols()] is reported through LogicError("gauss: bad args").
// For every pivot, the entries below it in the pivot column are cleared and
// the remaining columns of those rows are updated with a preconditioned
// modular multiply-add.
long gauss(mat_zz_p& M, long w)
{
   const long nrows = M.NumRows();
   const long ncols = M.NumCols();

   if (w < 0 || w > ncols)
      LogicError("gauss: bad args");

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   zz_p scale, negPivotInv;
   long rank = 0;

   for (long col = 0; col < w && rank < nrows; col++) {

      // First row at or below 'rank' with a nonzero entry in this column.
      long pivot = rank;
      while (pivot < nrows && IsZero(M[pivot][col]))
         pivot++;

      if (pivot == nrows)
         continue;   // no pivot here, try the next column

      swap(M[pivot], M[rank]);

      // negPivotInv = -1 / pivot
      inv(negPivotInv, M[rank][col]);
      negate(negPivotInv, negPivotInv);

      for (long row = rank+1; row < nrows; row++) {
         // M[row] += M[rank] * (M[row][col] * negPivotInv)

         mul(scale, M[row][col], negPivotInv);

         const long S = rep(scale);
         mulmod_precon_t Sprecon = PrepMulModPrecon(S, p, pinv);

         clear(M[row][col]);

         zz_p *dst = M[row].elts() + (col+1);
         const zz_p *src = M[rank].elts() + (col+1);

         for (long j = col+1; j < ncols; j++, dst++, src++) {
            const long prod = MulModPrecon(rep(*src), S, p, Sprecon);
            dst->LoopHole() = AddMod(prod, rep(*dst), p);
         }
      }

      rank++;
   }

   return rank;
}
示例#9
0
文件: mat_ZZ_p.c 项目: axelexic/NTL
// Solves a square linear system over ZZ_p and reports the determinant in d.
//
// The working matrix is loaded with M[i][j] = rep(A[j][i]) and last column b,
// i.e. the rows of M are the columns of A, so the system solved is the one
// with A transposed (x * A = b when x and b are read as row vectors).
// Outcomes visible in the code:
//   - A not square or b of the wrong length: reported through LogicError.
//   - n == 0: set(d) is called and X gets length 0.
//   - some column has no nonzero pivot: d is cleared and the function returns
//     without assigning X.
//   - otherwise: X has length n and d receives the signed product of pivots.
void solve(ZZ_p& d, vec_ZZ_p& X, 
           const mat_ZZ_p& A, const vec_ZZ_p& b)

{
   long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("solve: nonsquare matrix");

   if (b.length() != n)
      LogicError("solve: dimension mismatch");

   if (n == 0) {
      set(d);
      X.SetLength(0);
      return;
   }

   long i, j, k, pos;
   ZZ t1, t2;
   ZZ *x, *y;

   const ZZ& p = ZZ_p::modulus();

   vec_ZZVec M;
   // t1 = n * p^2; its size() is used below as the storage size of every
   // entry of M (presumably a bound on the unreduced sums accumulated during
   // elimination -- confirm).
   sqr(t1, p);
   mul(t1, t1, n);

   M.SetLength(n);

   // Augmented matrix [A^T | b].
   for (i = 0; i < n; i++) {
      M[i].SetSize(n+1, t1.size());
      for (j = 0; j < n; j++) 
         M[i][j] = rep(A[j][i]);
      M[i][n] = rep(b[i]);
   }

   ZZ det;
   set(det);

   for (k = 0; k < n; k++) {
      // Reduce column k of the remaining rows modulo p and remember the first
      // row with a nonzero entry.
      pos = -1;
      for (i = k; i < n; i++) {
         rem(t1, M[i][k], p);
         M[i][k] = t1;
         if (pos == -1 && !IsZero(t1)) {
            pos = i;
         }
      }

      if (pos != -1) {
         // A row swap flips the sign of the determinant.
         if (k != pos) {
            swap(M[pos], M[k]);
            NegateMod(det, det, p);
         }

         MulMod(det, det, M[k][k], p);

         // make M[k, k] == -1 mod p, and make row k reduced

         InvMod(t1, M[k][k], p);
         NegateMod(t1, t1, p);
         for (j = k+1; j <= n; j++) {
            rem(t2, M[k][j], p);
            MulMod(M[k][j], t2, t1, p);
         }

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] + M[k]*M[i,k]

            t1 = M[i][k];   // this is already reduced

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            // The products are accumulated with mul/add, i.e. without a
            // reduction modulo p at this point.
            for (j = k+1; j <= n; j++, x++, y++) {
               // *x = *x + (*y)*t1

               mul(t2, *y, t1);
               add(*x, *x, t2);
            }
         }
      }
      else {
         // No pivot in column k: report determinant zero, X is left as is.
         clear(d);
         return;
      }
   }

   // Back-substitution from the last unknown up; conv() turns each
   // accumulated integer into a ZZ_p element.
   X.SetLength(n);
   for (i = n-1; i >= 0; i--) {
      clear(t1);
      for (j = i+1; j < n; j++) {
         mul(t2, rep(X[j]), M[i][j]);
         add(t1, t1, t2);
      }
      sub(t1, t1, M[i][n]);
      conv(X[i], t1);
   }

   conv(d, det);
}
示例#10
0
// X = A * B over zz_p.  A dimension mismatch is reported through
// LogicError("matrix mul: dimension mismatch").
//
// With more than one result column, each nonzero entry of A is turned into a
// preconditioned multiplier once and applied to a whole row of B, with the
// row of X accumulated in the shared scratch vector mul_aux_vec.  With at
// most one column the plain triple loop (1-based accessors) is used.
static
void mul_aux(mat_zz_p& X, const mat_zz_p& A, const mat_zz_p& B)
{
   const long rows  = A.NumRows();
   const long inner = A.NumCols();
   const long cols  = B.NumCols();

   if (inner != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   X.SetDims(rows, cols);

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   if (cols <= 1) {
      // Narrow result: no preconditioning.
      for (long i = 1; i <= rows; i++) {
         for (long j = 1; j <= cols; j++) {
            long sum = 0;
            for (long k = 1; k <= inner; k++) {
               const long term = MulMod(rep(A(i,k)), rep(B(k,j)), p, pinv);
               sum = AddMod(sum, term, p);
            }
            X(i,j).LoopHole() = sum;
         }
      }
      return;
   }

   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(cols);
   long *acc = mul_aux_vec.elts();

   for (long i = 0; i < rows; i++) {
      const zz_p *arow = A[i].elts();

      for (long j = 0; j < cols; j++)
         acc[j] = 0;

      for (long k = 0; k < inner; k++) {
         const long aa = rep(arow[k]);
         if (aa == 0)
            continue;   // a zero multiplier contributes nothing

         const zz_p *brow = B[k].elts();
         mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv);

         for (long j = 0; j < cols; j++) {
            const long term = MulModPrecon(rep(brow[j]), aa, p, aapinv);
            acc[j] = AddMod(acc[j], term, p);
         }
      }

      zz_p *xrow = X[i].elts();
      for (long j = 0; j < cols; j++)
         xrow[j].LoopHole() = acc[j];
   }
}
示例#11
0
// Computes the determinant d of the square matrix A over zz_p and, when every
// column yields a nonzero pivot, the inverse X.
//
// The work is done on an augmented matrix M = [A | I].  Outcomes visible in
// the code:
//   - A not square: reported through LogicError.
//   - n == 0: set(d) is called and X becomes 0 x 0.
//   - some column has no nonzero pivot: d is cleared and the function returns
//     without assigning X.
//   - otherwise: X is n x n and d receives the signed product of the pivots.
void inv(zz_p& d, mat_zz_p& X, const mat_zz_p& A)
{
   long n = A.NumRows();
   if (A.NumCols() != n)
      LogicError("inv: nonsquare matrix");

   if (n == 0) {
      set(d);
      X.SetDims(0, 0);
      return;
   }

   long i, j, k, pos;
   zz_p t1, t2, t3;
   zz_p *x, *y;

   // Build M = [A | I].
   mat_zz_p M;
   M.SetDims(n, 2*n);
   for (i = 0; i < n; i++) {
      for (j = 0; j < n; j++) {
         M[i][j] = A[i][j];
         clear(M[i][n+j]);
      }
      set(M[i][n+i]);
   }

   zz_p det;
   set(det);

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   for (k = 0; k < n; k++) {
      // First row at or below k with a nonzero entry in column k.
      pos = -1;
      for (i = k; i < n; i++) {
         if (!IsZero(M[i][k])) {
            pos = i;
            break;
         }
      }

      if (pos != -1) {
         // A row swap flips the sign of the determinant.
         if (k != pos) {
            swap(M[pos], M[k]);
            negate(det, det);
         }

         mul(det, det, M[k][k]);

         // The pivot is replaced by its inverse; the back-substitution below
         // multiplies by M[i][i] and relies on this.
         inv(t3, M[k][k]);
         M[k][k] = t3;

         for (i = k+1; i < n; i++) {
            // M[i] = M[i] - M[k]*M[i,k]*t3

            mul(t1, M[i][k], t3);
            negate(t1, t1);

            x = M[i].elts() + (k+1);
            y = M[k].elts() + (k+1);

            // t1 is the same multiplier for the whole row, so its
            // preconditioned form is prepared once.
            long T1 = rep(t1);
            mulmod_precon_t t1pinv = PrepMulModPrecon(T1, p, pinv); // T1*pinv;
            long T2;

            for (j = k+1; j < 2*n; j++, x++, y++) {
               // *x = *x + (*y)*t1

               T2 = MulModPrecon(rep(*y), T1, p, t1pinv);
               x->LoopHole() = AddMod(rep(*x), T2, p);
            }
         }
      }
      else {
         // No pivot in column k: report determinant zero, X is left as is.
         clear(d);
         return;
      }
   }

   // Back-substitution, one column of X at a time, from the last row up:
   // X[i][k] = (M[i][n+k] - sum_{j>i} X[j][k]*M[i][j]) * M[i][i],
   // where M[i][i] holds the inverse of the pivot.
   X.SetDims(n, n);
   for (k = 0; k < n; k++) {
      for (i = n-1; i >= 0; i--) {
         clear(t1);
         for (j = i+1; j < n; j++) {
            mul(t2, X[j][k], M[i][j]);
            add(t1, t1, t2);
         }
         sub(t1, M[i][n+k], t1);
         mul(X[i][k], t1, M[i][i]);
      }
   }

   d = det;
}
示例#12
0
// Computes the determinant d of the square matrix M_in over zz_p by
// eliminating a private copy of it.
//
//   - M_in not square: reported through LogicError.
//   - n == 0: set(d) is called.
//   - some column has no nonzero pivot: d is cleared.
//   - otherwise d is the product of the pivots, negated once per row swap.
void determinant(zz_p& d, const mat_zz_p& M_in)
{
   mat_zz_p M;
   M = M_in;

   const long n = M.NumRows();

   if (M.NumCols() != n)
      LogicError("determinant: nonsquare matrix");

   if (n == 0) {
      set(d);
      return;
   }

   zz_p det;
   set(det);

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   zz_p factor, pivotInv;

   for (long k = 0; k < n; k++) {

      // First row at or below k with a nonzero entry in column k.
      long pivot = k;
      while (pivot < n && IsZero(M[pivot][k]))
         pivot++;

      if (pivot == n) {
         clear(d);
         return;
      }

      if (pivot != k) {
         swap(M[pivot], M[k]);
         negate(det, det);
      }

      mul(det, det, M[k][k]);
      inv(pivotInv, M[k][k]);

      for (long i = k+1; i < n; i++) {
         // M[i] -= M[k] * (M[i][k] / pivot), columns right of k only

         mul(factor, M[i][k], pivotInv);
         negate(factor, factor);

         const long F = rep(factor);
         mulmod_precon_t Fprecon = PrepMulModPrecon(F, p, pinv);

         zz_p *dst = M[i].elts() + (k+1);
         const zz_p *src = M[k].elts() + (k+1);

         for (long j = k+1; j < n; j++, dst++, src++) {
            const long prod = MulModPrecon(rep(*src), F, p, Fprecon);
            dst->LoopHole() = AddMod(rep(*dst), prod, p);
         }
      }
   }

   d = det;
}
示例#13
0
// x = a * B over zz_p, with a read as a row vector.  A length mismatch is
// reported through LogicError("matrix mul: dimension mismatch").
//
// x is resized only after the result has been computed.  Three cases:
//   - B has no columns: x gets length 0;
//   - B has one column: a plain dot product (1-based accessors);
//   - otherwise: each nonzero entry of a is turned into a preconditioned
//     multiplier once and applied to a whole row of B, with the result
//     accumulated in the shared scratch vector mul_aux_vec.
void mul(vec_zz_p& x, const vec_zz_p& a, const mat_zz_p& B)
{
   const long len  = a.length();
   const long cols = B.NumCols();

   if (len != B.NumRows())
      LogicError("matrix mul: dimension mismatch");

   if (cols == 0) {
      x.SetLength(0);
      return;
   }

   long p = zz_p::modulus();
   mulmod_t pinv = zz_p::ModulusInverse();

   if (cols == 1) {
      long dot = 0;
      for (long k = 1; k <= len; k++) {
         const long term = MulMod(rep(a(k)), rep(B(k,1)), p, pinv);
         dot = AddMod(dot, term, p);
      }

      x.SetLength(1);
      x(1).LoopHole() = dot;
      return;
   }

   vec_long::Watcher watch_mul_aux_vec(mul_aux_vec);
   mul_aux_vec.SetLength(cols);
   long *acc = mul_aux_vec.elts();

   const zz_p *avec = a.elts();

   for (long j = 0; j < cols; j++)
      acc[j] = 0;

   for (long k = 0; k < len; k++) {
      const long aa = rep(avec[k]);
      if (aa == 0)
         continue;   // a zero multiplier contributes nothing

      const zz_p *brow = B[k].elts();
      mulmod_precon_t aapinv = PrepMulModPrecon(aa, p, pinv);

      for (long j = 0; j < cols; j++) {
         const long term = MulModPrecon(rep(brow[j]), aa, p, aapinv);
         acc[j] = AddMod(acc[j], term, p);
      }
   }

   x.SetLength(cols);
   zz_p *xp = x.elts();
   for (long j = 0; j < cols; j++)
      xp[j].LoopHole() = acc[j];
}
示例#14
0
// Passes every entry of malFunctionTable, in table order, to rep() together
// with the environment env.
static void installFunctions(malEnvPtr env) {
    for (auto &definition : malFunctionTable)
        rep(definition, env);
}
示例#15
0
文件: display.cpp 项目: PNCG/neuron
// Returns the pheight_ member of this display's DisplayRep.
PixelCoord Display::pheight() const {
    DisplayRep& d = *rep();
    return d.pheight_;
}
示例#16
0
 void debug() {
     printf("%d %d\n", n, m);
     rep(i,n) {
         rep(j,m) cout << a[i][j] << " ";
         puts("");
     }
示例#17
0
文件: display.cpp 项目: PNCG/neuron
// Returns the width_ member of this display's DisplayRep.
Coord Display::a_width() const
{
    return rep()->width_;
}
示例#18
0
文件: CModulus.cpp 项目: dwu4/fhe-si
// Resizes `to` to the length of `from` and stores rep(from[i]) in to[i] for
// every index i.
inline void conv(NTL::vec_long& to, NTL::vec_zz_p& from)
{
  const long count = from.length();
  to.SetLength(count);

  for (long idx = 0; idx < count; idx++) {
    to[idx] = rep(from[idx]);
  }
}
示例#19
0
文件: display.cpp 项目: PNCG/neuron
// Returns the style_ pointer stored in this display's DisplayRep.
Style* Display::style() const
{
    DisplayRep& d = *rep();
    return d.style_;
}
// ORs into M, for each i produced by rep(i,0,64), the bit at position
// 63 - (i*i) % 64 (shifted as an unsigned 64-bit constant).
// NOTE(review): rep and M are defined outside this view; presumably
// rep(i,0,64) runs i over 0..63 and M is a 64-bit mask that starts at zero,
// so the result marks the residues a square can have modulo 64 -- confirm
// against their definitions.
void init_is_square() {
    rep(i,0,64) M |= 1ULL << (63-(i*i)%64); }
示例#21
0
//-------------------------------------------
//  i/o operation (octet string)
//-------------------------------------------
/*
 * Packs the element x into the single integer a:
 *     a = rep(rep0(x)) + rep(rep1(x)) * field(x)->base->order
 * a must already be an initialised mpz_t; x is not modified here.
 * NOTE(review): rep, rep0, rep1 and field are accessors defined outside this
 * view; presumably rep0/rep1 select the two components of x and rep exposes
 * the mpz value of a component -- confirm against their definitions.
 */
void bn254_fp2_to_mpz(mpz_t a, const Element x)
{
    mpz_set(a, rep(rep0(x)));   // a = rep0
    mpz_addmul(a, rep(rep1(x)), field(x)->base->order);   //a = a + rep1*p
}
示例#22
0
// Fills `program` with the declarations generated for the LLVM module M.
//
// Steps, in order:
//   1. declarations for every global of M;
//   2. per function: its global declarations, its procedures (skipped for
//      the Naming::DECLARATIONS_PROC name) and, when the function has a
//      non-empty entry block, a generated body for each procedure;
//   3. auxiliary declarations and the init-funcs declaration;
//   4. the prelude, appended after instruction generation;
//   5. every ProcDecl whose name contains Naming::CONTRACT_EXPR gets a
//      Decl::code entry appended and is then removed from the declarations.
void SmackModuleGenerator::generateProgram(llvm::Module& M) {

  Naming naming;
  SmackRep rep(&M.getDataLayout(), naming, program, getAnalysis<Regions>());
  std::list<Decl*>& decls = program.getDeclarations();

  DEBUG(errs() << "Analyzing globals...\n");

  for (auto& global : M.globals()) {
    auto globalDecls = rep.globalDecl(&global);
    decls.insert(decls.end(), globalDecls.begin(), globalDecls.end());
  }

  DEBUG(errs() << "Analyzing functions...\n");

  for (auto& F : M) {

    // Per-function names start from fresh counters.
    naming.reset();

    DEBUG(errs() << "Analyzing function: " << naming.get(F) << "\n");

    auto functionDecls = rep.globalDecl(&F);
    decls.insert(decls.end(), functionDecls.begin(), functionDecls.end());

    auto procs = rep.procedure(&F);
    assert(procs.size() > 0);

    if (naming.get(F) != Naming::DECLARATIONS_PROC)
      decls.insert(decls.end(), procs.begin(), procs.end());

    // Nothing left to do without a body to translate.
    // (MODIFIES clauses are handled later, once memory splitting is known.)
    if (F.isDeclaration() || F.empty() || F.getEntryBlock().empty())
      continue;

    DEBUG(errs() << "Analyzing function body: " << naming.get(F) << "\n");

    for (auto proc : procs) {
      SmackInstGenerator igen(getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo(), rep, *proc, naming);
      DEBUG(errs() << "Generating body for " << naming.get(F) << "\n");
      igen.visit(F);
      DEBUG(errs() << "\n");

      if (F.hasName() && SmackOptions::isEntryPoint(F.getName())) {
        // Entry points run the static initializers first.
        proc->insert(Stmt::call(Naming::INITIALIZE_PROC));
      } else if (naming.get(F).find(Naming::INIT_FUNC_PREFIX) == 0) {
        rep.addInitFunc(&F);
      }
    }

    DEBUG(errs() << "Finished analyzing function: " << naming.get(F) << "\n\n");
  }

  auto auxiliaryDecls = rep.auxiliaryDeclarations();
  decls.insert(decls.end(), auxiliaryDecls.begin(), auxiliaryDecls.end());
  decls.insert(decls.end(), rep.getInitFuncs());

  // The prelude has to come after instruction generation: only then is the
  // number of regions to declare known.
  program.appendPrelude(rep.getPrelude());

  // Collect the contract-expression procedures first, erase them afterwards.
  std::list<Decl*> kill_list;
  for (auto D : program) {
    auto P = dyn_cast<ProcDecl>(D);
    if (!P)
      continue;
    if (D->getName().find(Naming::CONTRACT_EXPR) == std::string::npos)
      continue;

    decls.insert(decls.end(), Decl::code(P));
    kill_list.push_back(P);
  }

  for (auto doomed : kill_list)
    decls.erase(std::remove(decls.begin(), decls.end(), doomed), decls.end());
}
示例#23
0
// Executes one instruction of the ED-prefixed opcode group.
//
//   state  - CPU state; not referenced by name in this body, presumably
//            reached through the register/flag macros (rB, rHL, S(...)) --
//            TODO confirm against the macro definitions.
//   opcode - the opcode byte to dispatch on (the byte after the ED prefix,
//            judging by the function name).
//
// The switch has no default label: any opcode without a case below
// (for example 0x77, 0x7F, or anything outside 0x40-0x7E / 0xA0-0xBB)
// falls out of the switch and the function returns without doing anything.
void Z80_Exec_ED(Z80_State *state, uint8_t opcode)
{
    uint16_t addr;  // 16-bit immediate address fetched by arg_16()
    uint8_t tmp;    // scratch byte for memory/port transfers
    //uint16_t tmp_16;

    switch (opcode)
    {
    case 0x40: // in b, (c)
        rB = in(rBC);
        in_f(rB);
        break;

    case 0x41: // out (c), b
        out(rBC, rB);
        break;

    case 0x42: // sbc hl, bc
        t_states(7);
        sbc_16(rHL, rBC);
        break;

    case 0x43: // ld (**), bc
        addr = arg_16();
        write_16(addr, rBC);
        break;

    case 0x44: // neg
        neg();
        break;

    case 0x45: // retn
        S(IFF1) = S(IFF2);
        rPC = pop_16();
        break;

    case 0x46: // im 0
        S(IM) = 0;
        break;

    case 0x47: // ld i, a
        t_states(1);
        rI = rA;
        break;

    case 0x48: // in c, (c)
        rC = in(rBC);
        in_f(rC);
        break;

    case 0x49: // out (c), c
        out(rBC, rC);
        break;

    case 0x4A: // adc hl, bc
        t_states(7);
        adc_16(rHL, rBC);
        break;

    case 0x4B: // ld bc, (**)
        addr = arg_16();
        rBC = read_16(addr);
        break;

    case 0x4C: // neg
        neg();
        break;

    // NOTE(review): unlike the retn cases, this one only pops PC and leaves
    // IFF1 untouched -- confirm that is intended.
    case 0x4D: // reti
        rPC = pop_16();
        break;

    case 0x4E: // im 0/1
        S(IM) = 0;
        break;

    case 0x4F: // ld r, a
        t_states(1);
        rR = rA;
        break;

    case 0x50: // in d, (c)
        rD = in(rBC);
        in_f(rD);
        break;

    case 0x51: // out (c), d
        out(rBC, rD);
        break;

    case 0x52: // sbc hl, de
        t_states(7);
        sbc_16(rHL, rDE);
        break;

    case 0x53: // ld (**), de
        addr = arg_16();
        write_16(addr, rDE);
        break;

    case 0x54: // neg
        neg();
        break;

    case 0x55: // retn
        S(IFF1) = S(IFF2);
        rPC = pop_16();
        break;

    case 0x56: // im 1
        S(IM) = 1;
        break;

    case 0x57: // ld a, i
        t_states(1);
        rA = rI;
        ld_f(rA);
        break;

    case 0x58: // in e, (c)
        rE = in(rBC);
        in_f(rE);
        break;

    case 0x59: // out (c), e
        out(rBC, rE);
        break;

    case 0x5A: // adc hl, de
        t_states(7);
        adc_16(rHL, rDE);
        break;

    case 0x5B: // ld de, (**)
        addr = arg_16();
        rDE = read_16(addr);
        break;

    case 0x5C: // neg
        neg();
        break;

    case 0x5D: // retn
        S(IFF1) = S(IFF2);
        rPC = pop_16();
        break;

    case 0x5E: // im 2
        S(IM) = 2;
        break;

    case 0x5F: // ld a, r
        t_states(1);
        rA = rR;
        ld_f(rA);
        break;

    case 0x60: // in h, (c)
        rH = in(rBC);
        in_f(rH);
        break;

    case 0x61: // out (c), h
        out(rBC, rH);
        break;

    case 0x62: // sbc hl, hl
        t_states(7);
        sbc_16(rHL, rHL);
        break;

    case 0x63: // ld (**), hl
        addr = arg_16();
        write_16(addr, rHL);
        break;

    case 0x64: // neg
        neg();
        break;

    case 0x65: // retn
        S(IFF1) = S(IFF2);
        rPC = pop_16();
        break;

    case 0x66: // im 0
        S(IM) = 0;
        break;

    case 0x67: // rrd
        // Read-modify-write of the byte at (HL); rrd() updates tmp in place.
        tmp = read(rHL);
        t_states(4);
        rrd(tmp);
        write(rHL, tmp);
        break;

    case 0x68: // in l, (c)
        rL = in(rBC);
        in_f(rL);
        break;

    case 0x69: // out (c), l
        out(rBC, rL);
        break;

    case 0x6A: // adc hl, hl
        t_states(7);
        adc_16(rHL, rHL);
        break;

    case 0x6B: // ld hl, (**)
        addr = arg_16();
        rHL = read_16(addr);
        break;

    case 0x6C: // neg
        neg();
        break;

    case 0x6D: // retn
        S(IFF1) = S(IFF2);
        rPC = pop_16();
        break;

    case 0x6E: // im 0/1
        S(IM) = 0;
        break;

    case 0x6F: // rld
        // Read-modify-write of the byte at (HL); rld() updates tmp in place.
        tmp = read(rHL);
        t_states(4);
        rld(tmp);
        write(rHL, tmp);
        break;

    case 0x70: // in (c)
        // The value read from the port is only passed to in_f(); it is not
        // stored in any register.
        tmp = in(rBC);
        in_f(tmp);
        break;

    case 0x71: // out (c), 0
        out(rBC, 0);
        break;

    case 0x72: // sbc hl, sp
        t_states(7);
        sbc_16(rHL, rSP);
        break;

    case 0x73: // ld (**), sp
        addr = arg_16();
        write_16(addr, rSP);
        break;

    case 0x74: // neg
        neg();
        break;

    case 0x75: // retn
        S(IFF1) = S(IFF2);
        rPC = pop_16();
        break;

    case 0x76: // im 1
        S(IM) = 1;
        break;

    case 0x78: // in a, (c)
        rA = in(rBC);
        in_f(rA);
        break;

    case 0x79: // out (c), a
        out(rBC, rA);
        break;

    case 0x7A: // adc hl, sp
        t_states(7);
        adc_16(rHL, rSP);
        break;

    case 0x7B: // ld sp, (**)
        addr = arg_16();
        rSP = read_16(addr);
        break;

    case 0x7C: // neg
        neg();
        break;

    case 0x7D: // retn
        S(IFF1) = S(IFF2);
        rPC = pop_16();
        break;

    case 0x7E: // im 2
        S(IM) = 2;
        break;

    // ---- Single-step block instructions (0xA0-0xAB) ----
    // Each transfers or compares one byte, steps HL (and DE for ldi/ldd),
    // decrements the counter (BC, or B for the I/O forms) and then updates
    // flags through the matching *_f helper.

    case 0xA0: // ldi
        tmp = read(rHL++);
        write(rDE++, tmp);
        t_states(2);
        rBC--;
        ldr_f(tmp);
        break;

    case 0xA1: // cpi
        tmp = read(rHL++);
        t_states(5);
        rBC--;
        cpr_f(tmp);
        break;

    case 0xA2: // ini
        t_states(1);
        tmp = in(rBC);
        write(rHL++, tmp);
        rB--;
        inir_f(tmp);
        break;

    case 0xA3: // outi
        t_states(1);
        tmp = read(rHL++);
        rB--;           // B is decremented before the port write uses BC
        out(rBC, tmp);
        outr_f(tmp);
        break;

    case 0xA8: // ldd
        tmp = read(rHL--);
        write(rDE--, tmp);
        t_states(2);
        rBC--;
        ldr_f(tmp);
        break;

    case 0xA9: // cpd
        tmp = read(rHL--);
        t_states(5);
        rBC--;
        cpr_f(tmp);
        break;

    case 0xAA: // ind
        t_states(1);
        tmp = in(rBC);
        write(rHL--, tmp);
        rB--;
        indr_f(tmp);
        break;

    case 0xAB: // outd
        t_states(1);
        tmp = read(rHL--);
        rB--;           // B is decremented before the port write uses BC
        out(rBC, tmp);
        outr_f(tmp);
        break;

    // ---- Repeating block instructions (0xB0-0xBB) ----
    // Same bodies as the 0xA0-0xAB cases above, followed by a repeat check:
    // while the counter is still non-zero (and, for cpir/cpdr, while fZ is
    // clear in rF) 5 extra t-states are charged and rep() is called.
    // rep() is defined elsewhere; presumably it arranges for the instruction
    // to execute again -- TODO confirm.

    case 0xB0: // ldir
        tmp = read(rHL++);
        write(rDE++, tmp);
        t_states(2);
        rBC--;
        ldr_f(tmp);
        if (rBC) {
            t_states(5);
            rep();
        }
        break;

    case 0xB1: // cpir
        tmp = read(rHL++);
        t_states(5);
        rBC--;
        cpr_f(tmp);
        if (rBC && !(rF & fZ)) {
            t_states(5);
            rep();
        }
        break;

    case 0xB2: // inir
        t_states(1);
        tmp = in(rBC);
        write(rHL++, tmp);
        rB--;
        inir_f(tmp);
        if (rB) {
            t_states(5);
            rep();
        }
        break;

    case 0xB3: // otir
        t_states(1);
        tmp = read(rHL++);
        rB--;
        out(rBC, tmp);
        outr_f(tmp);
        if (rB) {
            t_states(5);
            rep();
        }
        break;

    case 0xB8: // lddr
        tmp = read(rHL--);
        write(rDE--, tmp);
        t_states(2);
        rBC--;
        ldr_f(tmp);
        if (rBC) {
            t_states(5);
            rep();
        }
        break;

    case 0xB9: // cpdr
        tmp = read(rHL--);
        t_states(5);
        rBC--;
        cpr_f(tmp);
        if (rBC && !(rF & fZ)) {
            t_states(5);
            rep();
        }
        break;

    case 0xBA: // indr
        t_states(1);
        tmp = in(rBC);
        write(rHL--, tmp);
        rB--;
        indr_f(tmp);
        if (rB) {
            t_states(5);
            rep();
        }
        break;

    case 0xBB: // otdr
        t_states(1);
        tmp = read(rHL--);
        rB--;
        out(rBC, tmp);
        outr_f(tmp);
        if (rB) {
            t_states(5);
            rep();
        }
        break;
    }
}
示例#24
0
文件: display.cpp 项目: PNCG/neuron
// Accessor: reports the width_ field of the object returned by rep().
Coord Display::width() const
{
    return (*rep()).width_;
}
示例#25
0
// Gaussian-elimination solver over GF2E for a square system built from A and
// the right-hand side b.
//
//   d     - receives the product of the pivots (converted to GF2E) on
//           success; cleared when no pivot can be found for some column;
//           set via set(d) when the system is empty (n == 0).
//   X     - receives the solution vector (length n) on success. It is not
//           touched when elimination stops early for lack of a pivot.
//   A, b  - coefficient matrix and right-hand side.
//   trans - when true the coefficients are read as A[j][i] instead of
//           A[i][j], i.e. the transposed matrix is used.
//
// Calls LogicError if A is not square or b has the wrong length.
//
// The working matrix stores unreduced GF2X polynomials; entries are reduced
// modulo the GF2E modulus only when a column is scanned for a pivot and when
// the pivot row is scaled.
static
void solve_impl(GF2E& d, vec_GF2E& X, const mat_GF2E& A, const vec_GF2E& b, bool trans)
{
   long n = A.NumRows();

   if (A.NumCols() != n)
      LogicError("solve: nonsquare matrix");

   if (b.length() != n)
      LogicError("solve: dimension mismatch");

   // Empty system: nothing to eliminate.
   if (n == 0) {
      set(d);
      X.SetLength(0);
      return;
   }

   GF2X scratch, prod;

   const GF2XModulus& modulus = GF2E::modulus();

   // Augmented working matrix: n rows, n coefficient columns plus one
   // right-hand-side column at index n.
   vec_GF2XVec W;
   W.SetLength(n);

   for (long row = 0; row < n; row++) {
      W[row].SetSize(n+1, 2*GF2E::WordLength());

      for (long col = 0; col < n; col++) {
         if (trans)
            W[row][col] = rep(A[col][row]);
         else
            W[row][col] = rep(A[row][col]);
      }

      W[row][n] = rep(b[row]);
   }

   GF2X det;
   set(det);

   // Forward elimination, one pivot column at a time.
   for (long col = 0; col < n; col++) {
      // Reduce the column entries from the diagonal down and remember the
      // first row with a non-zero entry.
      long pivot = -1;
      for (long row = col; row < n; row++) {
         rem(scratch, W[row][col], modulus);
         W[row][col] = scratch;
         if (pivot == -1 && !IsZero(scratch))
            pivot = row;
      }

      // No usable pivot in this column: report via d and stop.
      if (pivot == -1) {
         clear(d);
         return;
      }

      if (pivot != col)
         swap(W[pivot], W[col]);

      MulMod(det, det, W[col][col], modulus);

      // Scale the remainder of the pivot row by the inverse of the pivot,
      // reducing each entry first.
      InvMod(scratch, W[col][col], modulus);
      for (long j = col+1; j <= n; j++) {
         rem(prod, W[col][j], modulus);
         MulMod(W[col][j], prod, scratch, modulus);
      }

      // W[row] += W[col] * W[row][col] for every row below the pivot.
      for (long row = col+1; row < n; row++) {
         scratch = W[row][col];   // already reduced by the pivot scan

         for (long j = col+1; j <= n; j++) {
            mul(prod, W[col][j], scratch);
            add(W[row][j], W[row][j], prod);
         }
      }
   }

   // Back substitution, last row first.
   X.SetLength(n);
   for (long row = n-1; row >= 0; row--) {
      clear(scratch);
      for (long j = row+1; j < n; j++) {
         mul(prod, rep(X[j]), W[row][j]);
         add(scratch, scratch, prod);
      }
      add(scratch, scratch, W[row][n]);
      conv(X[row], scratch);
   }

   conv(d, det);
}
示例#26
0
文件: display.cpp 项目: PNCG/neuron
// Accessor: reports the height_ field of the object returned by rep().
Coord Display::height() const
{
    return (*rep()).height_;
}
示例#27
0
// In-place Gaussian elimination on M_in over GF2E, using only the first w
// columns as pivot columns.
//
//   M_in - matrix to reduce; overwritten with the eliminated form.
//   w    - number of leading columns to eliminate on; must satisfy
//          0 <= w <= number of columns, otherwise LogicError is called.
//
// Returns the number of pivots found.
//
// Work is done on a copy holding unreduced GF2X polynomials; entries are
// reduced modulo the GF2E modulus when a column is scanned, when a pivot row
// is chosen, and finally when results are converted back into M_in.
long gauss(mat_GF2E& M_in, long w)
{
   long nrows = M_in.NumRows();
   long ncols = M_in.NumCols();

   if (w < 0 || w > ncols)
      LogicError("gauss: bad args");

   GF2X factor, prod, pivInv;

   const GF2XModulus& modulus = GF2E::modulus();

   // Working copy of the input as raw polynomial representatives.
   vec_GF2XVec W;
   W.SetLength(nrows);
   for (long r = 0; r < nrows; r++) {
      W[r].SetSize(ncols, 2*GF2E::WordLength());
      for (long c = 0; c < ncols; c++)
         W[r][c] = rep(M_in[r][c]);
   }

   long nextRow = 0;   // row that will receive the next pivot
   for (long col = 0; col < w && nextRow < nrows; col++) {
      // Reduce this column from nextRow down and note the first non-zero.
      long pivot = -1;
      for (long r = nextRow; r < nrows; r++) {
         rem(factor, W[r][col], modulus);
         W[r][col] = factor;
         if (pivot == -1 && !IsZero(factor))
            pivot = r;
      }

      // Column is all zero below nextRow: move on without consuming a row.
      if (pivot == -1)
         continue;

      swap(W[pivot], W[nextRow]);

      InvMod(pivInv, W[nextRow][col], modulus);

      // Reduce the rest of the pivot row before it is used as a multiplier.
      for (long c = col+1; c < ncols; c++)
         rem(W[nextRow][c], W[nextRow][c], modulus);

      // W[r] += W[nextRow] * W[r][col] * pivInv for every row below.
      for (long r = nextRow+1; r < nrows; r++) {
         MulMod(factor, W[r][col], pivInv, modulus);

         clear(W[r][col]);

         for (long c = col+1; c < ncols; c++) {
            mul(prod, W[nextRow][c], factor);
            add(prod, prod, W[r][c]);
            W[r][c] = prod;
         }
      }

      nextRow++;
   }

   // Write the result back, converting each polynomial to GF2E.
   for (long r = 0; r < nrows; r++) {
      for (long c = 0; c < ncols; c++)
         conv(M_in[r][c], W[r][c]);
   }

   return nextRow;
}
示例#28
0
文件: display.cpp 项目: PNCG/neuron
// Accessor: reports the pwidth_ field of the object returned by rep().
PixelCoord Display::pwidth() const
{
    return (*rep()).pwidth_;
}
示例#29
0
// The main method
void RecryptData::init(const FHEcontext& context, const Vec<long>& mvec_,
		       long t, bool consFlag, bool build_cache_, bool minimal)
{
  if (alMod != NULL) { // were we called for a second time?
    cerr << "@Warning: multiple calls to RecryptData::init\n";
    return;
  }
  assert(computeProd(mvec_) == (long)context.zMStar.getM()); // sanity check

  // Record the arguments to this function
  mvec = mvec_;
  conservative = consFlag;
  build_cache = build_cache_;

  if (t <= 0) t = defSkHwt+1; // recryption key Hwt
  hwt = t;
  long p = context.zMStar.getP();
  long phim = context.zMStar.getPhiM();
  long r = context.alMod.getR();
  long p2r = context.alMod.getPPowR();
  double logp = log((double)p);

  double noise = p2r * sqrt((t+1)*phim/3.0);
  double gamma = 2*(t+noise)/((t+1)*p2r); // ratio between numerators

  long logT = ceil(log((double)(t+2))/logp); // ceil(log_p(t+2))
  double rho = (t+1)/pow(p,logT);

  if (!conservative) {   // try alpha, e with this "aggresive" setting
    setAlphaE(alpha, e, rho, gamma, noise, logp, p2r, t);
    ePrime = e -r +1 -logT;

    // If e is too large, try again with rho/p instead of rho
    long bound = (1L << (context.bitsPerLevel-1)); // halfSizePrime/2
    if (pow(p,e) > bound) { // try the conservative setting instead
      cerr << "* p^e="<<pow(p,e)<<" is too big (bound="<<bound<<")\n";
      conservative = true;
    }
  }
  if (conservative) { // set alpha, e with a "conservative" rho/p
    setAlphaE(alpha, e, rho/p, gamma, noise, logp, p2r, t);
    ePrime = e -r -logT;
  }

  // Compute highest key-Hamming-weight that still works (not more than 256)
  double qOver4 = (pow(p,e)+1)/4;
  for (t-=10; qOver4>=lowerBound2(p,r,ePrime,t,alpha)
	 &&  qOver4>=lowerBound1(p,r,ePrime,t,alpha,noise) && t<257; t++);
  skHwt = t-1;

  // First part of Bootstrapping works wrt plaintext space p^{r'}
  alMod = new PAlgebraMod(context.zMStar, e-ePrime+r);
  ea = new EncryptedArray(context, *alMod);
         // Polynomial defaults to F0, PAlgebraMod explicitly given


  p2dConv = new PowerfulDCRT(context, mvec);

  // Initialize the linear polynomial for unpacking the slots
  zz_pBak bak; bak.save(); ea->getAlMod().restoreContext();
  long nslots = ea->size();
  long d = ea->getDegree();

  const Mat<zz_p>& CBi=ea->getDerived(PA_zz_p()).getNormalBasisMatrixInverse();

  vector<ZZX> LM;
  LM.resize(d);
  for (long i = 0; i < d; i++) // prepare the linear polynomial
    LM[i] = rep(CBi[i][0]);

  vector<ZZX> C; 
  ea->buildLinPolyCoeffs(C, LM); // "build" the linear polynomial

  unpackSlotEncoding.resize(d);  // encode the coefficients

  for (long j = 0; j < d; j++) {
    vector<ZZX> v(nslots);
    for (long k = 0; k < nslots; k++) v[k] = C[j];
    ea->encode(unpackSlotEncoding[j], v);
  }
  firstMap = new EvalMap(*ea, minimal, mvec, true, build_cache);
  secondMap = new EvalMap(*context.ea, minimal, mvec, false, build_cache);
}
示例#30
0
/*
 * Hand-scheduled single-precision matrix-multiply kernel for a fixed block
 * size: the body refuses to compile unless NB == 90 (see the #error below).
 *
 * Parameters read by name in this body: A, B, C, ldc and (only when BETAX is
 * defined) beta.  M, N, K, alpha, lda and ldb are part of the signature but
 * are not referenced by name here.
 *
 * Structure:
 *   - three compile-time variants of StoreResults(), selected by BETA0,
 *     BETA1, or neither (the last one scales the existing C values by the
 *     splatted beta held in locbeta);
 *   - an outer "N-loop" that advances pB0/pB1 by NB per iteration and stops
 *     when pB0 reaches stN (B + NB*NB + BBIAS0);
 *   - an inner "M-loop" that handles three stages (0, 1, 2) per iteration,
 *     then advances pA0 by 3*PA0_INC and pC0 by 18, stopping when pA0
 *     reaches stM (A + NB*NB + ABIAS).
 *
 * align(), alignN(), rep(), the vec_* operations, reg0..reg7, ATL_pfl1R/W
 * and the *BIAS / KB / PA0_INC constants are defined elsewhere.
 * NOTE(review): rep()/align() presumably only influence instruction encoding
 * and alignment, not the arithmetic -- confirm against their definitions
 * before moving any statement.  The statement order below is deliberate
 * instruction scheduling and must be preserved.
 */
void ATL_USERMM
(const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc)
{
   const float *stM;
   const float *stN;
   const int incCn = (ldc) - NB;
   float *pC0;
   const float *pA0;
   const float *pB0;
   const float *pB1;
#ifdef BETAX
static   vector locbeta;
#endif

/*
 * First12As(i, ib, ic): opening step of a stage.  Loads pBX[ib+0] and
 * pBX[ib+2], combines each with six A elements (rows 0*KB..5*KB of pA0 at
 * offsets i and i+2), issues a write-prefetch for pC0[6*ic+4], and finishes
 * by loading pBX[ib+4] into reg7 for the step that follows.
 */
#define First12As(i, ib, ic) \
	align(); \
	vec_mov_mr(&(pBX[  ib + 0 - BBIASX]), reg7); \
	rep(); vec_mov_mr(&(pA0[0*KB + 0 + i - ABIAS]), reg4); \
	vec_mul_rr(reg7, reg4); \
	\
	rep(); vec_mov_mr(&(pA0[1*KB + 0 + i - ABIAS]), reg5); \
	vec_mul_rr(reg7, reg5); \
	vec_acc_rr(reg5, reg4); \
	\
	rep(); vec_mov_mr(&(pA0[2*KB + 0 + i - ABIAS]), reg5); \
	vec_mul_rr(reg7, reg5); \
	vec_mov_mr(&(pBX[  ib + 2 - BBIASX]), reg3); \
	\
	rep(); vec_mov_mr(&(pA0[3*KB + 0 + i - ABIAS]), reg6); \
	vec_mul_rr(reg7, reg6); \
	vec_acc_rr(reg6, reg5); \
	\
	rep(); vec_mov_mr(&(pA0[4*KB + 0 + i - ABIAS]), reg6); \
	vec_mul_rr(reg7, reg6); \
	ATL_pfl1W(&(pC0[6*(ic) + 4 - CBIAS])); \
	\
	vec_mul_mr(&(pA0[5*KB + 0 + i - ABIAS]), reg7); \
	rep(); vec_mov_mr(&(pA0[0*KB + 2 + i - ABIAS]), reg0); \
	\
	vec_acc_rr(reg7, reg6); \
	vec_mul_rr(reg3, reg0); \
	rep(); vec_mov_mr(&(pA0[1*KB + 2 + i - ABIAS]), reg7); \
	\
	vec_mul_rr(reg3, reg7); \
	vec_acc_rr(reg7, reg0); \
	rep(); vec_mov_mr(&(pA0[2*KB + 2 + i - ABIAS]), reg1); \
	\
	vec_mul_rr(reg3, reg1); \
	rep(); vec_mov_mr(&(pA0[3*KB + 2 + i - ABIAS]), reg7); \
	vec_mul_rr(reg3, reg7); \
	\
	vec_acc_rr(reg7, reg1); \
	rep(); vec_mov_mr(&(pA0[4*KB + 2 + i - ABIAS]), reg2); \
	vec_mul_rr(reg3, reg2); \
	\
	vec_mul_mr(&(pA0[5*KB + 2 + i - ABIAS]), reg3); \
	vec_acc_rr(reg3, reg2); \
	vec_mov_mr(&(pBX[  ib + 4 - BBIASX]), reg7); \
	\
	;

/*
 * Do6As(i, ib): middle step.  Folds the partial results in reg4/reg5/reg6
 * into reg0/reg1/reg2, recomputes reg4/reg5/reg6 from six A elements at
 * offset i using the B value already in reg7, and ends by loading
 * pBX[ib+2] into reg7 for the next step.
 */
#define Do6As(i, ib) \
	align(); \
	vec_add_rr(reg4, reg0); \
	rep(); vec_mov_mr(&(pA0[0*KB + i - ABIAS]), reg4); \
	vec_mul_rr(reg7, reg4); \
	\
	rep(); vec_mov_mr(&(pA0[1*KB + i - ABIAS]), reg3); \
	vec_mul_rr(reg7, reg3); \
	vec_acc_rr(reg3, reg4); \
	\
	vec_add_rr(reg5, reg1); \
	rep(); vec_mov_mr(&(pA0[2*KB + i - ABIAS]), reg5); \
	vec_mul_rr(reg7, reg5); \
	\
	rep(); vec_mov_mr(&(pA0[3*KB + i - ABIAS]), reg3); \
	vec_mul_rr(reg7, reg3); \
	vec_acc_rr(reg3, reg5); \
	\
	vec_add_rr(reg6, reg2); \
	rep(); vec_mov_mr(&(pA0[4*KB + i - ABIAS]), reg6); \
	vec_mul_rr(reg7, reg6); \
	\
	vec_mul_mr(&(pA0[5*KB     + i - ABIAS]), reg7); \
	vec_acc_rr(reg7, reg6); \
	vec_mov_mr(&(pBX[  ib + 2 - BBIASX]), reg7); \
	\
	;

/*
 * Last6As(i, ib): closing step of a stage.  Same as Do6As except that it
 * does not load a further B value; instead it ends with the reg4 -> reg0
 * fold.  The matching reg5/reg6 folds are done at the start of
 * StoreResults() in the BETA0 and default variants.
 */
#define Last6As(i, ib) \
	align(); \
	vec_add_rr(reg4, reg0); \
	rep(); vec_mov_mr(&(pA0[0*KB + i - ABIAS]), reg4); \
	vec_mul_rr(reg7, reg4); \
	\
	rep(); vec_mov_mr(&(pA0[1*KB + i - ABIAS]), reg3); \
	vec_mul_rr(reg7, reg3); \
	vec_acc_rr(reg3, reg4); \
	\
	vec_add_rr(reg5, reg1); \
	rep(); vec_mov_mr(&(pA0[2*KB + i - ABIAS]), reg5); \
	vec_mul_rr(reg7, reg5); \
	\
	rep(); vec_mov_mr(&(pA0[3*KB + i - ABIAS]), reg3); \
	vec_mul_rr(reg7, reg3); \
	vec_acc_rr(reg3, reg5); \
	\
	vec_add_rr(reg6, reg2); \
	rep(); vec_mov_mr(&(pA0[4*KB + i - ABIAS]), reg6); \
	vec_mul_rr(reg7, reg6); \
	\
	vec_mul_mr(&(pA0[5*KB     + i - ABIAS]), reg7); \
	vec_acc_rr(reg7, reg6); \
	vec_add_rr(reg4, reg0); \
	\
	;

/*
 * StoreResults(ic): writes reg0/reg1/reg2 to pC0[6*ic + 0/2/4].
 *   BETA0   - results overwrite C.
 *   BETA1   - existing C values are added in before storing.
 *   default - existing C values are first multiplied by locbeta (reg3).
 */
#ifdef BETA0
#define StoreResults(ic) \
	align(); \
	vec_add_rr(reg5, reg1); \
	vec_add_rr(reg6, reg2); \
	vec_mov_rm(reg0, &(pC0[6*(ic) + 0 - CBIAS])); \
	vec_mov_rm(reg1, &(pC0[6*(ic) + 2 - CBIAS])); \
	\
	vec_mov_rm(reg2, &(pC0[6*(ic) + 4 - CBIAS])); \
	;
#elif defined(BETA1)
#define StoreResults(ic) \
	align(); \
	rep(); vec_add_mr(&(pC0[6*(ic) + 0 - CBIAS]), reg0); \
	rep(); vec_add_mr(&(pC0[6*(ic) + 2 - CBIAS]), reg1); \
	vec_add_rr(reg5, reg1); \
	\
	rep(); vec_add_mr(&(pC0[6*(ic) + 4 - CBIAS]), reg2); \
	rep(); vec_add_rr(reg6, reg2); \
	rep(); vec_mov_rm(reg0, &(pC0[6*(ic) + 0 - CBIAS])); \
	\
	vec_mov_rm(reg1, &(pC0[6*(ic) + 2 - CBIAS])); \
	vec_mov_rm(reg2, &(pC0[6*(ic) + 4 - CBIAS])); \
	;
#else
#define StoreResults(ic) \
	align(); \
	rep(); vec_mov_mr(locbeta, reg3); \
	vec_mov_mr(&(pC0[6*(ic) + 0 - CBIAS]), reg7); \
	vec_mov_mr(&(pC0[6*(ic) + 2 - CBIAS]), reg4); \
	\
	rep(); vec_mul_rr(reg3, reg7); \
	rep(); vec_mul_rr(reg3, reg4); \
	rep(); vec_mul_mr(&(pC0[6*(ic) + 4 - CBIAS]), reg3); \
	\
	vec_add_rr(reg7, reg0); \
	vec_add_rr(reg5, reg1); \
	vec_add_rr(reg6, reg2); \
	vec_add_rr(reg4, reg1); \
	\
	vec_add_rr(reg3, reg2); \
	vec_mov_rm(reg0, &(pC0[6*(ic) + 0 - CBIAS])); \
	vec_mov_rm(reg1, &(pC0[6*(ic) + 2 - CBIAS])); \
	vec_mov_rm(reg2, &(pC0[6*(ic) + 4 - CBIAS])); \
	\
	;
#endif

/*
 * FirstSteps / Steps / LastSteps bundle the step macros above so that each
 * invocation covers six consecutive offsets.  The (i, ib) arguments name the
 * end of the six-element window; `stage` shifts the A offset by
 * stage*PA0_INC and, in FirstSteps, selects which C slot gets prefetched.
 */
#define	FirstSteps(i, ib, stage) \
	First12As((((i) - 6) + ((stage - 0) * PA0_INC)), ((ib) - 6), stage) \
	Do6As((((i) - 2) + ((stage - 0) * PA0_INC)), ((ib) - 2)) \
	;

#define	Steps(i, ib, stage) \
	Do6As((((i) - 6) + ((stage - 0) * PA0_INC)), ((ib) - 6)) \
	Do6As((((i) - 4) + ((stage - 0) * PA0_INC)), ((ib) - 4)) \
	Do6As((((i) - 2) + ((stage - 0) * PA0_INC)), ((ib) - 2)) \
	;

#define	LastSteps(i, ib, stage) \
	Do6As((((i) - 6) + ((stage - 0) * PA0_INC)), ((ib) - 6)) \
	Do6As((((i) - 4) + ((stage - 0) * PA0_INC)), ((ib) - 4)) \
	Last6As((((i) - 2) + ((stage - 0) * PA0_INC)), ((ib) - 2)) \
	;

   vec_enter();

   /* General-beta build: splat beta once and park it in locbeta for
      StoreResults() to reload. */
#ifdef BETAX
   vec_splat(&beta, reg3);
   vec_mov_rm(reg3, locbeta);
#endif

   { /* block "prefetch" the A submatrix */
      /* Walks A as long long values, 16 per iteration, touching elements 0
         and 8 of each group, until 506*8 values have been covered.  The
         loaded registers are not used afterwards. */
      register const long long *pAd=(const long long *) A;
      register const long long *pAe=pAd + (506*8);
      align();
      do {
	rep(); vec_mov_mr(&(pAd[ 0]), reg0);
	vec_mov_mr(&(pAd[ 8]), reg1);
	rep(); pAd += 8*2;
      } while (pAd != pAe);
   }

   /* Loop sentinels and biased working pointers. */
   stM = A + NB*NB + ABIAS;
   stN = B + NB*NB + BBIAS0;
   pC0=C + CBIAS;
   pA0=A + ABIAS;
   pB0=B + BBIAS0;
   pB1=B + BBIAS1;

   /* block "prefetch" some of the B submatrix */
   align();
          vec_mov_mr(&(pB0[  0 - BBIAS0]), reg0);
          vec_mov_mr(&(pB0[ 16 - BBIAS0]), reg1);
   rep(); vec_mov_mr(&(pB1[ 32 - BBIAS1]), reg2);

   rep(); vec_mov_mr(&(pB1[ 48 - BBIAS1]), reg3);
          vec_mov_mr(&(pB1[ 64 - BBIAS1]), reg4);
          vec_mov_mr(&(pB1[ 80 - BBIAS1]), reg5);

          vec_mov_mr(&(pB1[ 96 - BBIAS1]), reg6);
          vec_mov_mr(&(pB1[112 - BBIAS1]), reg7);
   rep(); vec_mov_mr(&(pB0[128 - BBIAS0]), reg0);

   rep(); vec_mov_mr(&(pB1[144 - BBIAS1]), reg1);
   rep(); vec_mov_mr(&(pB1[160 - BBIAS1]), reg2);

   rep(); vec_mov_mr(&(pB1[176 - BBIAS1]), reg3);
   rep(); vec_mov_mr(&(pB1[192 - BBIAS1]), reg4);

   align();
   do { /* N-loop */
      /* Read-prefetch from 2*KB past the current B position; further
         prefetches at +32 .. +88 are interleaved with the stages below. */
      rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 +  0]));
      rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 16]));

      align();
      do { /* M-loop */

/* Stage 0.  B offsets up to 60 are addressed through pB0/BBIAS0, the
   remaining ones through pB1/BBIAS1; pBX/BBIASX are re-pointed to switch
   between the two.  Stages 1 and 2 repeat the same pattern. */
#undef	pBX
#undef	BBIASX
#define	pBX	pB0
#define	BBIASX	BBIAS0

	FirstSteps(6, 6, 0);
	Steps(12, 12, 0);
	Steps(18, 18, 0);
	Steps(24, 24, 0);
	Steps(30, 30, 0);
	Steps(36, 36, 0);
	Steps(42, 42, 0);
	Steps(48, 48, 0);
	Steps(54, 54, 0);
	Steps(60, 60, 0);

#undef	pBX
#undef	BBIASX
#define	pBX	pB1
#define	BBIASX	BBIAS1

	Steps(66, 66, 0);
	Steps(72, 72, 0);
	Steps(78, 78, 0);
	Steps(84, 84, 0);
	LastSteps(90, 90, 0);
	StoreResults(0);

	alignN("8");
	rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 32]));

/* Stage 1. */
#undef	pBX
#undef	BBIASX
#define	pBX	pB0
#define	BBIASX	BBIAS0

	FirstSteps(6, 6, 1);
	Steps(12, 12, 1);
	Steps(18, 18, 1);
	Steps(24, 24, 1);
	Steps(30, 30, 1);
	Steps(36, 36, 1);
	Steps(42, 42, 1);
	Steps(48, 48, 1);
	Steps(54, 54, 1);
	Steps(60, 60, 1);

#undef	pBX
#undef	BBIASX
#define	pBX	pB1
#define	BBIASX	BBIAS1

	Steps(66, 66, 1);
	Steps(72, 72, 1);
	Steps(78, 78, 1);
	Steps(84, 84, 1);
	LastSteps(90, 90, 1);
	StoreResults(1);

	alignN("8");
	rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 48]));

/* Stage 2. */
#undef	pBX
#undef	BBIASX
#define	pBX	pB0
#define	BBIASX	BBIAS0

	FirstSteps(6, 6, 2);
	Steps(12, 12, 2);
	Steps(18, 18, 2);
	Steps(24, 24, 2);
	Steps(30, 30, 2);
	Steps(36, 36, 2);
	Steps(42, 42, 2);
	Steps(48, 48, 2);
	Steps(54, 54, 2);
	Steps(60, 60, 2);

#undef	pBX
#undef	BBIASX
#define	pBX	pB1
#define	BBIASX	BBIAS1

	Steps(66, 66, 2);
	Steps(72, 72, 2);
	Steps(78, 78, 2);
	Steps(84, 84, 2);
	LastSteps(90, 90, 2);

	/* pA0 is advanced before the last store; StoreResults() only
	   addresses pC0, so the store still targets this iteration's C. */
	rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 64]));
	rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 80]));
        pA0 += PA0_INC*3;
	rep(); ATL_pfl1R(&(pB0[2*KB - BBIAS0 + 88]));

	StoreResults(2);

        pC0 += 6*3;

/* The hard-coded step offsets (6 .. 90) above only make sense for NB == 90. */
#if (NB != 90)
 #error "NB must be 90"
#endif

      } while(pA0 != stM);

      /* Next column of B and C; rewind A to the start of the block. */
      pB0 += NB;
      pB1 += NB;
      pC0 += incCn;
      pA0 -= NB*NB;

   } while(pB0 != stN);

   vec_exit();
}