// General setup for verbs with IRS that do not go through jtirs[12] // A verb u["n] using this function checks to see whether it has multiple cells; if so, // it calls here, giving a callback; we split the arguents into cells and call the callback, // which is often the same original function that called here. A jtrank2ex(J jt,A a,A w,A fs,I lr,I rr,AF f2){PROLOG(0042);A y,y0,ya,yw,z;B ab,b,wb; C*u,*uu,*v,*vv;I acn,acr,af,ak,ar,*as,at,k,mn,n=1,p,q,*s,wcn,wcr,wf,wk,wr,*ws,wt,yn,yr,*ys,yt; RZ(a&&w); at=AT(a); wt=AT(w); if(at&SPARSE||wt&SPARSE)R sprank2(a,w,fs,lr,rr,f2); // ?r=rank, ?s->shape, ?cr=effective rank, ?f=#frame, ?b=relative flag, for each argument ar=AR(a); as=AS(a); acr=efr(ar,lr); af=ar-acr; ab=ARELATIVE(a); wr=AR(w); ws=AS(w); wcr=efr(wr,rr); wf=wr-wcr; wb=ARELATIVE(w); if(!af&&!wf)R CALL2(f2,a,w,fs); // if there's only one cell, run on it, that's the result // multiple cells. Loop through them. // ?cn=number of atoms in a cell, ?k=#bytes in a cell, uv point to one cell before aw data // Allocate y? to hold one cell of ?, with uu,vv pointing to the data of y? RE(acn=prod(acr,as+af)); ak=acn*bp(at); u=CAV(a)-ak; NEWYA; RE(wcn=prod(wcr,ws+wf)); wk=wcn*bp(wt); v=CAV(w)-wk; NEWYW; // b means 'w frame is larger'; p=#larger frame; q=#shorter frame; s->larger frame // mn=#cells in larger frame (& therefore #cells in result); n=# times to repeat each cell // from shorter-frame argument b=af<=wf; p=b?wf:af; q=b?af:wf; s=b?ws:as; RE(mn=prod(p,s)); RE(n=prod(p-q,s+q)); ASSERT(!ICMP(as,ws,q),EVLENGTH); // error if frames are not same as prefix // Initialize y? to hold data for the first cell; but if ? is empty, set y? to a cell of fills if(AN(a))MOVEYA else RZ(ya=reshape(vec(INT,acr,as+af),filler(a))); if(AN(w))MOVEYW else RZ(yw=reshape(vec(INT,wcr,ws+wf),filler(w))); #define VALENCE 2 #define TEMPLATE 0 #include "cr_t.h" }
static A jtcants(J jt,A a,A w,A z){A a1,q,y;B*b,*c;I*u,wr,zr;P*wp,*zp; RZ(a&&w&&z); RZ(a=grade1(a)); wr=AR(w); wp=PAV(w); a1=SPA(wp,a); zr=AR(z); zp=PAV(z); ASSERT(wr==zr,EVNONCE); RZ(b=bfi(wr,a1,1)); GA(q,B01,wr,1,0); c=BAV(q); u=AV(a); DO(wr, c[i]=b[u[i]];);
A jtfxeachv(J jt,I r,A w){A*wv,x,z,*zv;I n,wd; RZ(w); n=AN(w); wv=AAV(w); wd=(I)w*ARELATIVE(w); ASSERT(r>=AR(w),EVRANK); ASSERT(n,EVLENGTH); ASSERT(BOX&AT(w),EVDOMAIN); GA(z,BOX,n,AR(w),AS(w)); zv=AAV(z); DO(n, RZ(zv[i]=x=fx(WVR(i))); ASSERT(VERB&AT(x),EVDOMAIN););
static A jtipprep(J jt,A a,A w,I zt,I*pm,I*pn,I*pp){A z=mark;I*as,ar,ar1,m,mn,n,p,*ws,wr,wr1; ar=AR(a); as=AS(a); ar1=ar?ar-1:0; RE(*pm=m=prod(ar1, as)); wr=AR(w); ws=AS(w); wr1=wr?wr-1:0; RE(*pn=n=prod(wr1,1+ws)); RE(mn=mult(m,n)); *pp=p=ar?*(as+ar1):wr?*ws:1; ASSERT(!(ar&&wr)||p==*ws,EVLENGTH); GA(z,zt,mn,ar1+wr1,0); ICPY(AS(z), as,ar1); ICPY(AS(z)+ar1,1+ws,wr1); R z; } /* argument validation & result for an inner product */
// Analysis for inner product // a,w are arguments // zt is type of result // *pm is # 1-cells of a // *pn is # atoms in an item of w // *pp is number of inner-product muladds // (in each, an atom of a multiplies an item of w) static A jtipprep(J jt,A a,A w,I zt,I*pm,I*pn,I*pp){A z=mark;I*as,ar,ar1,m,mn,n,p,*ws,wr,wr1; ar=AR(a); as=AS(a); ar1=ar?ar-1:0; RE(*pm=m=prod(ar1,as)); // m=# 1-cells of a. It could overflow, if there are no atoms wr=AR(w); ws=AS(w); wr1=wr?wr-1:0; RE(*pn=n=prod(wr1,1+ws)); RE(mn=mult(m,n)); // n=#atoms in 1-cell of w; mn = #atoms in result *pp=p=ar?*(as+ar1):wr?*ws:1; // if a is an array, the length of a 1-cell; otherwise, the number of items of w ASSERT(!(ar&&wr)||p==*ws,EVLENGTH); GA(z,zt,mn,ar1+wr1,0); // allocate result area ICPY(AS(z), as,ar1); // Set shape: 1-frame of a followed by shape of item of w ICPY(AS(z)+ar1,1+ws,wr1); R z; } /* argument validation & result for an inner product */
static DF2(rank2) { DECLF; A h=sv->h; I ar,l,r,*v=AV(h),wr; RZ(a&&w); ar=AR(a); l=efr(ar,v[1]); wr=AR(w); r=efr(wr,v[2]); R l<ar||r<wr?rank2ex(a,w,fs,l,r,f2):CALL2(f2,a,w,fs); }
// IRS setup for dyads x op y // a is x, w is y // fs is the f field of the verb (the verb to be applied repeatedly) - or 0 if none // l, r are nominal ranks of fs // f2 is a setup verb (jtover, jtreshape, etc) A jtirs2(J jt,A a,A w,A fs,I l,I r,AF f2){A z;I af,ar,*old=jt->rank,rv[2],wf,wr; // push the jt->rank (pointer to ranks) stack. push/pop may not match, no problem RZ(a&&w); ar=AR(a); rv[0]=l=efr(ar,l); af=ar-l; // get rank, effective rank, length of frame... wr=AR(w); rv[1]=r=efr(wr,r); wf=wr-r; // ...for both args if(!(af||wf))R CALL2(f2,a,w,fs); // if no frame, call setup verb and return result ASSERT(!ICMP(AS(a),AS(w),MIN(af,wf)),EVLENGTH); // verify agreement /* if(af&&wf&&af!=wf)R rank2ex(a,w,fs,l,r,f2); */ jt->rank=rv; z=CALL2(f2,a,w,fs); jt->rank=old; // save ranks, call setup verb, pop rank stack // Not all setup verbs (*f2)() use the fs argument. R z; }
AF jtatcompf(J jt,A a,A w,A self){AF f;I ar,at,m,wr,wt; RZ(a&&w); at=AT(a); ar=AR(a); wt=AT(w); wr=AR(w); m=VAV(self)->flag%256; if(1<ar||1<wr){if(32<=m&&m<=37||40<=m&&m<=45||48<=m&&m<=53)R(AF)jtfslashatg; RZ(7==m%8);} ASSERT(AN(a)==AN(w)||!ar||!wr||5<m%8,EVLENGTH); f=atcompX[m]; if(!f){ if(at&B01+INT+FL&&wt&B01+INT+FL)f=atcompxy[9*m+3*(at&B01?0:at&INT?1:2)+(wt&B01?0:wt&INT?1:2)]; else if(at&LIT&&wt&LIT) f=atcompC[m]; else if(at&SBT&&wt&SBT) f=atcompSB[m]; } R f; } /* function table look-up for comp i. 1: and i.&1@:comp etc. */
static F2(jtfitct){D d;V*sv; RZ(a&&w); ASSERT(!AR(w),EVRANK); sv=VAV(a); RZ(w=cvt(FL,w)); d=*DAV(w); ASSERT(0<=d&&d<5.82076609134675e-11,EVDOMAIN); R CDERIV(CFIT,jtfitct1,jtfitct2,sv->mr,sv->lr,sv->rr); }
static DF1(case1){A u;V*sv; PREF1(case1); sv=VAV(self); RZ(u=from(df1(w,sv->g),sv->h)); ASSERT(!AR(u),EVRANK); R df1(w,*AV(u)); }
static DF2(con2){A h,*hv,*x,z;V*sv; PREF2(con2); sv=VAV(self); h=sv->h; hv=AAV(h); GA(z,BOX,AN(h),AR(h),AS(h)); x=AAV(z); DO(AN(h), RZ(*x++=(VAV(*hv)->f2)(a,w,*hv)); ++hv); R ope(z); }
A jtrank1ex(J jt,A w,A fs,I mr,AF f1) { PROLOG; A y,y0,yw,z; B wb; C*v,*vv; I k,mn,n=1,p,*s,wcn,wcr,wf,wk,wr,*ws,wt,yn,yr,*ys,yt; RZ(w); wt=AT(w); if(wt&SPARSE)R sprank1(w,fs,mr,f1); wr=AR(w); ws=AS(w); wcr=efr(wr,mr); wf=wr-wcr; wb=ARELATIVE(w); if(!wf)R CALL1(f1,w,fs); RE(wcn=prod(wcr,wf+ws)); wk=wcn*bp(wt); v=CAV(w)-wk; NEWYW; p=wf; s=ws; RE(mn=prod(wf,ws)); if(AN(w))MOVEYW else RZ(yw=reshape(vec(INT,wcr,ws+wf),filler(w))); #define VALENCE 1 #define TEMPLATE 0 #include "cr_t.h" }
static A jttayamp(J jt,A w,B nf,A x,A h){A y;B ng=!nf;I j,n;V*v=VAV(h); ASSERT(AR(x)<=(nf?v->lr:v->rr),EVRANK); switch(v->id){ case CPLUS: R tpoly(over(x,one)); case CMINUS: R tpoly(nf?over(x,num[-1]):over(negate(x),one)); case CSTAR: R tpoly(over(zero,x)); case CDIV: ASSERT(ng,EVDOMAIN); R tpoly(over(zero,recip(x))); case CJDOT: R tpoly(nf?over(x,a0j1):over(jdot1(x),one)); case CPOLY: ASSERT(nf,EVDOMAIN); R tpoly(BOX&AT(x)?poly1(x):x); case CHGEOM: ASSERT(nf,EVDOMAIN); RE(j=i0(x)); ASSERT(0<=j,EVDOMAIN); y=IX(j); R tpoly(divide(hgcoeff(y,h),fact(y))); case CBANG: ASSERT(nf,EVDOMAIN); RE(j=i0(x)); ASSERT(0<=j,EVDOMAIN); R tpoly(divide(poly1(box(iota(x))),fact(x))); case CEXP: if(nf)R eva(x,"(^.x)&^ % !"); RE(n=i0(x)); R 0<=n?tpoly(over(reshape(x,zero),one)):atop(ds(CDIV),amp(h,sc(-n))); case CFIT: ASSERT(nf&&CPOLY==ID(v->f),EVDOMAIN); y=over(x,IX(IC(x))); R tpoly(mdiv(df2(x,y,h),atab(CEXP,y,IX(IC(x))))); case CCIRCLE: switch(i0(x)){ case 1: R eval("{&0 1 0 _1@(4&|) % !"); case -3: R eval("{&0 1 0 _1@(4&|) % ]"); case 2: R eval("{&1 0 _1 0@(4&|) % !"); case 5: R eval("2&| % !"); case -7: R eval("2&| % ]"); case 6: R eval("2&|@>: % !"); case -1: R eval("(2&| % ]) * ([: */ (1&+ % 2&+)@(i.@<.&.-:))\"0"); case -5: R eval("({&0 1 0 _1@(4&|) % ]) * ([: */ (1&+ % 2&+)@(i.@<.&.-:))\"0"); }} ASSERT(0,EVDOMAIN); }
void get_light(t_tmp *tmp, t_obj obc, t_cod cor, t_sph *obj_a) { t_sph *tm; t_sph *obj_hit; double old_lx[3]; double old_n[3]; double r; double d; tm = obj_a; obj_hit = obc.nt; init_light(&cor, old_lx, old_n); while (obj_a) { if (obj_a->bri > 0) { set_r_d(&r, &d, &obc, obj_a); set_ray(&cor, obj_a, &obc); rand_vec(&(cor.l_x), (double)AR(atan(r / d))); calc_pt(tmp, &obc, &cor, tm); obc.bounce -= 80; do_lum(&obc, &cor, obj_hit, (2 * M_PI * (1 - (d / (sqrt(pow(d, 2) + pow(r, 2))))))); } obj_a = obj_a->nt; } de_init_light(&cor, old_lx, old_n); }
static DF2(case2){A u;V*sv; PREF2(case2); sv=VAV(self); RZ(u=from(df2(a,w,sv->g),sv->h)); ASSERT(!AR(u),EVRANK); R df2(a,w,*AV(u)); }
A ga(I t, I r, I n, I *s) { I k=WP(t,r,n); A z=a_malloc(k); AT(z)=t; AC(z)=1; AR(z)=r; AN(z)=n; if (r==1) { *AS(z)=n; } else if (r&&s) { ICPY(AS(z),s,r); } gcpush(z); R z; }
static F2(jtpdtspmv){A ax,b,g,x,wx,y,yi,yj,z;B*bv;I m,n,s[2],*u,*v,*yv;P*ap,*wp,*zp; RZ(a&&w); ap=PAV(a); y=SPA(ap,i); yv=AV(y); s[0]=n=*AS(y); s[1]=1; GATV(yj,INT,n,2,s); if(DENSE&AT(w)){ GATV(yi,INT,n,2,s); u=AV(yi); AR(yj)=1; v=AV(yj); DO(n, *u++=*yv++; *v++=*yv++;);
static A jtmerge1(J jt,A w,A ind){A z;B*b;C*wc,*zc;D*wd,*zd;I c,it,j,k,m,r,*s,t,*u,*wi,*zi; RZ(w&&ind); r=MAX(0,AR(w)-1); s=1+AS(w); t=AT(w); k=bp(t); m=IC(w); c=aii(w); ASSERT(!(t&SPARSE),EVNONCE); ASSERT(r==AR(ind),EVRANK); ASSERT(!ICMP(s,AS(ind),r),EVLENGTH); GA(z,t,c,r,s); if(!(AT(ind)&B01+INT))RZ(ind=cvt(INT,ind)); it=AT(ind); u=AV(ind); b=(B*)u; ASSERT(!c||1<m||!(it&B01),EVINDEX); ASSERT(!c||1!=m||!memchr(b,C1,c),EVINDEX); zi=AV(z); zc=(C*)zi; zd=(D*)zc; wi=AV(w); wc=(C*)wi; wd=(D*)wc; switch(MCASE(it,k)){ case MCASE(B01,sizeof(C)): DO(c, *zc++=wc[*b++?i+c:i];); break; case MCASE(B01,sizeof(I)): DO(c, *zi++=wi[*b++?i+c:i];); break;
A gcinit(VO) { I k=WP(BOX,1,NOBJS); A memory; nmem=mtop=bytes=totbytes=0; memory=a_malloc(k); AT(memory)=BOX; AR(memory)=1; AN(memory)=*AS(memory)=NOBJS; objs=AAV(memory); R memory; }
static DF1(jtgsuffix){A h,*hv,z,*zv;I m,n,r; RZ(w); if(jt->rank&&jt->rank[1]<AR(w)){r=jt->rank[1]; jt->rank=0; R rank1ex(w,self,jt->rank[1],jtgsuffix);} jt->rank=0; n=IC(w); h=VAV(self)->h; hv=AAV(h); m=AN(h); GATV(z,BOX,n,1,0); zv=AAV(z); DO(n, RZ(zv[i]=df1(drop(sc(i),w),hv[i%m])););
float *mv_mult(float mat[SIZE][SIZE], float vec[SIZE]) { static float ret[SIZE]; int i, j; v4sf m, v, r, r0={0.,0.,0.,0.}; for (i = 0; i < SIZE; i++) { r = r0; for (j = 0; j < SIZE; j += 4) { r += *(v4sf*)&mat[i][j] * *(v4sf*)&vec[j]; /*m = *(v4sf*)&mat[i][j]; v = *(v4sf*)&vec[j]; v = m * v; r = r + v;*/ } ret[i] = AR(r)[0] + AR(r)[1] + AR(r)[2] + AR(r)[3]; } return ret; }
int main(){ v4sf a={1,2,3,4}, b={5,6,7,8}, c; float mat[SIZE][SIZE], vec[SIZE], *res; int i,j; for(i=0;i<SIZE;i++) for(j=0;j<SIZE;j++) mat[i][j]=0.+i+j; for(i=0;i<SIZE;i++) vec[i]=1.; c = a * b; printf("%f, %f, %f, %f\n", AR(c)[0], AR(c)[1], AR(c)[2], AR(c)[3] ); printf("size = %d\n",sizeof(v4sf)); res = mv_mult( mat, vec); for(i=0;i<SIZE;i++) printf("%d %f \n", i, res[i]); return 0; }
A jtirs1(J jt,A w,A fs,I m,AF f1){A z;I*old=jt->rank,rv[2],wr; RZ(w); wr=AR(w); rv[1]=m=efr(wr,m); if(m>=wr)R CALL1(f1,w,fs); rv[0]=0; jt->rank=rv; z=CALL1(f1,w,fs); jt->rank=old; R z; }
vector compgene(cmatrix& A,cmatrix& B) { int i; int N=A.Rows; int IFAIL=0; int MATV=0; int* ITER=new int[N]; vector ALFR(N); vector ALFI(N); vector BETA(N); double EPS1=0.; matrix AR(N,N); matrix AI(N,N); matrix BR(N,N); matrix BI(N,N); for (i=0;i<N;i++) for (int j=0;j<N;j++) { AR(i,j)=real(A(i,j)); AI(i,j)=imag(A(i,j)); BR(i,j)=real(B(i,j)); BI(i,j)=imag(B(i,j)); } matrix VR(N,N); matrix VI(N,N); /* f02gjf_(&N,AR.TheMatrix,&N,AI.TheMatrix,&N,BR.TheMatrix,&N, BI.TheMatrix,&N,&EPS1,ALFR.TheVector,ALFI.TheVector, BETA.TheVector,&MATV,VR.TheMatrix,&N, VI.TheMatrix,&N,ITER,&IFAIL); */ cerr<<"compgene to be implemented"<<endl; exit(1); if (IFAIL != 0) cerr <<"error in compgene "<<endl; if (!ALFR.TheVector) exit(1); for (i=0;i<N;i++) ALFR(i)=ALFR(i)/BETA(i); // sort ! char ORDER='A'; int M1=1; int M2=N; /* m01caf_(ALFR.TheVector,&M1,&M2,&ORDER,&IFAIL);*/ cerr<<"sort to be implemented"<<endl; exit(1); if (IFAIL != 0) cerr <<"error in m01caf_ "<<endl; return ALFR; }
inline void SyrkLN ( T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE PushCallStack("internal::SyrkLN"); if( A.Grid() != C.Grid() ) throw std::logic_error ("A and C must be distributed over the same grid"); if( A.Height() != C.Height() || A.Height() != C.Width() ) { std::ostringstream msg; msg << "Nonconformal SyrkLN:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); // Temporary distributions DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,VR, STAR> A1_VR_STAR(g); DistMatrix<T,STAR,MR > A1Trans_STAR_MR(g); A1_MC_STAR.AlignWith( C ); A1_VR_STAR.AlignWith( C ); A1Trans_STAR_MR.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionRight( A, AL, AR, 0 ); while( AR.Width() > 0 ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); //--------------------------------------------------------------------// A1_VR_STAR = A1_MC_STAR = A1; A1Trans_STAR_MR.TransposeFrom( A1_VR_STAR, conjugate ); LocalTrrk( LOWER, alpha, A1_MC_STAR, A1Trans_STAR_MR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
// This is the derived verb for f/. y static DF1(jtoblique){A x,y;I m,n,r,*u,*v; RZ(w); r=AR(w); // r = rank of w // create y= ,/ w - the _2-cells of w arranged in a list // we just create a header for y, pointing to the data from w RZ(y=gah(MAX(r-1,1),w)); u=AS(w); v=AS(y); // u,v->shape of y if(1>=r){*v=m=AN(w); n=1;}else{m=*u++; n=*u++; *v++=m*n; ICPY(v,u,r-2);} // set shape of y as _2-cells of w // Create x=+"0 1&i./ 2 {. $y RZ(x=irs2(IX(m),IX(n),0L,0L,1L,jtplus)); AR(x)=1; *AS(x)=AN(x); // perform x f/. y, which does the requested operation RZ(x=df2(x,y,sldot(VAV(self)->f))); // Final tweak: the result should have (0 >. <: +/ 2 {. $y) cells. It will, as long as // m and n are both non0: when one is 0, result has 0 cells (but that cell is the correct result // of execution on a fill-cell). Correct the length of the 0 case, when the result length should be nonzero // if((m==0 || n==0) && (m+n>0)){R reitem(sc(m+n-1),x);} This change withdrawn pending further deliberation R x; }
static DF1(rank1) { DECLF; A h=sv->h; I m,*v=AV(h),wr; RZ(w); wr=AR(w); m=efr(wr,v[0]); R m<wr?rank1ex(w,fs,m,f1):CALL1(f1,w,fs); }
A jtirs2(J jt,A a,A w,A fs,I l,I r,AF f2) { A z; I af,ar,*old=jt->rank,rv[2],wf,wr; RZ(a&&w); ar=AR(a); rv[0]=l=efr(ar,l); af=ar-l; wr=AR(w); rv[1]=r=efr(wr,r); wf=wr-r; if(!(af||wf))R CALL2(f2,a,w,fs); ASSERT(!ICMP(AS(a),AS(w),MIN(af,wf)),EVLENGTH); /* if(af&&wf&&af!=wf)R rank2ex(a,w,fs,l,r,f2); */ jt->rank=rv; z=CALL2(f2,a,w,fs); jt->rank=old; R z; }
inline void LocalTrrkKernel ( UpperOrLower uplo, Orientation orientationOfA, Orientation orientationOfB, T alpha, const DistMatrix<T,STAR,MC >& A, const DistMatrix<T,MR, STAR>& B, T beta, DistMatrix<T,MC, MR >& C ) { #ifndef RELEASE PushCallStack("LocalTrrkKernel"); CheckInput( orientationOfA, orientationOfB, A, B, C ); #endif const Grid& g = C.Grid(); DistMatrix<T,STAR,MC> AL(g), AR(g); DistMatrix<T,MR,STAR> BT(g), BB(g); DistMatrix<T,MC,MR> CTL(g), CTR(g), CBL(g), CBR(g); DistMatrix<T,MC,MR> DTL(g), DBR(g); const int half = C.Height()/2; ScaleTrapezoid( beta, LEFT, uplo, 0, C ); LockedPartitionRight( A, AL, AR, half ); LockedPartitionDown ( B, BT, BB, half ); PartitionDownDiagonal ( C, CTL, CTR, CBL, CBR, half ); DTL.AlignWith( CTL ); DBR.AlignWith( CBR ); DTL.ResizeTo( CTL.Height(), CTL.Width() ); DBR.ResizeTo( CBR.Height(), CBR.Width() ); //------------------------------------------------------------------------// if( uplo == LOWER ) internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AR, BT, T(1), CBL ); else internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AL, BB, T(1), CTR ); internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AL, BT, T(0), DTL ); AxpyTriangle( uplo, T(1), DTL, CTL ); internal::LocalGemm ( orientationOfA, orientationOfB, alpha, AR, BB, T(0), DBR ); AxpyTriangle( uplo, T(1), DBR, CBR ); //------------------------------------------------------------------------// #ifndef RELEASE PopCallStack(); #endif }
void readFastX(std::string const filename) { reader_type AR(filename); typename reader_type::pattern_type pattern; while ( AR.getNextPatternUnlocked(pattern) ) { std::cerr << pattern.pattern << std::endl; } }