// Advance the date by one month.
//
// The month counter is incremented; once it reaches or exceeds mpy
// (presumably "months per year" -- confirm against the class definition)
// it is reset to 0 and the year is advanced through add_y().
void Date::add_m()
{
    ++mth;

    const bool rolled_over = (mth >= mpy);
    if (!rolled_over) {
        return;
    }

    mth = 0;
    add_y();
}
// Record one three-component sample (x, y, v).
//
// The sample is written to plot_os as a single tab-separated line and then
// each component is forwarded to add_x / add_y / add_v.  The third component
// arrives as text and is converted with atof() before being handed to add_v,
// so text that atof() cannot parse is forwarded as 0.0.
//
// Only valid when the agent is configured for three dimensions (asserted).
void PlotAgent::add_point(double x, double y, const string& v)
{
    assert(ndim == 3);

    // The raw text of v is emitted, not its numeric conversion.
    plot_os << x << '\t'
            << y << '\t'
            << v << '\n';

    add_x(x);
    add_y(y);

    const double numeric_v = atof(v.chars());
    add_v(numeric_v);
}
/*
 * Non-transposed matrix-vector driver for a real m x n matrix `a` whose
 * columns are `lda` elements apart.
 *
 * Rows are processed in two phases:
 *
 *   1. Row blocks of NB rows (NB is NBMAX, or the final partial block
 *      (m % NBMAX) - (m % 16)).  For each block the x entries are scaled by
 *      alpha, gathered into a small contiguous buffer, and handed to
 *      sgemv_kernel_16x4 (four columns at a time) or sgemv_kernel_16x1 (left
 *      over columns).  The kernels and zero_y/add_y are defined elsewhere;
 *      presumably the kernels accumulate into `buffer` and add_y merges that
 *      into y with stride inc_y -- confirm against their definitions.
 *
 *   2. The remaining m % 16 rows are computed here directly:
 *          y[row] += alpha * sum_over_columns( a[row, col] * x[col] )
 *
 * `dummy1` is not used.  The function always returns 0 and returns
 * immediately, without touching y, when m < 1 or n < 1.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
    BLASLONG col;
    BLASLONG row;
    BLASLONG k;
    FLOAT *a_ptr;
    FLOAT *x_ptr;
    FLOAT *ap[4];
    FLOAT xbuffer[4];

    if ( m < 1 || n < 1 ) return(0);

    FLOAT *ybuffer = buffer;

    /* Column split: n1 groups of four columns, then n2 single columns. */
    const BLASLONG n1 = n / 4;
    const BLASLONG n2 = n % 4;

    /* Row split: m_tail rows are left for the scalar loop at the end. */
    const BLASLONG m_tail = m % 16;
    BLASLONG       m1     = m - m_tail;
    const BLASLONG m2     = (m % NBMAX) - m_tail;

    FLOAT *y_ptr = y;

    /*
     * The loop keeps running while full NBMAX blocks are being processed.
     * Once fewer than NBMAX kernel rows remain, NB is set to the partial
     * block size m2 (which ends the loop after this pass), or the loop is
     * left at once when there is no partial block.
     */
    BLASLONG NB = NBMAX;
    while ( NB == NBMAX )
    {
        m1 -= NB;
        if ( m1 < 0 )
        {
            if ( m2 == 0 ) break;
            NB = m2;
        }

        a_ptr = a;
        x_ptr = x;

        zero_y(NB, ybuffer);

        for ( col = 0; col < n1; col++ )
        {
            /* Gather four alpha-scaled x entries and the four column starts. */
            for ( k = 0; k < 4; k++ )
            {
                xbuffer[k] = alpha * x_ptr[0];
                x_ptr += inc_x;
                ap[k] = a_ptr + k * lda;
            }

            sgemv_kernel_16x4(NB, ap, xbuffer, ybuffer);
            a_ptr += 4 * lda;
        }

        for ( col = 0; col < n2; col++ )
        {
            xbuffer[0] = alpha * x_ptr[0];
            x_ptr += inc_x;

            sgemv_kernel_16x1(NB, a_ptr, xbuffer, ybuffer);
            a_ptr += 1 * lda;
        }

        add_y(NB, ybuffer, y_ptr, inc_y);

        /* Step down to the first row of the next block. */
        a     += NB;
        y_ptr += NB * inc_y;
    }

    /* Scalar clean-up for the last m % 16 rows. */
    for ( row = 0; row < m_tail; row++ )
    {
        FLOAT temp = 0.0;

        a_ptr = a;
        x_ptr = x;

        for ( col = 0; col < n; col++ )
        {
            temp += a_ptr[0] * x_ptr[0];
            a_ptr += lda;
            x_ptr += inc_x;
        }

        y_ptr[0] += alpha * temp;
        y_ptr += inc_y;
        a++;
    }

    return(0);
}
/*
 * Non-transposed matrix-vector driver for a complex m x n matrix `a`.
 * Every complex value is stored as two consecutive FLOATs (real, imaginary),
 * which is why inc_x, inc_y and lda are doubled on entry: from then on they
 * are strides measured in FLOATs.
 *
 * Rows are processed in two phases:
 *
 *   1. Row blocks of NB rows (NB is NBMAX, or the final partial block
 *      (m % NBMAX) - (m % 4)) are handed to zgemv_kernel_4x4 / _4x2 / _4x1.
 *      When x is contiguous (inc_x == 2 after doubling) the kernels read x
 *      in place; otherwise x entries are first gathered into xbuffer.
 *      The kernels and add_y are defined elsewhere; presumably the kernels
 *      accumulate into `buffer` and add_y applies alpha and merges the
 *      result into y -- confirm against their definitions.
 *
 *   2. The remaining m % 4 rows (1, 2 or 3) are computed here directly, one
 *      accumulator pair per row, and then combined with alpha into y.
 *
 * CONJ / XCONJ select the sign pattern of the complex arithmetic; see the
 * two helper macros below for the exact formulas.
 *
 * `dummy1` is not used.  The function always returns 0 and returns
 * immediately, without touching y, when m < 1 or n < 1.
 */

/*
 * ZGEMV_N4_MAC(tr, ti, ap, xp)
 * Accumulate one complex product into (tr, ti), reading the matrix entry
 * from (ap)[0..1] and the vector entry from (xp)[0..1].
 *   default / CONJ+XCONJ : (tr + i*ti) += a * x
 *   otherwise            : (tr + i*ti) += conj(a) * x
 */
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
#define ZGEMV_N4_MAC(tr, ti, ap, xp) \
    do { \
        (tr) += (ap)[0] * (xp)[0] - (ap)[1] * (xp)[1]; \
        (ti) += (ap)[0] * (xp)[1] + (ap)[1] * (xp)[0]; \
    } while (0)
#else
#define ZGEMV_N4_MAC(tr, ti, ap, xp) \
    do { \
        (tr) += (ap)[0] * (xp)[0] + (ap)[1] * (xp)[1]; \
        (ti) += (ap)[0] * (xp)[1] - (ap)[1] * (xp)[0]; \
    } while (0)
#endif

/*
 * ZGEMV_N4_ADD_TO_Y(yp, tr, ti)
 * Combine the accumulator (tr, ti) with (alpha_r, alpha_i) and add the
 * result to the complex element at (yp)[0..1].
 *   without XCONJ : y += alpha * (tr + i*ti)
 *   with XCONJ    : y += alpha * (tr - i*ti)
 */
#if !defined(XCONJ)
#define ZGEMV_N4_ADD_TO_Y(yp, tr, ti) \
    do { \
        (yp)[0] += alpha_r * (tr) - alpha_i * (ti); \
        (yp)[1] += alpha_r * (ti) + alpha_i * (tr); \
    } while (0)
#else
#define ZGEMV_N4_ADD_TO_Y(yp, tr, ti) \
    do { \
        (yp)[0] += alpha_r * (tr) + alpha_i * (ti); \
        (yp)[1] -= alpha_r * (ti) - alpha_i * (tr); \
    } while (0)
#endif

int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r,FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
    BLASLONG i;
    FLOAT *a_ptr;
    FLOAT *x_ptr;
    FLOAT *ap[4];
    FLOAT xbuffer[8];

#if 0
printf("%s %d %d %.16f %.16f %d %d %d\n","zgemv_n",m,n,alpha_r,alpha_i,lda,inc_x,inc_y);
#endif

    if ( m < 1 || n < 1 ) return(0);

    FLOAT *ybuffer = buffer;

    /* From here on all strides count FLOATs, two per complex element. */
    inc_x *= 2;
    inc_y *= 2;
    lda   *= 2;
    const BLASLONG lda4 = 4 * lda;

    /* Column split: n1 groups of four columns, n2 left-over columns. */
    const BLASLONG n1 = n / 4;
    const BLASLONG n2 = n % 4;

    /* Row split: m3 rows are left for the scalar code after the loop. */
    const BLASLONG m3 = m % 4;
    BLASLONG       m1 = m - m3;
    const BLASLONG m2 = (m % NBMAX) - m3;

    FLOAT *y_ptr = y;

    /*
     * The loop keeps running while full NBMAX blocks are being processed.
     * Once fewer than NBMAX kernel rows remain, NB is set to the partial
     * block size m2 (which ends the loop after this pass), or the loop is
     * left at once when there is no partial block.
     */
    BLASLONG NB = NBMAX;
    while ( NB == NBMAX )
    {
        m1 -= NB;
        if ( m1 < 0 )
        {
            if ( m2 == 0 ) break;
            NB = m2;
        }

        a_ptr = a;
        x_ptr = x;

        ap[0] = a_ptr;
        ap[1] = ap[0] + lda;
        ap[2] = ap[1] + lda;
        ap[3] = ap[2] + lda;

        /*
         * Clear NB * 16 bytes of the accumulation buffer.
         * NOTE(review): 16 bytes per row equals one complex element only
         * when FLOAT is 8 bytes wide -- confirm for this build.
         */
        memset(ybuffer, 0, NB * 16);

        if ( inc_x == 2 )
        {
            /* x is contiguous: kernels read it in place. */
            for ( i = 0; i < n1; i++ )
            {
                zgemv_kernel_4x4(NB, ap, x_ptr, ybuffer);
                ap[0] += lda4;
                ap[1] += lda4;
                ap[2] += lda4;
                ap[3] += lda4;
                a_ptr += lda4;
                x_ptr += 8;
            }

            if ( n2 & 2 )
            {
                zgemv_kernel_4x2(NB, ap, x_ptr, ybuffer);
                x_ptr += 4;
                a_ptr += 2 * lda;
            }

            if ( n2 & 1 )
            {
                /* Last column: no pointer needs advancing afterwards. */
                zgemv_kernel_4x1(NB, a_ptr, x_ptr, ybuffer);
            }
        }
        else
        {
            /* x is strided: gather the entries into xbuffer first. */
            for ( i = 0; i < n1; i++ )
            {
                xbuffer[0] = x_ptr[0];
                xbuffer[1] = x_ptr[1];
                x_ptr += inc_x;
                xbuffer[2] = x_ptr[0];
                xbuffer[3] = x_ptr[1];
                x_ptr += inc_x;
                xbuffer[4] = x_ptr[0];
                xbuffer[5] = x_ptr[1];
                x_ptr += inc_x;
                xbuffer[6] = x_ptr[0];
                xbuffer[7] = x_ptr[1];
                x_ptr += inc_x;

                zgemv_kernel_4x4(NB, ap, xbuffer, ybuffer);
                ap[0] += lda4;
                ap[1] += lda4;
                ap[2] += lda4;
                ap[3] += lda4;
                a_ptr += lda4;
            }

            for ( i = 0; i < n2; i++ )
            {
                xbuffer[0] = x_ptr[0];
                xbuffer[1] = x_ptr[1];
                x_ptr += inc_x;

                zgemv_kernel_4x1(NB, a_ptr, xbuffer, ybuffer);
                a_ptr += 1 * lda;
            }
        }

        add_y(NB, ybuffer, y_ptr, inc_y, alpha_r, alpha_i);

        /* Step down to the first row of the next block. */
        a     += 2 * NB;
        y_ptr += NB * inc_y;
    }

    /* Scalar clean-up for the last m % 4 rows. */
    switch ( m3 )
    {
    case 1:
    {
        FLOAT temp_r = 0.0;
        FLOAT temp_i = 0.0;

        a_ptr = a;
        x_ptr = x;

        if ( lda == 2 && inc_x == 2 )
        {
            /* Row and x both contiguous: two columns per iteration. */
            for ( i = 0; i < (n & -2); i += 2 )
            {
                ZGEMV_N4_MAC(temp_r, temp_i, a_ptr,     x_ptr);
                ZGEMV_N4_MAC(temp_r, temp_i, a_ptr + 2, x_ptr + 2);
                a_ptr += 4;
                x_ptr += 4;
            }

            for ( ; i < n; i++ )
            {
                ZGEMV_N4_MAC(temp_r, temp_i, a_ptr, x_ptr);
                a_ptr += 2;
                x_ptr += 2;
            }
        }
        else
        {
            for ( i = 0; i < n; i++ )
            {
                ZGEMV_N4_MAC(temp_r, temp_i, a_ptr, x_ptr);
                a_ptr += lda;
                x_ptr += inc_x;
            }
        }

        ZGEMV_N4_ADD_TO_Y(y_ptr, temp_r, temp_i);
        break;
    }

    case 2:
    {
        FLOAT temp_r0 = 0.0;
        FLOAT temp_i0 = 0.0;
        FLOAT temp_r1 = 0.0;
        FLOAT temp_i1 = 0.0;

        a_ptr = a;
        x_ptr = x;

        if ( lda == 4 && inc_x == 2 )
        {
            /* Two packed rows, contiguous x: two columns per iteration. */
            for ( i = 0; i < (n & -2); i += 2 )
            {
                ZGEMV_N4_MAC(temp_r0, temp_i0, a_ptr,     x_ptr);
                ZGEMV_N4_MAC(temp_r1, temp_i1, a_ptr + 2, x_ptr);
                ZGEMV_N4_MAC(temp_r0, temp_i0, a_ptr + 4, x_ptr + 2);
                ZGEMV_N4_MAC(temp_r1, temp_i1, a_ptr + 6, x_ptr + 2);
                a_ptr += 8;
                x_ptr += 4;
            }

            for ( ; i < n; i++ )
            {
                ZGEMV_N4_MAC(temp_r0, temp_i0, a_ptr,     x_ptr);
                ZGEMV_N4_MAC(temp_r1, temp_i1, a_ptr + 2, x_ptr);
                a_ptr += 4;
                x_ptr += 2;
            }
        }
        else
        {
            for ( i = 0; i < n; i++ )
            {
                ZGEMV_N4_MAC(temp_r0, temp_i0, a_ptr,     x_ptr);
                ZGEMV_N4_MAC(temp_r1, temp_i1, a_ptr + 2, x_ptr);
                a_ptr += lda;
                x_ptr += inc_x;
            }
        }

        ZGEMV_N4_ADD_TO_Y(y_ptr, temp_r0, temp_i0);
        y_ptr += inc_y;
        ZGEMV_N4_ADD_TO_Y(y_ptr, temp_r1, temp_i1);
        break;
    }

    case 3:
    {
        FLOAT temp_r0 = 0.0;
        FLOAT temp_i0 = 0.0;
        FLOAT temp_r1 = 0.0;
        FLOAT temp_i1 = 0.0;
        FLOAT temp_r2 = 0.0;
        FLOAT temp_i2 = 0.0;

        a_ptr = a;
        x_ptr = x;

        if ( lda == 6 && inc_x == 2 )
        {
            /* Three packed rows, contiguous x: constant strides. */
            for ( i = 0; i < n; i++ )
            {
                ZGEMV_N4_MAC(temp_r0, temp_i0, a_ptr,     x_ptr);
                ZGEMV_N4_MAC(temp_r1, temp_i1, a_ptr + 2, x_ptr);
                ZGEMV_N4_MAC(temp_r2, temp_i2, a_ptr + 4, x_ptr);
                a_ptr += 6;
                x_ptr += 2;
            }
        }
        else
        {
            for ( i = 0; i < n; i++ )
            {
                ZGEMV_N4_MAC(temp_r0, temp_i0, a_ptr,     x_ptr);
                ZGEMV_N4_MAC(temp_r1, temp_i1, a_ptr + 2, x_ptr);
                ZGEMV_N4_MAC(temp_r2, temp_i2, a_ptr + 4, x_ptr);
                a_ptr += lda;
                x_ptr += inc_x;
            }
        }

        ZGEMV_N4_ADD_TO_Y(y_ptr, temp_r0, temp_i0);
        y_ptr += inc_y;
        ZGEMV_N4_ADD_TO_Y(y_ptr, temp_r1, temp_i1);
        y_ptr += inc_y;
        ZGEMV_N4_ADD_TO_Y(y_ptr, temp_r2, temp_i2);
        break;
    }

    default:
        /* m3 == 0: every row was covered by the blocked kernels. */
        break;
    }

    return(0);
}

#undef ZGEMV_N4_MAC
#undef ZGEMV_N4_ADD_TO_Y
/*
 * Non-transposed matrix-vector driver for a complex m x n matrix `a`.
 * Every complex value is stored as two consecutive FLOATs (real, imaginary),
 * which is why inc_x, inc_y and lda are doubled on entry: from then on they
 * are strides measured in FLOATs.
 *
 * Rows are processed in two phases:
 *
 *   1. Row blocks of NB rows (NB is NBMAX, or the final partial block
 *      (m % NBMAX) - (m % 16)).  For each block the x entries are gathered
 *      into a small contiguous buffer and handed to zgemv_kernel_16x4 (four
 *      columns at a time) or zgemv_kernel_16x1 (left-over columns).
 *      The kernels and zero_y/add_y are defined elsewhere; presumably the
 *      kernels accumulate into `buffer` and add_y applies alpha and merges
 *      the result into y -- confirm against their definitions.
 *
 *   2. The remaining m % 16 rows are computed here directly, one row at a
 *      time, and combined with (alpha_r, alpha_i) into y.
 *
 * CONJ / XCONJ select the sign pattern of the complex arithmetic:
 *   accumulation, default or CONJ+XCONJ : temp += a * x
 *   accumulation, otherwise             : temp += conj(a) * x
 *   y update without XCONJ              : y += alpha * temp
 *   y update with XCONJ                 : y += alpha * conj(temp)
 *
 * `dummy1` is not used.  The function always returns 0 and returns
 * immediately, without touching y, when m < 1 or n < 1.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r,FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
    BLASLONG col;
    BLASLONG row;
    BLASLONG k;
    FLOAT *a_ptr;
    FLOAT *x_ptr;
    FLOAT *ap[4];
    FLOAT xbuffer[8];

#if 0
printf("%s %d %d %.16f %.16f %d %d %d\n","zgemv_n",m,n,alpha_r,alpha_i,lda,inc_x,inc_y);
#endif

    if ( m < 1 || n < 1 ) return(0);

    FLOAT *ybuffer = buffer;

    /* From here on all strides count FLOATs, two per complex element. */
    inc_x *= 2;
    inc_y *= 2;
    lda   *= 2;

    /* Column split: n1 groups of four columns, then n2 single columns. */
    const BLASLONG n1 = n / 4;
    const BLASLONG n2 = n % 4;

    /* Row split: m_tail rows are left for the scalar loop at the end. */
    const BLASLONG m_tail = m % 16;
    BLASLONG       m1     = m - m_tail;
    const BLASLONG m2     = (m % NBMAX) - m_tail;

    FLOAT *y_ptr = y;

    /*
     * The loop keeps running while full NBMAX blocks are being processed.
     * Once fewer than NBMAX kernel rows remain, NB is set to the partial
     * block size m2 (which ends the loop after this pass), or the loop is
     * left at once when there is no partial block.
     */
    BLASLONG NB = NBMAX;
    while ( NB == NBMAX )
    {
        m1 -= NB;
        if ( m1 < 0 )
        {
            if ( m2 == 0 ) break;
            NB = m2;
        }

        a_ptr = a;
        x_ptr = x;

        zero_y(NB, ybuffer);

        for ( col = 0; col < n1; col++ )
        {
            /* Gather four complex x entries into contiguous storage. */
            for ( k = 0; k < 4; k++ )
            {
                xbuffer[2 * k]     = x_ptr[0];
                xbuffer[2 * k + 1] = x_ptr[1];
                x_ptr += inc_x;
            }

            ap[0] = a_ptr;
            ap[1] = ap[0] + lda;
            ap[2] = ap[1] + lda;
            ap[3] = ap[2] + lda;

            zgemv_kernel_16x4(NB, ap, xbuffer, ybuffer);
            a_ptr += 4 * lda;
        }

        for ( col = 0; col < n2; col++ )
        {
            xbuffer[0] = x_ptr[0];
            xbuffer[1] = x_ptr[1];
            x_ptr += inc_x;

            zgemv_kernel_16x1(NB, a_ptr, xbuffer, ybuffer);
            a_ptr += 1 * lda;
        }

        add_y(NB, ybuffer, y_ptr, inc_y, alpha_r, alpha_i);

        /* Step down to the first row of the next block. */
        a     += 2 * NB;
        y_ptr += NB * inc_y;
    }

    /* Scalar clean-up for the last m % 16 rows. */
    for ( row = 0; row < m_tail; row++ )
    {
        FLOAT temp_r = 0.0;
        FLOAT temp_i = 0.0;

        a_ptr = a;
        x_ptr = x;

        for ( col = 0; col < n; col++ )
        {
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
            temp_r += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
            temp_i += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
#else
            temp_r += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
            temp_i += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
#endif
            a_ptr += lda;
            x_ptr += inc_x;
        }

#if !defined(XCONJ)
        y_ptr[0] += alpha_r * temp_r - alpha_i * temp_i;
        y_ptr[1] += alpha_r * temp_i + alpha_i * temp_r;
#else
        y_ptr[0] += alpha_r * temp_r + alpha_i * temp_i;
        y_ptr[1] -= alpha_r * temp_i - alpha_i * temp_r;
#endif

        y_ptr += inc_y;
        a += 2;
    }

    return(0);
}