Exemple #1
0
// Steps the month counter forward by one. Once the counter reaches mpy it is
// reset to 0 and the year is advanced through add_y().
// NOTE(review): mpy is presumably the number of months per year — confirm
// against its declaration.
void Date::add_m() {
    ++mth;
    if (mth < mpy) {
        return;  // no wrap-around needed
    }

    // Wrapped past the last month: restart at month 0 of the following year.
    mth = 0;
    add_y();
}
Exemple #2
0
// Records one sample for a plot that must have ndim == 3 (asserted).
//
// The sample is written to plot_os as a single tab-separated line
// "x<TAB>y<TAB>v", then x and y are forwarded to add_x()/add_y() and the
// text v, converted with atof(), is forwarded to add_v().
void PlotAgent::add_point(double x, double y, const string& v)
{
    assert(ndim == 3);

    // Emit the three fields one after another on the same output line.
    plot_os << x << '\t';
    plot_os << y << '\t';
    plot_os << v << '\n';

    add_x(x);
    add_y(y);
    // v arrives as text; add_v() is given its numeric value.
    add_v(atof(v.chars()));
}
Exemple #3
0
/*
 * Real GEMV driver (non-transposed form).
 *
 * The scalar tail at the end adds alpha * sum_c a[r + c*lda] * x[c*inc_x] to
 * each remaining y element. The blocked part hands row blocks to
 * zero_y / sgemv_kernel_16x4 / sgemv_kernel_16x1 / add_y, which are defined
 * elsewhere; presumably they compute the same update for a whole block of
 * rows at once — confirm against those helpers.
 *
 * m, n      - row and column counts; returns 0 at once if either is < 1
 * dummy1    - not used by this routine
 * alpha     - scalar multiplier
 * a, lda    - matrix data and the stride between successive columns
 * x, inc_x  - input vector and its stride
 * y, inc_y  - vector updated in place and its stride
 * buffer    - scratch area used as the per-block accumulator
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
    FLOAT scaled_x[4];
    FLOAT *col_ptrs[4];
    FLOAT *col;
    FLOAT *xp;
    FLOAT *yp;
    FLOAT *acc;
    BLASLONG c;
    BLASLONG k;
    BLASLONG r;
    BLASLONG quad_cols;
    BLASLONG rest_cols;
    BLASLONG tail_rows;
    BLASLONG rows_left;
    BLASLONG last_block;
    BLASLONG block;

    if ( m < 1 || n < 1 ) return(0);

    acc = buffer;
    yp  = y;

    /* Columns are consumed four at a time, then one at a time. */
    quad_cols = n / 4;
    rest_cols = n % 4;

    /* Rows: blocks of NBMAX, one shorter block of last_block rows, then
       tail_rows rows handled by the scalar loop at the bottom. */
    tail_rows  = m % 16;
    rows_left  = m - tail_rows;
    last_block = (m % NBMAX) - tail_rows;

    block = NBMAX;
    while ( block == NBMAX )
    {
        rows_left -= block;
        if ( rows_left < 0 )
        {
            /* No full block remains: either finish, or run one final pass
               with the shorter block (which also terminates the loop). */
            if ( last_block == 0 ) break;
            block = last_block;
        }

        col = a;
        xp  = x;
        zero_y(block, acc);

        for ( c = 0; c < quad_cols; c++ )
        {
            /* Pre-scale four x entries by alpha and collect the matching
               four column pointers. */
            for ( k = 0; k < 4; k++ )
            {
                scaled_x[k] = alpha * xp[0];
                xp += inc_x;
                col_ptrs[k] = col + k * lda;
            }
            sgemv_kernel_16x4(block, col_ptrs, scaled_x, acc);
            col += 4 * lda;
        }

        for ( c = 0; c < rest_cols; c++ )
        {
            scaled_x[0] = alpha * xp[0];
            xp += inc_x;
            sgemv_kernel_16x1(block, col, scaled_x, acc);
            col += lda;
        }

        add_y(block, acc, yp, inc_y);

        /* Move on to the next block of rows. */
        a  += block;
        yp += block * inc_y;
    }

    /* Scalar tail: one dot product per remaining row. */
    for ( r = 0; r < tail_rows; r++ )
    {
        FLOAT dot = 0.0;

        col = a;
        xp  = x;
        for ( c = 0; c < n; c++ )
        {
            dot += col[0] * xp[0];
            col += lda;
            xp  += inc_x;
        }

        yp[0] += alpha * dot;
        yp += inc_y;
        a++;
    }

    return(0);
}
Exemple #4
0
/*
 * Complex GEMV driver (non-transposed form), blocked for 4-row kernels.
 *
 * Complex values are stored as consecutive (real, imaginary) FLOAT pairs.
 * The scalar tails at the end of this function add alpha * (row of A . x)
 * to the last m % 4 elements of y. The blocked part delegates to
 * zgemv_kernel_4x4 / zgemv_kernel_4x2 / zgemv_kernel_4x1 and add_y, which
 * are defined elsewhere; presumably they perform the same update for a
 * whole block of rows — confirm against those helpers.
 *
 * m, n             - row and column counts; returns 0 at once if either is < 1
 * dummy1           - not used by this routine
 * alpha_r, alpha_i - real and imaginary part of the scalar alpha
 * a, lda           - matrix data and the stride between successive columns,
 *                    given in complex elements
 * x, inc_x         - input vector and its stride in complex elements
 * y, inc_y         - vector updated in place and its stride in complex elements
 * buffer           - scratch area used as the per-block accumulator (ybuffer)
 *
 * CONJ and XCONJ select conjugated variants of the arithmetic at compile time.
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r,FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
	BLASLONG i;
	FLOAT *a_ptr;
	FLOAT *x_ptr;
	FLOAT *y_ptr;
	FLOAT *ap[4];
	BLASLONG n1;
	BLASLONG m1;
	BLASLONG m2;
	BLASLONG m3;
	BLASLONG n2;
	BLASLONG lda4;
	FLOAT xbuffer[8],*ybuffer;


#if 0
printf("%s %d %d %.16f %.16f %d %d %d\n","zgemv_n",m,n,alpha_r,alpha_i,lda,inc_x,inc_y);
#endif

	if ( m < 1 ) return(0);
	if ( n < 1 ) return(0);

	ybuffer = buffer;
	
	/* Convert the strides from complex elements to FLOAT units
	   (two FLOATs per complex value). */
	inc_x *= 2;
	inc_y *= 2;
	lda   *= 2;
	lda4  = 4 * lda;

	/* Columns are consumed in n1 groups of four, with n2 columns left over. */
	n1 = n / 4 ;
	n2 = n % 4 ;
	
	/* m3 rows are left for the scalar tails below; m1 counts the rows given
	   to the blocked loop and m2 is the size of its final, shorter block. */
	m3 = m % 4;
	m1 = m - ( m % 4 );
	m2 = (m % NBMAX) - (m % 4) ;
	
	y_ptr = y;

	BLASLONG NB = NBMAX;

	/* Each pass handles NB rows: full NBMAX blocks first, then at most one
	   block of m2 rows, which also ends the loop because NB != NBMAX. */
	while ( NB == NBMAX )
	{
		
		m1 -= NB;
		if ( m1 < 0)
		{
			if ( m2 == 0 ) break;	
			NB = m2;
		}
		
		/* ap[0..3] address four consecutive columns of the current row block. */
		a_ptr = a;
		ap[0] = a_ptr;
		ap[1] = a_ptr + lda;
		ap[2] = ap[1] + lda;
		ap[3] = ap[2] + lda;
		x_ptr = x;
		//zero_y(NB,ybuffer);
		/* Clear the accumulator. NOTE(review): NB*16 bytes corresponds to NB
		   complex elements only if FLOAT is 8 bytes wide — confirm. */
		memset(ybuffer,0,NB*16);

		if ( inc_x == 2 )
		{

			/* x is contiguous, so the kernels read it in place. */
			for( i = 0; i < n1 ; i++)
			{
				zgemv_kernel_4x4(NB,ap,x_ptr,ybuffer);
				ap[0] += lda4;
				ap[1] += lda4;
				ap[2] += lda4;
				ap[3] += lda4;
				a_ptr += lda4;
				x_ptr += 8;	
			}

			/* Leftover columns: a pair first (bit 1 of n2), then a single one (bit 0). */
			if ( n2 & 2 )
			{
				zgemv_kernel_4x2(NB,ap,x_ptr,ybuffer);
				x_ptr += 4;	
				a_ptr += 2 * lda;

			}

			if ( n2 & 1 )
			{
				zgemv_kernel_4x1(NB,a_ptr,x_ptr,ybuffer);
				/* x_ptr += 2;	
				a_ptr += lda; */

			}
		}
		else
		{

			/* Strided x: gather four complex entries into xbuffer before
			   each 4-column kernel call. */
			for( i = 0; i < n1 ; i++)
			{

				xbuffer[0] = x_ptr[0];
				xbuffer[1] = x_ptr[1];
				x_ptr += inc_x;	
				xbuffer[2] = x_ptr[0];
				xbuffer[3] = x_ptr[1];
				x_ptr += inc_x;	
				xbuffer[4] = x_ptr[0];
				xbuffer[5] = x_ptr[1];
				x_ptr += inc_x;	
				xbuffer[6] = x_ptr[0];
				xbuffer[7] = x_ptr[1];
				x_ptr += inc_x;	

				zgemv_kernel_4x4(NB,ap,xbuffer,ybuffer);
				ap[0] += lda4;
				ap[1] += lda4;
				ap[2] += lda4;
				ap[3] += lda4;
				a_ptr += lda4;
			}

			/* Leftover columns are handled one at a time. */
			for( i = 0; i < n2 ; i++)
			{
				xbuffer[0] = x_ptr[0];
				xbuffer[1] = x_ptr[1];
				x_ptr += inc_x;	
				zgemv_kernel_4x1(NB,a_ptr,xbuffer,ybuffer);
				a_ptr += 1 * lda;

			}

		}

		/* add_y is defined elsewhere; presumably it applies alpha to ybuffer
		   and adds the result into y with stride inc_y — confirm. */
		add_y(NB,ybuffer,y_ptr,inc_y,alpha_r,alpha_i);
		/* Step to the next block of rows (two FLOATs per complex row entry). */
		a     += 2 * NB;
		y_ptr += NB * inc_y;
	}

	if ( m3 == 0 ) return(0);

	/* Scalar tails for the last m3 rows. In each loop the first preprocessor
	   branch accumulates the complex product a * x, the other conj(a) * x. */

	if ( m3 == 1 )
	{
		a_ptr = a;
		x_ptr = x;
		FLOAT temp_r = 0.0;
		FLOAT temp_i = 0.0;

		/* Fast path: both the matrix row and x are contiguous, so two
		   columns are processed per iteration. */
		if ( lda == 2 && inc_x == 2 )
		{


			/* (n & -2) rounds n down to an even count. */
			for( i=0 ; i < (n & -2); i+=2 )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
				temp_r += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
				temp_r += a_ptr[2] * x_ptr[2] - a_ptr[3] * x_ptr[3];
				temp_i += a_ptr[2] * x_ptr[3] + a_ptr[3] * x_ptr[2];
#else
				temp_r += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
				temp_r += a_ptr[2] * x_ptr[2] + a_ptr[3] * x_ptr[3];
				temp_i += a_ptr[2] * x_ptr[3] - a_ptr[3] * x_ptr[2];
#endif

				a_ptr += 4;
				x_ptr += 4;
			}



			/* Odd column left over, if any. */
			for( ; i < n; i++ )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
				temp_r += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
#else
				temp_r += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
#endif

				a_ptr += 2;
				x_ptr += 2;
			}


		}
		else
		{

			/* General strides: one column per iteration. */
			for( i = 0; i < n; i++ )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
				temp_r += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
#else
				temp_r += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
#endif

				a_ptr += lda;
				x_ptr += inc_x;
			}

		}
		/* y += alpha * temp, or alpha * conj(temp) when XCONJ is defined. */
#if !defined(XCONJ) 
		y_ptr[0] += alpha_r * temp_r - alpha_i * temp_i;
		y_ptr[1] += alpha_r * temp_i + alpha_i * temp_r;
#else
		y_ptr[0] += alpha_r * temp_r + alpha_i * temp_i;
		y_ptr[1] -= alpha_r * temp_i - alpha_i * temp_r;
#endif
		return(0);
	}

	if ( m3 == 2 )
	{
		a_ptr = a;
		x_ptr = x;
		/* One accumulator pair per remaining row. */
		FLOAT temp_r0 = 0.0;
		FLOAT temp_i0 = 0.0;
		FLOAT temp_r1 = 0.0;
		FLOAT temp_i1 = 0.0;

		/* Fast path: columns are exactly two complex entries apart and x is
		   contiguous, so two columns are processed per iteration. */
		if ( lda == 4 && inc_x == 2 )
		{

			for( i = 0; i < (n & -2); i+=2 )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )

				temp_r0 += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] - a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] + a_ptr[3] * x_ptr[0];

				temp_r0 += a_ptr[4] * x_ptr[2] - a_ptr[5] * x_ptr[3];
				temp_i0 += a_ptr[4] * x_ptr[3] + a_ptr[5] * x_ptr[2];
				temp_r1 += a_ptr[6] * x_ptr[2] - a_ptr[7] * x_ptr[3];
				temp_i1 += a_ptr[6] * x_ptr[3] + a_ptr[7] * x_ptr[2];

#else
				temp_r0 += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] + a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] - a_ptr[3] * x_ptr[0];

				temp_r0 += a_ptr[4] * x_ptr[2] + a_ptr[5] * x_ptr[3];
				temp_i0 += a_ptr[4] * x_ptr[3] - a_ptr[5] * x_ptr[2];
				temp_r1 += a_ptr[6] * x_ptr[2] + a_ptr[7] * x_ptr[3];
				temp_i1 += a_ptr[6] * x_ptr[3] - a_ptr[7] * x_ptr[2];

#endif

				a_ptr += 8;
				x_ptr += 4;
			}


			/* Odd column left over, if any. */
			for( ; i < n; i++ )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
				temp_r0 += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] - a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] + a_ptr[3] * x_ptr[0];
#else
				temp_r0 += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] + a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] - a_ptr[3] * x_ptr[0];
#endif

				a_ptr += 4;
				x_ptr += 2;
			}


		}
		else
		{

			/* General strides: one column per iteration. */
			for( i=0 ; i < n; i++ )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
				temp_r0 += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] - a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] + a_ptr[3] * x_ptr[0];
#else
				temp_r0 += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] + a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] - a_ptr[3] * x_ptr[0];
#endif

				a_ptr += lda;
				x_ptr += inc_x;
			}


		}
		/* Apply alpha (to conj(temp) when XCONJ is defined) and update the
		   two remaining y elements. */
#if !defined(XCONJ) 
		y_ptr[0] += alpha_r * temp_r0 - alpha_i * temp_i0;
		y_ptr[1] += alpha_r * temp_i0 + alpha_i * temp_r0;
		y_ptr    += inc_y;
		y_ptr[0] += alpha_r * temp_r1 - alpha_i * temp_i1;
		y_ptr[1] += alpha_r * temp_i1 + alpha_i * temp_r1;
#else
		y_ptr[0] += alpha_r * temp_r0 + alpha_i * temp_i0;
		y_ptr[1] -= alpha_r * temp_i0 - alpha_i * temp_r0;
		y_ptr    += inc_y;
		y_ptr[0] += alpha_r * temp_r1 + alpha_i * temp_i1;
		y_ptr[1] -= alpha_r * temp_i1 - alpha_i * temp_r1;
#endif
		return(0);
	}


	if ( m3 == 3 )
	{
		a_ptr = a;
		x_ptr = x;
		/* One accumulator pair per remaining row. */
		FLOAT temp_r0 = 0.0;
		FLOAT temp_i0 = 0.0;
		FLOAT temp_r1 = 0.0;
		FLOAT temp_i1 = 0.0;
		FLOAT temp_r2 = 0.0;
		FLOAT temp_i2 = 0.0;

		/* Fast path: columns are exactly three complex entries apart and x
		   is contiguous, so constant pointer steps are used. */
		if ( lda == 6 && inc_x == 2 )
		{

			for( i=0 ; i < n; i++ )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
				temp_r0 += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] - a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] + a_ptr[3] * x_ptr[0];
				temp_r2 += a_ptr[4] * x_ptr[0] - a_ptr[5] * x_ptr[1];
				temp_i2 += a_ptr[4] * x_ptr[1] + a_ptr[5] * x_ptr[0];
#else
				temp_r0 += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] + a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] - a_ptr[3] * x_ptr[0];
				temp_r2 += a_ptr[4] * x_ptr[0] + a_ptr[5] * x_ptr[1];
				temp_i2 += a_ptr[4] * x_ptr[1] - a_ptr[5] * x_ptr[0];
#endif

				a_ptr += 6;
				x_ptr += 2;
			}


		}
		else
		{

			/* General strides: one column per iteration. */
			for( i = 0; i < n; i++ )
			{
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
				temp_r0 += a_ptr[0] * x_ptr[0] - a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] + a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] - a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] + a_ptr[3] * x_ptr[0];
				temp_r2 += a_ptr[4] * x_ptr[0] - a_ptr[5] * x_ptr[1];
				temp_i2 += a_ptr[4] * x_ptr[1] + a_ptr[5] * x_ptr[0];
#else
				temp_r0 += a_ptr[0] * x_ptr[0] + a_ptr[1] * x_ptr[1];
				temp_i0 += a_ptr[0] * x_ptr[1] - a_ptr[1] * x_ptr[0];
				temp_r1 += a_ptr[2] * x_ptr[0] + a_ptr[3] * x_ptr[1];
				temp_i1 += a_ptr[2] * x_ptr[1] - a_ptr[3] * x_ptr[0];
				temp_r2 += a_ptr[4] * x_ptr[0] + a_ptr[5] * x_ptr[1];
				temp_i2 += a_ptr[4] * x_ptr[1] - a_ptr[5] * x_ptr[0];
#endif

				a_ptr += lda;
				x_ptr += inc_x;
			}

		}
		/* Apply alpha (to conj(temp) when XCONJ is defined) and update the
		   three remaining y elements. */
#if !defined(XCONJ) 
		y_ptr[0] += alpha_r * temp_r0 - alpha_i * temp_i0;
		y_ptr[1] += alpha_r * temp_i0 + alpha_i * temp_r0;
		y_ptr    += inc_y;
		y_ptr[0] += alpha_r * temp_r1 - alpha_i * temp_i1;
		y_ptr[1] += alpha_r * temp_i1 + alpha_i * temp_r1;
		y_ptr    += inc_y;
		y_ptr[0] += alpha_r * temp_r2 - alpha_i * temp_i2;
		y_ptr[1] += alpha_r * temp_i2 + alpha_i * temp_r2;
#else
		y_ptr[0] += alpha_r * temp_r0 + alpha_i * temp_i0;
		y_ptr[1] -= alpha_r * temp_i0 - alpha_i * temp_r0;
		y_ptr    += inc_y;
		y_ptr[0] += alpha_r * temp_r1 + alpha_i * temp_i1;
		y_ptr[1] -= alpha_r * temp_i1 - alpha_i * temp_r1;
		y_ptr    += inc_y;
		y_ptr[0] += alpha_r * temp_r2 + alpha_i * temp_i2;
		y_ptr[1] -= alpha_r * temp_i2 - alpha_i * temp_r2;
#endif
		return(0);
	}





	return(0);
}
Exemple #5
0
/*
 * Complex GEMV driver (non-transposed form), blocked for 16-row kernels.
 *
 * Complex values are stored as consecutive (real, imaginary) FLOAT pairs.
 * The scalar tail at the end adds alpha * (row of A . x) to each of the last
 * m % 16 elements of y. The blocked part hands row blocks to zero_y /
 * zgemv_kernel_16x4 / zgemv_kernel_16x1 / add_y, which are defined elsewhere;
 * presumably they perform the same update for a whole block of rows —
 * confirm against those helpers.
 *
 * m, n             - row and column counts; returns 0 at once if either is < 1
 * dummy1           - not used by this routine
 * alpha_r, alpha_i - real and imaginary part of the scalar alpha
 * a, lda           - matrix data and the stride between successive columns,
 *                    given in complex elements
 * x, inc_x         - input vector and its stride in complex elements
 * y, inc_y         - vector updated in place and its stride in complex elements
 * buffer           - scratch area used as the per-block accumulator
 *
 * CONJ and XCONJ select conjugated variants of the arithmetic at compile time.
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r,FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
	FLOAT xpack[8];
	FLOAT *col_ptrs[4];
	FLOAT *col;
	FLOAT *xp;
	FLOAT *yp;
	FLOAT *acc;
	BLASLONG c;
	BLASLONG k;
	BLASLONG r;
	BLASLONG quad_cols;
	BLASLONG rest_cols;
	BLASLONG tail_rows;
	BLASLONG rows_left;
	BLASLONG last_block;
	BLASLONG block;

#if 0
printf("%s %d %d %.16f %.16f %d %d %d\n","zgemv_n",m,n,alpha_r,alpha_i,lda,inc_x,inc_y);
#endif

	if ( m < 1 || n < 1 ) return(0);

	acc = buffer;
	yp  = y;

	/* Strides are given in complex elements; switch to FLOAT units. */
	inc_x *= 2;
	inc_y *= 2;
	lda   *= 2;

	/* Columns are consumed four at a time, then one at a time. */
	quad_cols = n / 4;
	rest_cols = n % 4;

	/* Rows: blocks of NBMAX, one shorter block of last_block rows, then
	   tail_rows rows handled by the scalar loop at the bottom. */
	tail_rows  = m % 16;
	rows_left  = m - tail_rows;
	last_block = (m % NBMAX) - tail_rows;

	block = NBMAX;
	while ( block == NBMAX )
	{
		rows_left -= block;
		if ( rows_left < 0 )
		{
			/* No full block remains: either finish, or run one final pass
			   with the shorter block (which also terminates the loop). */
			if ( last_block == 0 ) break;
			block = last_block;
		}

		col = a;
		xp  = x;
		zero_y(block, acc);

		for ( c = 0; c < quad_cols; c++ )
		{
			/* Gather four complex x entries and the matching four column
			   pointers. */
			for ( k = 0; k < 4; k++ )
			{
				xpack[2 * k]     = xp[0];
				xpack[2 * k + 1] = xp[1];
				xp += inc_x;
				col_ptrs[k] = col + k * lda;
			}
			zgemv_kernel_16x4(block, col_ptrs, xpack, acc);
			col += 4 * lda;
		}

		for ( c = 0; c < rest_cols; c++ )
		{
			xpack[0] = xp[0];
			xpack[1] = xp[1];
			xp += inc_x;
			zgemv_kernel_16x1(block, col, xpack, acc);
			col += lda;
		}

		add_y(block, acc, yp, inc_y, alpha_r, alpha_i);

		/* Move on to the next block of rows (two FLOATs per row entry). */
		a  += 2 * block;
		yp += block * inc_y;
	}

	/* Scalar tail: one complex dot product per remaining row. */
	for ( r = 0; r < tail_rows; r++ )
	{
		FLOAT sum_r = 0.0;
		FLOAT sum_i = 0.0;

		col = a;
		xp  = x;
		for ( c = 0; c < n; c++ )
		{
			/* First branch accumulates a * x, the other conj(a) * x. */
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
			sum_r += col[0] * xp[0] - col[1] * xp[1];
			sum_i += col[0] * xp[1] + col[1] * xp[0];
#else
			sum_r += col[0] * xp[0] + col[1] * xp[1];
			sum_i += col[0] * xp[1] - col[1] * xp[0];
#endif

			col += lda;
			xp  += inc_x;
		}

		/* y += alpha * sum, or alpha * conj(sum) when XCONJ is defined. */
#if !defined(XCONJ) 
		yp[0] += alpha_r * sum_r - alpha_i * sum_i;
		yp[1] += alpha_r * sum_i + alpha_i * sum_r;
#else
		yp[0] += alpha_r * sum_r + alpha_i * sum_i;
		yp[1] -= alpha_r * sum_i - alpha_i * sum_r;
#endif
		yp += inc_y;
		a  += 2;
	}

	return(0);
}