/*
 * lis_quad_mul - multiply two LIS_QUAD values.
 *
 * Hands the hi/lo parts of a, b and c to the quad multiply macro, which is
 * defined elsewhere. Judging by its name and argument order it stores the
 * product b * c in a (a->hi / a->lo) -- confirm against the macro itself.
 * The SSE2 flavour of the macro is selected when USE_SSE2 is defined.
 */
void lis_quad_mul(LIS_QUAD *a, const LIS_QUAD *b, const LIS_QUAD *c)
{
	/* Presumably declares the temporaries the multiply macros rely on. */
	LIS_QUAD_DECLAR;

#ifdef USE_SSE2
	LIS_QUAD_MUL_SSE2(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
#else
	LIS_QUAD_MUL(a->hi,a->lo,b->hi,b->lo,c->hi,c->lo);
#endif
}
/* Example #2 */
/* 0 */
/*
 * Scales every element of vx in place by the quad value alpha.
 *
 * Element i is the pair (vx->value[i], vx->value_lo[i]). It is passed as both
 * the destination and the first operand of the quad multiply macro, with
 * alpha's hi/lo parts as the second operand. The macros are defined
 * elsewhere, so "multiply" is inferred from their names -- confirm there.
 *
 * Without USE_FMA2_SSE2: one loop over all n elements (an OpenMP
 * "parallel for" when OpenMP is enabled).
 * With USE_FMA2_SSE2: vx->work[0..3] is overwritten with copies of alpha and
 * each thread walks its own [is, ie) range, two elements per step followed
 * by a one-element remainder loop.
 *
 * Always returns LIS_SUCCESS.
 */
LIS_INT lis_vector_scaleex_mm(LIS_QUAD_PTR alpha, LIS_VECTOR vx)
{
	LIS_INT i,n,is,ie,nprocs,my_rank;
	LIS_QUAD_PTR aa;
	LIS_SCALAR *x,*xl;
	/* Supplies the temporaries named in the OpenMP private() lists below
	 * (they are not declared anywhere else in this function). */
	LIS_QUAD_DECLAR;

	LIS_DEBUG_FUNC_IN;

	n  = vx->n;
	x  = vx->value;
	xl = vx->value_lo;
	/* aa aliases the vector's work buffer: hi part at work[0..], lo part at
	 * work[2..]. It is only read/written in the USE_FMA2_SSE2 path. */
	aa.hi = &vx->work[0];
	aa.lo = &vx->work[2];
	#ifndef USE_FMA2_SSE2
		/* Scalar path: the macro temporaries are made thread-private so that
		 * iterations running on different threads do not share them. */
	    #pragma cdir nodep
		#pragma omp parallel for private(i,p1,p2,tq,bhi,blo,chi,clo,sh,eh,sl,el,th,tl)
		for(i=0; i<n; i++)
		{
			LIS_QUAD_MUL(x[i],xl[i],x[i],xl[i],alpha.hi[0],alpha.lo[0]);
		}
	#else
		#ifdef _OPENMP
			nprocs = omp_get_max_threads();
		#else
			nprocs = 1;
		#endif
		/* Duplicate alpha into two adjacent slots (work[0..1] and work[2..3]),
		 * presumably so the two-element macro can load it as a pair.
		 * This is done once, before the parallel region starts. */
		aa.hi[0] = aa.hi[1] = alpha.hi[0];
		aa.lo[0] = aa.lo[1] = alpha.lo[0];
		#ifdef _OPENMP
		#pragma omp parallel private(i,bh,ch,sh,wh,th,bl,cl,sl,wl,tl,p1,p2,t0,t1,t2,eh,is,ie,my_rank)
		#endif
		{
			#ifdef _OPENMP
				my_rank = omp_get_thread_num();
			#else
				my_rank = 0;
			#endif
			/* Fills in is/ie from the thread index, thread count and n;
			 * presumably this thread's share of [0, n) -- see the macro. */
			LIS_GET_ISIE(my_rank,nprocs,n,is,ie);
			/* Main loop: advances two elements per iteration and stops while
			 * at least two elements remain in [is, ie). */
			for(i=is;i<ie-1;i+=2)
			{
				LIS_QUAD_MUL2_SSE2(x[i],xl[i],x[i],xl[i],aa.hi[0],aa.lo[0]);
			}
			/* Remainder: at most one leftover element when the range length
			 * is odd. */
			for(;i<ie;i++)
			{
				LIS_QUAD_MUL_SSE2(x[i],xl[i],x[i],xl[i],aa.hi[0],aa.lo[0]);
			}
		}
	#endif
	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}