// Three-argument convenience overload of MV_Add.
//
// Forwards to MV_AddVector when x has more than 16 entries along dimension 1
// and to MV_AddUnroll otherwise. A default-constructed view fills both
// coefficient-vector parameters and both integer selectors are passed as 1.
// NOTE(review): presumably selector value 1 means the coefficient views are
// never read, which is why an empty placeholder is acceptable -- confirm
// against the functor implementations.
//
// Returns the value returned by the selected implementation.
RVector MV_Add( const RVector & r, const XVector & x, const YVector & y)
{
  typedef KokkosArray::View<typename XVector::value_type* ,
                            KokkosArray::LayoutRight,
                            typename XVector::device_type> placeholder_type;

  // Exists only to occupy the av / bv parameter slots of the callee.
  placeholder_type unused;

  const bool wide = x.dimension(1) > 16;
  if (wide) {
    return MV_AddVector( r, unused, x, unused, y, 1, 1);
  }
  return MV_AddUnroll( r, unused, x, unused, y, 1, 1);
}
// General MV_Add entry point. Chooses an implementation from the extent of x
// along dimension 1 and forwards every argument unchanged:
//   extent <= 16 -> MV_AddUnroll
//   extent  > 16 -> MV_AddVector
// The integer selectors a and b default to 2.
//
// Returns the value returned by the selected implementation.
RVector MV_Add( const RVector & r,const aVector &av,const XVector & x, const bVector &bv, const YVector & y, int a=2,int b=2)
{
  if (x.dimension(1) <= 16) {
    return MV_AddUnroll( r, av, x, bv, y, a, b);
  }
  return MV_AddVector( r, av, x, bv, y, a, b);
}
// V_MulScalar overload whose coefficient `a` is itself a KokkosArray::View.
//
// Launches one parallel_for over x.dimension(0) work items. When r and x
// compare equal the "Self" functor (which holds only x and a) is used;
// otherwise the general functor holding r, x and a is used.
// NOTE(review): presumably this computes r = a * x; the arithmetic lives in
// the functors, which are not visible here.
// NOTE(review): the local aVector typedef omits the Specialisation argument
// that the parameter type carries -- presumably it relies on the template's
// default; confirm this is intended.
//
// Returns r.
RVector V_MulScalar( const RVector & r, const typename KokkosArray::View<DataType,Layout,Device,MemoryManagement,Specialisation> & a, const XVector & x)
{
  typedef typename KokkosArray::View<DataType,Layout,Device,MemoryManagement> aVector;

  if (r == x) {
    // Source and destination are the same view: use the in-place functor.
    V_MulScalarFunctorSelf<aVector,XVector> inPlace ;
    inPlace.m_x = x ;
    inPlace.m_a = a ;
    KokkosArray::parallel_for( x.dimension(0) , inPlace );
  }
  else {
    V_MulScalarFunctor<RVector,aVector,XVector> outOfPlace ;
    outOfPlace.m_r = r ;
    outOfPlace.m_x = x ;
    outOfPlace.m_a = a ;
    KokkosArray::parallel_for( x.dimension(0) , outOfPlace );
  }
  return r;
}
// MV_MulScalar overload taking a single scalar coefficient.
//
// Launches one parallel_for over x.dimension(0) work items; the functor is
// told the extent of x along dimension 1 through its `n` member. When r and x
// compare equal the "Self" functor (x and a only) is used, otherwise the
// general functor holding r, x and a.
// NOTE(review): presumably this computes r = a * x for every column; the
// arithmetic lives in the functors, which are not visible here.
//
// Returns r.
RVector MV_MulScalar( const RVector & r, const typename XVector::scalar_type &a, const XVector & x)
{
  if (r == x) {
    // Source and destination are the same view: use the in-place functor.
    MV_MulScalarFunctorSelf<typename XVector::scalar_type,XVector> inPlace ;
    inPlace.m_x = x ;
    inPlace.m_a = a ;
    inPlace.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , inPlace );
  }
  else {
    MV_MulScalarFunctor<RVector,typename XVector::scalar_type,XVector> outOfPlace ;
    outOfPlace.m_r = r ;
    outOfPlace.m_x = x ;
    outOfPlace.m_a = a ;
    outOfPlace.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , outOfPlace );
  }
  return r;
}
// V_MulScalar overload taking a single scalar coefficient.
//
// Launches one parallel_for over x.dimension(0) work items. When r and x
// compare equal the "Self" functor (which holds only x and a) is used;
// otherwise the general functor holding r, x and a is used.
// NOTE(review): presumably this computes r = a * x; the arithmetic lives in
// the functors, which are not visible here.
//
// This function writes nothing to stdout: the leftover debugging prints that
// used to bracket the kernel launch have been removed.
//
// Returns r.
RVector V_MulScalar( const RVector & r, const typename XVector::scalar_type &a, const XVector & x)
{
  if(r==x) {
    // Source and destination are the same view: use the in-place functor.
    V_MulScalarFunctorSelf<typename XVector::scalar_type,XVector> op ;
    op.m_x = x ;
    op.m_a = a ;
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }

  V_MulScalarFunctor<RVector,typename XVector::scalar_type,XVector> op ;
  op.m_r = r ;
  op.m_x = x ;
  op.m_a = a ;
  KokkosArray::parallel_for( x.dimension(0) , op );
  return r;
}
// Runtime-to-compile-time dispatcher for MV_AddUnroll.
//
// Reads the extent of x along dimension 1 and calls the MV_AddUnroll
// instantiation whose last template argument equals that extent, for extents
// 1 through 16. The value returned by that call is discarded.
//
// For any other extent (0 or more than 16) no instantiation is called and r
// is returned untouched.
//
// The integer selectors a and b default to 2 and are forwarded unchanged.
// Returns r.
RVector MV_AddUnroll( const RVector & r,const aVector &av,const XVector & x, const bVector &bv, const YVector & y, int a=2,int b=2)
{
  switch (x.dimension(1)) {
    case 1:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 1>( r, av, x, bv, y, a, b);
      break;
    case 2:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 2>( r, av, x, bv, y, a, b);
      break;
    case 3:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 3>( r, av, x, bv, y, a, b);
      break;
    case 4:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 4>( r, av, x, bv, y, a, b);
      break;
    case 5:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 5>( r, av, x, bv, y, a, b);
      break;
    case 6:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 6>( r, av, x, bv, y, a, b);
      break;
    case 7:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 7>( r, av, x, bv, y, a, b);
      break;
    case 8:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 8>( r, av, x, bv, y, a, b);
      break;
    case 9:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 9>( r, av, x, bv, y, a, b);
      break;
    case 10:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 10>( r, av, x, bv, y, a, b);
      break;
    case 11:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 11>( r, av, x, bv, y, a, b);
      break;
    case 12:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 12>( r, av, x, bv, y, a, b);
      break;
    case 13:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 13>( r, av, x, bv, y, a, b);
      break;
    case 14:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 14>( r, av, x, bv, y, a, b);
      break;
    case 15:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 15>( r, av, x, bv, y, a, b);
      break;
    case 16:
      MV_AddUnroll<RVector, aVector, XVector, bVector, YVector, 16>( r, av, x, bv, y, a, b);
      break;
    default:
      // No unrolled instantiation for this extent: nothing is launched.
      break;
  }
  return r;
}
// Reduces x and y into r with one parallel_reduce over x.dimension(0) work
// items (by the functor names, a per-column dot product -- the arithmetic is
// in the functors, which are not visible here).
//
// The functor is chosen from the extent of x along dimension 1:
//   more than 16 -> MV_DotProduct_Right_FunctorVector (extent known only at
//                   run time through value_count)
//   1 .. 16      -> MV_DotProduct_Right_FunctorUnroll<..., N> with N equal to
//                   the extent
//   0            -> nothing is launched and r is returned untouched
//
// Returns r.
rVector MV_Dot(const rVector &r, const XVector & x, const YVector & y)
{
  typedef typename XVector::size_type size_type;
  const size_type vecCount = x.dimension(1);

  // Wide case: too many columns for the unrolled functors.
  if (vecCount > 16) {
    MV_DotProduct_Right_FunctorVector<XVector,YVector> dot;
    dot.m_x = x;
    dot.m_y = y;
    dot.value_count = vecCount;
    KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
    return r;
  }

  switch (vecCount) {
    case 1: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,1> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 2: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,2> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 3: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,3> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 4: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,4> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 5: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,5> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 6: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,6> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 7: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,7> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 8: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,8> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 9: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,9> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 10: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,10> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 11: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,11> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 12: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,12> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 13: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,13> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 14: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,14> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 15: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,15> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
    case 16: {
      MV_DotProduct_Right_FunctorUnroll<XVector,YVector,16> dot;
      dot.m_x = x;
      dot.m_y = y;
      dot.value_count = vecCount;
      KokkosArray::parallel_reduce( x.dimension(0) , dot, r );
      break;
    }
  }
  return r;
}
// Four-argument MV_Add overload: one coefficient view (bv) is supplied.
//
// Forwards to MV_AddVector when x has more than 16 entries along dimension 1
// and to MV_AddUnroll otherwise, with integer selectors a = 1 and b = 2.
// bv is passed in both coefficient-vector positions.
// NOTE(review): presumably selector 1 means the first coefficient view is
// never read (so bv is only a type-correct placeholder there) and selector 2
// means bv is applied to y -- confirm against the functor implementations.
//
// Returns the value returned by the selected implementation.
RVector MV_Add( const RVector & r, const XVector & x, const bVector & bv, const YVector & y )
{
  if (x.dimension(1) <= 16) {
    return MV_AddUnroll( r, bv, x, bv, y, 1, 2);
  }
  return MV_AddVector( r, bv, x, bv, y, 1, 2);
}
// Launches MV_AddVectorFunctor over x.dimension(0) work items, choosing the
// functor's two compile-time integer arguments from the runtime selectors
// a and b.
//
// Each selector is mapped independently:
//   value  1       -> template argument  1
//   value -1       -> template argument -1
//   any other value -> template argument  2
// which gives nine instantiations in total. Every instantiation receives the
// same members: m_r = r, m_x = x, m_y = y, m_a = av, m_b = bv, and n set to
// the extent of x along dimension 1.
// NOTE(review): presumably 1 / -1 let the functor skip reading av / bv and 2
// means "use the coefficient view" -- confirm against MV_AddVectorFunctor.
//
// Both selectors default to 2. Returns r.
RVector MV_AddVector( const RVector & r,const aVector &av,const XVector & x, const bVector &bv, const YVector & y, int a=2,int b=2)
{
  // "Neither 1 nor -1" is tested by comparison rather than by squaring the
  // selector, so very large caller-supplied values cannot trigger signed
  // integer overflow.
  const bool a_general = (a != 1) && (a != -1);
  const bool b_general = (b != 1) && (b != -1);

  if (a == 1 && b == 1) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,1> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }
  if (a == 1 && b == -1) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,-1> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }
  if (a == -1 && b == 1) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,1> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }
  if (a == -1 && b == -1) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,-1> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }
  if (a_general && b == 1) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,1> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }
  if (a_general && b == -1) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,-1> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }
  if (a == 1 && b_general) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,1,2> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }
  if (a == -1 && b_general) {
    MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,-1,2> op ;
    op.m_r = r ;
    op.m_x = x ;
    op.m_y = y ;
    op.m_a = av ;
    op.m_b = bv ;
    op.n = x.dimension(1);
    KokkosArray::parallel_for( x.dimension(0) , op );
    return r;
  }

  // Remaining case: neither selector is 1 or -1.
  MV_AddVectorFunctor<RVector,aVector,XVector,bVector,YVector,2,2> op ;
  op.m_r = r ;
  op.m_x = x ;
  op.m_y = y ;
  op.m_a = av ;
  op.m_b = bv ;
  op.n = x.dimension(1);
  KokkosArray::parallel_for( x.dimension(0) , op );
  return r;
}