static void run( float a, const Eigen::MatrixBase<Derived2> & A, const Eigen::MatrixBase<Derived1> & x, float  b,  Eigen::MatrixBase<Derived1>  &y) {
        // Single precision matrix-vector product delegated to an external BLAS:
        //
        //     y = a * A * x + b * y
        //
        // a, b : scalar factors.
        // A    : matrix operand; A.cols() must equal x.rows() and A.rows() must equal y.rows().
        // x    : input vector, handed to BLAS with unit increment.
        // y    : input/output vector, updated in place, handed to BLAS with unit increment.
        //
        // Raw pointers to the first coefficient of A, x and y are passed to BLAS, so all three
        // operands have to expose directly addressable float storage.

        // This overload forwards to sgemv, therefore both operand types must store float coefficients.
        EIGEN_STATIC_ASSERT(sizeof(float) == sizeof(typename Derived1::Scalar) && sizeof(float) == sizeof(typename Derived2::Scalar), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)

        ASSERTMSG(A.cols() == x.rows() && A.rows() == y.rows() ,"ERROR: Vector/Matrix wrong dimension");

#if USE_INTEL_BLAS == 1

        // The storage order given to CBLAS describes the matrix A, so it is taken from the
        // matrix type (Derived2) and not from the vector type (Derived1).
        const CBLAS_ORDER order = (Derived2::Flags & Eigen::RowMajorBit) ? CblasRowMajor : CblasColMajor;

        mkl_set_dynamic(false);
        mkl_set_num_threads(BLAS_NUM_THREADS);

        // A.outerStride() is the leading dimension of A for either storage order.
        cblas_sgemv(order, CblasNoTrans, A.rows(), A.cols(), a, const_cast<float*>(&(A.operator()(0,0))), A.outerStride(), const_cast<float*>(&(x.operator()(0,0))), 1, b, &(y.operator()(0,0)), 1);

#elif USE_GOTO_BLAS == 1

        // The Fortran style sgemv only understands column-major storage. A row-major A occupies
        // memory exactly like the column-major matrix A^T (A.cols() x A.rows()); requesting the
        // transposed product of that matrix yields A * x again.
        const bool rowMajor = (Derived2::Flags & Eigen::RowMajorBit) != 0;

        char     trans = rowMajor ? 'T' : 'N';
        BLAS_INT m     = rowMajor ? A.cols() : A.rows();
        BLAS_INT n     = rowMajor ? A.rows() : A.cols();
        BLAS_INT lda   = A.outerStride();   // leading dimension of the stored matrix
        BLAS_INT inc   = 1;                 // unit increment for x and y

        // The const_casts only adapt to non-const Fortran style prototypes; A and x are not modified.
        sgemv(&trans, &m, &n, &a, const_cast<float*>(&(A.operator()(0,0))), &lda, const_cast<float*>(&(x.operator()(0,0))), &inc, &b, &(y.operator()(0,0)), &inc);

#else
        ASSERTMSG(false,"No implementation for BLAS defined!");
#endif

    }