// ============================================================================= PyObject * Epetra_NumPyMultiVector::ExtractCopy() const { return PyArray_NewCopy(array,NPY_ANYORDER); }
static PyObject * dotblas_matrixproduct(PyObject *dummy, PyObject *args) { PyObject *op1, *op2; PyArrayObject *ap1=NULL, *ap2=NULL, *ret=NULL; int j, l, lda, ldb, ldc; int typenum, nd; intp ap1stride=0; intp dimensions[MAX_DIMS]; intp numbytes; static const float oneF[2] = {1.0, 0.0}; static const float zeroF[2] = {0.0, 0.0}; static const double oneD[2] = {1.0, 0.0}; static const double zeroD[2] = {0.0, 0.0}; double prior1, prior2; PyTypeObject *subtype; PyArray_Descr *dtype; MatrixShape ap1shape, ap2shape; if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; /* * "Matrix product" using the BLAS. * Only works for float double and complex types. */ typenum = PyArray_ObjectType(op1, 0); typenum = PyArray_ObjectType(op2, typenum); /* This function doesn't handle other types */ if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE && typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) { return PyArray_Return((PyArrayObject *)PyArray_MatrixProduct(op1, op2)); } dtype = PyArray_DescrFromType(typenum); ap1 = (PyArrayObject *)PyArray_FromAny(op1, dtype, 0, 0, ALIGNED, NULL); if (ap1 == NULL) return NULL; Py_INCREF(dtype); ap2 = (PyArrayObject *)PyArray_FromAny(op2, dtype, 0, 0, ALIGNED, NULL); if (ap2 == NULL) goto fail; if ((ap1->nd > 2) || (ap2->nd > 2)) { /* This function doesn't handle dimensions greater than 2 (or negative striding) -- other than to ensure the dot function is altered */ if (!altered) { /* need to alter dot product */ PyObject *tmp1, *tmp2; tmp1 = PyTuple_New(0); tmp2 = dotblas_alterdot(NULL, tmp1); Py_DECREF(tmp1); Py_DECREF(tmp2); } ret = (PyArrayObject *)PyArray_MatrixProduct((PyObject *)ap1, (PyObject *)ap2); Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); } if (_bad_strides(ap1)) { op1 = PyArray_NewCopy(ap1, PyArray_ANYORDER); Py_DECREF(ap1); ap1 = (PyArrayObject *)op1; if (ap1 == NULL) goto fail; } if (_bad_strides(ap2)) { op2 = PyArray_NewCopy(ap2, PyArray_ANYORDER); Py_DECREF(ap2); ap2 = (PyArrayObject *)op2; if (ap2 == NULL) goto fail; } ap1shape = _select_matrix_shape(ap1); ap2shape = _select_matrix_shape(ap2); if (ap1shape == _scalar || ap2shape == _scalar) { PyArrayObject *oap1, *oap2; oap1 = ap1; oap2 = ap2; /* One of ap1 or ap2 is a scalar */ if (ap1shape == _scalar) { /* Make ap2 the scalar */ PyArrayObject *t = ap1; ap1 = ap2; ap2 = t; ap1shape = ap2shape; ap2shape = _scalar; } if (ap1shape == _row) ap1stride = ap1->strides[1]; else if (ap1->nd > 0) ap1stride = ap1->strides[0]; if (ap1->nd == 0 || ap2->nd == 0) { intp *thisdims; if (ap1->nd == 0) { nd = ap2->nd; thisdims = ap2->dimensions; } else { nd = ap1->nd; thisdims = ap1->dimensions; } l = 1; for (j=0; j<nd; j++) { dimensions[j] = thisdims[j]; l *= dimensions[j]; } } else { l = oap1->dimensions[oap1->nd-1]; if (oap2->dimensions[0] != l) { PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); goto fail; } nd = ap1->nd + ap2->nd - 2; /* nd = 0 or 1 or 2 */ /* If nd == 0 do nothing ... */ if (nd == 1) { /* Either ap1->nd is 1 dim or ap2->nd is 1 dim and the other is 2-dim */ dimensions[0] = (oap1->nd == 2) ? oap1->dimensions[0] : oap2->dimensions[1]; l = dimensions[0]; /* Fix it so that dot(shape=(N,1), shape=(1,)) and dot(shape=(1,), shape=(1,N)) both return an (N,) array (but use the fast scalar code) */ } else if (nd == 2) { dimensions[0] = oap1->dimensions[0]; dimensions[1] = oap2->dimensions[1]; /* We need to make sure that dot(shape=(1,1), shape=(1,N)) and dot(shape=(N,1),shape=(1,1)) uses scalar multiplication appropriately */ if (ap1shape == _row) l = dimensions[1]; else l = dimensions[0]; } } } else { /* (ap1->nd <= 2 && ap2->nd <= 2) */ /* Both ap1 and ap2 are vectors or matrices */ l = ap1->dimensions[ap1->nd-1]; if (ap2->dimensions[0] != l) { PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); goto fail; } nd = ap1->nd+ap2->nd-2; if (nd == 1) dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[1]; else if (nd == 2) { dimensions[0] = ap1->dimensions[0]; dimensions[1] = ap2->dimensions[1]; } } /* Choose which subtype to return */ if (ap1->ob_type != ap2->ob_type) { prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0); prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0); subtype = (prior2 > prior1 ? ap2->ob_type : ap1->ob_type); } else { prior1 = prior2 = 0.0; subtype = ap1->ob_type; } ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, typenum, NULL, NULL, 0, 0, (PyObject *) (prior2 > prior1 ? ap2 : ap1)); if (ret == NULL) goto fail; numbytes = PyArray_NBYTES(ret); memset(ret->data, 0, numbytes); if (numbytes==0 || l == 0) { Py_DECREF(ap1); Py_DECREF(ap2); return PyArray_Return(ret); } if (ap2shape == _scalar) { /* Multiplication by a scalar -- Level 1 BLAS */ /* if ap1shape is a matrix and we are not contiguous, then we can't just blast through the entire array using a single striding factor */ NPY_BEGIN_ALLOW_THREADS if (typenum == PyArray_DOUBLE) { if (l == 1) { *((double *)ret->data) = *((double *)ap2->data) * \ *((double *)ap1->data); } else if (ap1shape != _matrix) { cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data, ap1stride/sizeof(double), (double *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; double val; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; val = *((double *)ap2->data); a1s = ap1->strides[maxind] / sizeof(double); rets = ret->strides[maxind] / sizeof(double); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_daxpy(l, val, (double *)ptr, a1s, (double *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_CDOUBLE) { if (l == 1) { cdouble *ptr1, *ptr2, *res; ptr1 = (cdouble *)ap2->data; ptr2 = (cdouble *)ap1->data; res = (cdouble *)ret->data; res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data, ap1stride/sizeof(cdouble), (double *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; double *pval; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; pval = (double *)ap2->data; a1s = ap1->strides[maxind] / sizeof(cdouble); rets = ret->strides[maxind] / sizeof(cdouble); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_zaxpy(l, pval, (double *)ptr, a1s, (double *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_FLOAT) { if (l == 1) { *((float *)ret->data) = *((float *)ap2->data) * \ *((float *)ap1->data); } else if (ap1shape != _matrix) { cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data, ap1stride/sizeof(float), (float *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; float val; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; val = *((float *)ap2->data); a1s = ap1->strides[maxind] / sizeof(float); rets = ret->strides[maxind] / sizeof(float); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_saxpy(l, val, (float *)ptr, a1s, (float *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } else if (typenum == PyArray_CFLOAT) { if (l == 1) { cfloat *ptr1, *ptr2, *res; ptr1 = (cfloat *)ap2->data; ptr2 = (cfloat *)ap1->data; res = (cfloat *)ret->data; res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; } else if (ap1shape != _matrix) { cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data, ap1stride/sizeof(cfloat), (float *)ret->data, 1); } else { int maxind, oind, i, a1s, rets; char *ptr, *rptr; float *pval; maxind = (ap1->dimensions[0] >= ap1->dimensions[1] ? 0 : 1); oind = 1-maxind; ptr = ap1->data; rptr = ret->data; l = ap1->dimensions[maxind]; pval = (float *)ap2->data; a1s = ap1->strides[maxind] / sizeof(cfloat); rets = ret->strides[maxind] / sizeof(cfloat); for (i=0; i < ap1->dimensions[oind]; i++) { cblas_caxpy(l, pval, (float *)ptr, a1s, (float *)rptr, rets); ptr += ap1->strides[oind]; rptr += ret->strides[oind]; } } } NPY_END_ALLOW_THREADS }