/*
   VecNorm_MPI - Norm of a vector whose entries are spread over the processes
   of the communicator attached to xin.

   Every process forms its local contribution, and the contributions are then
   combined with one MPI_Allreduce().

   Input Parameters:
+  xin  - the vector
-  type - NORM_1, NORM_2, NORM_FROBENIUS, NORM_INFINITY or NORM_1_AND_2

   Output Parameter:
.  z - the result; for NORM_1_AND_2 two values are written, z[0] (1-norm)
       and z[1] (2-norm), so z must have room for both

   Notes:
   NORM_FROBENIUS is handled exactly like NORM_2.  For any other value of
   type nothing is written to z and 0 is returned.
*/
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  PetscErrorCode ierr;
  Vec_MPI        *impl   = (Vec_MPI*)xin->data;
  PetscScalar    *v      = impl->array;
  PetscInt       nloc    = xin->map->n;
  PetscReal      partial = 0.0,total;

  PetscFunctionBegin;
  switch (type) {
  case NORM_2:
  case NORM_FROBENIUS:
    /* local sum of squares goes into partial */
#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(v,&nloc,&partial);
#elif defined(PETSC_USE_UNROLLED_NORM)
    {
      /* peel off the (nloc mod 4) leading entries one at a time */
      PetscInt head = nloc & 0x3;

      if (head) {
        while (head-- > 0) {
          partial += PetscRealPart(v[0]*PetscConj(v[0]));
          v++;
        }
        /* pretend a full group of four was consumed; the loop below then
           runs once for every remaining group of four entries */
        nloc -= 4;
      }
    }
    for (; nloc>0; nloc -= 4) {
      partial += PetscRealPart(v[0]*PetscConj(v[0])+v[1]*PetscConj(v[1])+
                               v[2]*PetscConj(v[2])+v[3]*PetscConj(v[3]));
      v += 4;
    }
#else
    {
      PetscInt k;

      for (k=0; k<nloc; k++) {
        partial += PetscRealPart((v[k])*(PetscConj(v[k])));
      }
    }
#endif
#else
    {
      PetscBLASInt inc = 1,blen = PetscBLASIntCast(nloc);

      /* BLAS returns the local 2-norm; square it so the pieces can be summed */
      partial = BLASnrm2_(&blen,v,&inc);
      partial = partial*partial;
    }
#endif
    ierr = MPI_Allreduce(&partial,&total,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
    *z   = sqrt(total);
    /* nloc may have been consumed by the unrolled kernel, so read the length again */
    ierr = PetscLogFlops(2.0*xin->map->n);CHKERRQ(ierr);
    break;

  case NORM_1:
    /* local 1-norm */
    ierr = VecNorm_Seq(xin,NORM_1,&partial);CHKERRQ(ierr);
    /* global sum of the local 1-norms */
    ierr = MPI_Allreduce(&partial,z,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
    break;

  case NORM_INFINITY:
    /* local max */
    ierr = VecNorm_Seq(xin,NORM_INFINITY,&partial);CHKERRQ(ierr);
    /* global max of the local maxima */
    ierr = MPI_Allreduce(&partial,z,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);CHKERRQ(ierr);
    break;

  case NORM_1_AND_2:
    {
      PetscReal both[2];

      ierr = VecNorm_Seq(xin,NORM_1,&both[0]);CHKERRQ(ierr);
      ierr = VecNorm_Seq(xin,NORM_2,&both[1]);CHKERRQ(ierr);
      /* the 2-norm must be squared before it can take part in a sum reduction */
      both[1] = both[1]*both[1];
      ierr = MPI_Allreduce(both,z,2,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
      z[1] = sqrt(z[1]);
    }
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}
/*
   VecNorm_Seq - Norm of the locally stored entries of a vector.

   Input Parameters:
+  xin  - the vector
-  type - NORM_1, NORM_2, NORM_FROBENIUS, NORM_INFINITY or NORM_1_AND_2

   Output Parameter:
.  z - the result; for NORM_1_AND_2 two values are written, z[0] (1-norm)
       and z[1] (2-norm), so z must have room for both

   Notes:
   NORM_FROBENIUS is handled exactly like NORM_2.  For any other value of
   type nothing is written to z and 0 is returned.

   The local length is converted to a BLAS integer up front for every norm
   type, including those that never call BLAS.
*/
PetscErrorCode VecNorm_Seq(Vec xin,NormType type,PetscReal *z)
{
  const PetscScalar *xx;
  PetscErrorCode    ierr;
  PetscInt          n = xin->map->n;
  PetscBLASInt      one = 1, bn;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL___FP16)
    PetscStackCallBLAS("BLASnrm2",*z = BLASnrm2_(&bn,xx,&one));
#else
    /* 2-norm as the square root of the dot product of the vector with itself */
    PetscStackCallBLAS("BLASdot",*z = PetscRealPart(BLASdot_(&bn,xx,&one,xx,&one)));
    *z   = PetscSqrtReal(*z);
#endif
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = PetscLogFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    PetscInt  i;
    PetscReal max = 0.0,tmp;

    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
    /* index the array instead of advancing xx, so that the pointer handed back to
       VecRestoreArrayRead() is the one VecGetArrayRead() returned */
    for (i=0; i<n; i++) {
      tmp = PetscAbsScalar(xx[i]);
      if (tmp > max) max = tmp;
      /* check special case of tmp == NaN: the result becomes NaN and the scan stops */
      if (tmp != tmp) {max = tmp; break;}
    }
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    *z   = max;
  } else if (type == NORM_1) {
#if defined(PETSC_USE_COMPLEX)
    PetscReal tmp = 0.0;
    PetscInt  i;
#endif
    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    /* BLASasum() returns the nonstandard 1 norm of the 1 norm of the complex entries,
       so a custom loop over the absolute values is used instead */
    for (i=0; i<n; i++) {
      tmp += PetscAbsScalar(xx[i]);
    }
    *z = tmp;
#else
    PetscStackCallBLAS("BLASasum",*z = BLASasum_(&bn,xx,&one));
#endif
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = PetscLogFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    /* both norms, computed one after the other into z[0] and z[1] */
    ierr = VecNorm_Seq(xin,NORM_1,z);CHKERRQ(ierr);
    ierr = VecNorm_Seq(xin,NORM_2,z+1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
   VecNorm_MPI - Norm of a vector whose entries are spread over the processes
   of the communicator attached to xin.

   Every process forms its local contribution, and the contributions are then
   combined with one MPIU_Allreduce().

   Input Parameters:
+  xin  - the vector
-  type - NORM_1, NORM_2, NORM_FROBENIUS, NORM_INFINITY or NORM_1_AND_2

   Output Parameter:
.  z - the result; for NORM_1_AND_2 two values are written, z[0] (1-norm)
       and z[1] (2-norm), so z must have room for both

   Notes:
   NORM_FROBENIUS is handled exactly like NORM_2.  For any other value of
   type nothing is written to z and 0 is returned.

   The local length is converted to a BLAS integer up front for every norm
   type, including those that never call BLAS here.
*/
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  PetscErrorCode    ierr;
  const PetscScalar *v;
  PetscReal         total,partial = 0.0;
  PetscInt          nloc = xin->map->n;
  PetscBLASInt      inc = 1,blen;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(nloc,&blen);CHKERRQ(ierr);
  switch (type) {
  case NORM_2:
  case NORM_FROBENIUS:
    /* local sum of squares via the dot product of the local array with itself */
    ierr    = VecGetArrayRead(xin,&v);CHKERRQ(ierr);
    partial = PetscRealPart(BLASdot_(&blen,v,&inc,v,&inc));
    ierr    = VecRestoreArrayRead(xin,&v);CHKERRQ(ierr);
    /* global sum of squares, then the root */
    ierr = MPIU_Allreduce(&partial,&total,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
    *z   = PetscSqrtReal(total);
    ierr = PetscLogFlops(2.0*xin->map->n);CHKERRQ(ierr);
    break;

  case NORM_1:
    /* local 1-norm */
    ierr = VecNorm_Seq(xin,NORM_1,&partial);CHKERRQ(ierr);
    /* global sum of the local 1-norms */
    ierr = MPIU_Allreduce(&partial,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
    break;

  case NORM_INFINITY:
    /* local max */
    ierr = VecNorm_Seq(xin,NORM_INFINITY,&partial);CHKERRQ(ierr);
    /* global max of the local maxima */
    ierr = MPIU_Allreduce(&partial,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
    break;

  case NORM_1_AND_2:
    {
      PetscReal both[2];

      ierr = VecNorm_Seq(xin,NORM_1,&both[0]);CHKERRQ(ierr);
      ierr = VecNorm_Seq(xin,NORM_2,&both[1]);CHKERRQ(ierr);
      /* the 2-norm must be squared before it can take part in a sum reduction */
      both[1] = both[1]*both[1];
      ierr = MPIU_Allreduce(both,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
      z[1] = PetscSqrtReal(z[1]);
    }
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}