void ApplyPCPrecPETSCSVD(void *x, PRIMME_INT *ldx, void *y, PRIMME_INT *ldy, int *blockSize, int *mode, primme_svds_params *primme_svds, int *ierr) { int i, one=1; SCALAR *aux; if (*mode == primme_svds_op_AtA) { aux = (SCALAR *)primme_calloc(primme_svds->mLocal, sizeof(SCALAR), "aux"); for(i=0; i<*blockSize; i++) { ApplyPCPrecPETSCGen((SCALAR*)x+(*ldx)*i, ldx, aux, ldy, &one, 1, primme_svds->preconditioner, *(MPI_Comm*)primme_svds->commInfo); ApplyPCPrecPETSCGen(aux, ldx, (SCALAR*)y+(*ldy)*i, ldy, &one, 0, primme_svds->preconditioner, *(MPI_Comm*)primme_svds->commInfo); } free(aux); } else if (*mode == primme_svds_op_AAt) { aux = (SCALAR *)primme_calloc(primme_svds->nLocal, sizeof(SCALAR), "aux"); for(i=0; i<*blockSize; i++) { ApplyPCPrecPETSCGen((SCALAR*)x+(*ldx)*i, ldx, aux, ldy, &one, 0, primme_svds->preconditioner, *(MPI_Comm*)primme_svds->commInfo); ApplyPCPrecPETSCGen(aux, ldx, (SCALAR*)y+(*ldy)*i, ldy, &one, 1, primme_svds->preconditioner, *(MPI_Comm*)primme_svds->commInfo); } free(aux); } else if (*mode == primme_svds_op_augmented) { ApplyPCPrecPETSCGen((SCALAR*)x+primme_svds->nLocal, ldx, y, ldy, blockSize, 0, primme_svds->preconditioner, *(MPI_Comm*)primme_svds->commInfo); ApplyPCPrecPETSCGen(x, ldx, (SCALAR*)y+primme_svds->nLocal, ldy, blockSize, 1, primme_svds->preconditioner, *(MPI_Comm*)primme_svds->commInfo); } *ierr = 0; }
int createInvDavidsonDiagPrecNative(const CSRMatrix *matrix, double **prec) { double *diag; diag = (double*)primme_calloc(matrix->n, sizeof(double), "diag"); getDiagonal(matrix, diag); *prec = diag; return 1; }
void primme_initialize_f77_(primme_params **primme){ #else void primme_initialize_f77(primme_params **primme){ #endif *primme = (primme_params *)primme_calloc(1,sizeof(primme_params),"primme"); primme_initialize(*primme); }
int readMatrixPetsc(const char* matrixFileName, PRIMME_INT *m, PRIMME_INT *n, PRIMME_INT *mLocal, PRIMME_INT *nLocal, int *numProcs, int *procID, Mat **matrix, double *fnorm_, int **perm) { PetscErrorCode ierr; PetscReal fnorm; PetscBool pattern; PetscViewer viewer; PetscInt m0, n0, mLocal0, nLocal0; PetscFunctionBegin; *matrix = (Mat *)primme_calloc(1, sizeof(Mat), "mat"); if (!strcmp("mtx", &matrixFileName[strlen(matrixFileName)-3])) { // coordinate format storing both lower and upper triangular parts ierr = loadmtx(matrixFileName, *matrix, &pattern); CHKERRQ(ierr); } else if (!strcmp("petsc", &matrixFileName[strlen(matrixFileName)-5])) { ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, matrixFileName, FILE_MODE_READ, &viewer); CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD, *matrix); CHKERRQ(ierr); ierr = MatSetFromOptions(**matrix); CHKERRQ(ierr); ierr = MatLoad(**matrix, viewer); CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr); } else { SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "Could not read matrix file."); } if (fnorm_) { ierr = MatNorm(**matrix, NORM_FROBENIUS, &fnorm); CHKERRQ(ierr); *fnorm_ = fnorm; } ierr = MatGetSize(**matrix, &m0, &n0); CHKERRQ(ierr); *m = m0; *n = n0; if (perm && *m == *n) { Mat Atemp; ierr = permutematrix(**matrix, NULL, &Atemp, NULL, perm);CHKERRQ(ierr); ierr = MatDestroy(*matrix);CHKERRQ(ierr); **matrix = Atemp; } else if (perm) { *perm = NULL; } ierr = MatGetLocalSize(**matrix, &mLocal0, &nLocal0); CHKERRQ(ierr); *mLocal = mLocal0; *nLocal = nLocal0; MPI_Comm_size(MPI_COMM_WORLD, numProcs); MPI_Comm_rank(MPI_COMM_WORLD, procID); PetscFunctionReturn(0); }
int createInvDiagPrecNative(const CSRMatrix *matrix, double shift, double **prec) { int i; double *diag, minDenominator=1e-14; diag = (double*)primme_calloc(matrix->n, sizeof(double), "diag"); getDiagonal(matrix, diag); for (i=0; i<matrix->n; i++) diag[i] -= shift; for (i=0; i<matrix->n; i++) if (fabs(diag[i]) < minDenominator) diag[i] = copysign(minDenominator, diag[i]); *prec = diag; return 1; }
/****************************************************************************** * Function to broadcast the primme data structure to all processors * ******************************************************************************/ void broadCast(primme_params *primme, primme_preset_method *method, MPI_Comm comm){ int i; MPI_Bcast(&(primme->numEvals), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->target), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->numTargetShifts), 1, MPI_INT, 0, comm); if (primme->numTargetShifts > 0 && procID !=0) { primme->targetShifts = (double *)primme_calloc( primme->numTargetShifts, sizeof(double), "targetShifts"); } for (i=0; i<primme->numTargetShifts; i++) { MPI_Bcast(&(primme->targetShifts[i]), 1, MPI_DOUBLE, 0, comm); } MPI_Bcast(&(primme->locking), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->dynamicMethodSwitch), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->initSize), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->numOrthoConst), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->maxBasisSize), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->minRestartSize), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->maxBlockSize), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->maxMatvecs), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->maxOuterIterations), 1, MPI_INT, 0, comm); for (i=0;i<4;i++) { MPI_Bcast(&(primme->iseed[i]), 1, MPI_DOUBLE, 0, comm); } MPI_Bcast(&(primme->aNorm), 1, MPI_DOUBLE, 0, comm); MPI_Bcast(&(primme->eps), 1, MPI_DOUBLE, 0, comm); MPI_Bcast(&(primme->printLevel), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->restartingParams.scheme), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->restartingParams.maxPrevRetain), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->correctionParams.precondition), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->correctionParams.robustShifts), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->correctionParams.maxInnerIterations),1, MPI_INT, 0,comm); MPI_Bcast(&(primme->correctionParams.convTest), 1, MPI_INT, 0, comm); MPI_Bcast(&(primme->correctionParams.relTolBase), 1, MPI_DOUBLE, 0, comm); MPI_Bcast(&(primme->correctionParams.projectors.LeftQ), 1, MPI_INT, 0,comm); MPI_Bcast(&(primme->correctionParams.projectors.LeftX), 1, MPI_INT, 0,comm); MPI_Bcast(&(primme->correctionParams.projectors.RightQ), 1, MPI_INT, 0,comm); MPI_Bcast(&(primme->correctionParams.projectors.RightX), 1, MPI_INT, 0,comm); MPI_Bcast(&(primme->correctionParams.projectors.SkewQ), 1, MPI_INT, 0,comm); MPI_Bcast(&(primme->correctionParams.projectors.SkewX), 1, MPI_INT, 0,comm); MPI_Bcast(&(primme->correctionParams.projectors.SkewX), 1, MPI_INT, 0,comm); MPI_Bcast(method, 1, MPI_INT, 0, comm); }
int createInvNormalPrecPETSC(Mat matrix, double shift, double **prec) { int i; PetscInt mLocal, nLocal; double *diag, minDenominator=1e-14; MatGetLocalSize(matrix, &mLocal, &nLocal); diag = (double*)primme_calloc(mLocal+nLocal, sizeof(double), "diag"); getSumSquares(matrix, diag); for (i=0; i<mLocal+nLocal; i++) { diag[i] -= shift*shift; if (fabs(diag[i]) < minDenominator) diag[i] = copysign(minDenominator, diag[i]); } *prec = diag; return 1; }
/****************************************************************************** * void generatePermutations() * Given a proc array : proc[i] = processor # where row i lies * it generates all other needed permutation arrays for processing in * Parasails. * ******************************************************************************/ void generatePermutations(int n, int nParts, int *proc, int *perm, int *iperm, int *map) { int i; int *count; count = (int *)primme_calloc(nParts, sizeof(int), "counts"); for (i=0; i < nParts; i++) { count[i] = 0; } for (i=0; i < n; i++) { count[proc[i]]++; } map[0] = 0; for (i=1; i <= nParts; i++) { map[i] = map[i-1] + count[i-1]; } for (i=0; i < n; i++) { iperm[map[proc[i]]] = i; map[proc[i]]++; } for (i=0; i < n; i++) { perm[iperm[i]] = i; } map[0] = 0; for (i=1; i <= nParts; i++) { map[i] = map[i-1] + count[i-1]; } free(count); }
int createILUTPrecNative(const CSRMatrix *matrix, double shift, int level, double threshold, double filter, CSRMatrix **prec) { #ifdef USE_DOUBLECOMPLEX int ierr; int lenFactors; PRIMME_NUM *W; int *iW; CSRMatrix *factors; if (shift != 0.0) { shiftCSRMatrix(-shift, (CSRMatrix*)matrix); } /* Work arrays */ W = (PRIMME_NUM *)primme_calloc(matrix->n+1, sizeof(PRIMME_NUM), "W"); iW = (int *)primme_calloc(matrix->n*2, sizeof(int), "iW"); /* Max size of factorization */ lenFactors = 9*matrix->nnz; factors = (CSRMatrix *)primme_calloc(1, sizeof(CSRMatrix), "factors"); factors->AElts = (PRIMME_NUM *)primme_calloc(lenFactors, sizeof(PRIMME_NUM), "iluElts"); factors->JA = (int *)primme_calloc(lenFactors, sizeof(int), "Jilu"); factors->IA = (int *)primme_calloc(matrix->n+1, sizeof(int), "Iilu"); factors->n = matrix->n; factors->nnz = lenFactors; FORTRAN_FUNCTION(zilut) ((int*)&matrix->n, (PRIMME_NUM*)matrix->AElts, (int*)matrix->JA, (int*)matrix->IA, &level, &threshold, factors->AElts, factors->JA, factors->IA, &lenFactors, W, iW, &ierr); if (ierr != 0) { fprintf(stderr, "ZILUT factorization could not be completed\n"); return(-1); } if (shift != 0.0L) { shiftCSRMatrix(shift, (CSRMatrix*)matrix); } /* free workspace */ free(W); free(iW); *prec = factors; return 0; #else int ierr; int lenFactors; double *W1, *W2; int *iW1, *iW2, *iW3; CSRMatrix *factors; if (shift != 0.0) { shiftCSRMatrix(-shift, (CSRMatrix*)matrix); } /* Work arrays */ W1 = (double *)primme_calloc( matrix->n+1, sizeof(double), "W1"); W2 = (double *)primme_calloc( matrix->n, sizeof(double), "W2"); iW1 = (int *)primme_calloc( matrix->n, sizeof(int), "iW1"); iW2 = (int *)primme_calloc( matrix->n, sizeof(int), "iW2"); iW3 = (int *)primme_calloc( matrix->n, sizeof(int), "iW2"); /* Max size of factorization */ lenFactors = 9*matrix->nnz; factors = (CSRMatrix *)primme_calloc(1, sizeof(CSRMatrix), "factors"); factors->AElts = (double *)primme_calloc(lenFactors, sizeof(double), "iluElts"); factors->JA = (int *)primme_calloc(lenFactors, sizeof(int), "Jilu"); factors->IA = (int *)primme_calloc(matrix->n+1, sizeof(int), "Iilu"); factors->n = matrix->n; factors->nnz = lenFactors; FORTRAN_FUNCTION(ilut) ((int*)&matrix->n, (double*)matrix->AElts, (int*)matrix->JA, (int*)matrix->IA, &level, &threshold, factors->AElts, factors->JA, factors->IA, &lenFactors, W1, W2, iW1, iW2, iW3, &ierr); if (ierr != 0) { fprintf(stderr, "ILUT factorization could not be completed\n"); return(-1); } if (shift != 0.0L) { shiftCSRMatrix(shift, (CSRMatrix*)matrix); } /* free workspace */ free(W1); free(W2); free(iW1); free(iW2); free(iW3); *prec = factors; return 0; #endif }
int zprimme(double *evals, Complex_Z *evecs, double *resNorms, primme_params *primme) { int ret; int *perm; double machEps; /* ------------------ */ /* zero out the timer */ /* ------------------ */ primme_wTimer(1); /* ---------------------------- */ /* Clear previous error reports */ /* ---------------------------- */ primme_DeleteStackTrace(primme); /* ----------------------- */ /* Find machine precision */ /* ----------------------- */ machEps = Num_dlamch_primme("E"); /* ------------------ */ /* Set some defaults */ /* ------------------ */ primme_set_defaults(primme); /* -------------------------------------------------------------- */ /* If needed, we are ready to estimate required memory and return */ /* -------------------------------------------------------------- */ if (evals == NULL && evecs == NULL && resNorms == NULL) return allocate_workspace(primme, FALSE); /* ----------------------------------------------------- */ /* Reset random number seed if inappropriate for DLARENV */ /* Yields unique quadruples per proc if procID < 4096^3 */ /* ----------------------------------------------------- */ if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = primme->procID % 4096; if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = (int)(primme->procID/4096+1) % 4096; if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = (int)((primme->procID/4096)/4096+2) % 4096; if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096; /* ----------------------- */ /* Set default convTetFun */ /* ----------------------- */ if (!primme->convTestFun) { primme->convTestFun = convTestFunAbsolute; } /* ------------------------------------------------------- */ /* Check primme input data for bounds, correct values etc. */ /* ------------------------------------------------------- */ ret = check_input(evals, evecs, resNorms, primme); if (ret != 0) { primme_PushErrorMessage(Primme_zprimme, Primme_check_input, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ret; } /* ----------------------------------------------------------------------- */ /* Compute AND allocate memory requirements for main_iter and subordinates */ /* ----------------------------------------------------------------------- */ ret = allocate_workspace(primme, TRUE); if (ret != 0) { primme_PushErrorMessage(Primme_zprimme, Primme_allocate_workspace, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ALLOCATE_WORKSPACE_FAILURE; } /* --------------------------------------------------------- */ /* Allocate workspace that will be needed locally by zprimme */ /* --------------------------------------------------------- */ perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array"); if (perm == NULL) { primme_PushErrorMessage(Primme_zprimme, Primme_malloc, 0, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MALLOC_FAILURE; } /*----------------------------------------------------------------------*/ /* Call the solver */ /*----------------------------------------------------------------------*/ ret = main_iter_zprimme(evals, perm, evecs, resNorms, machEps, primme->intWork, primme->realWork, primme); if (ret < 0) { primme_PushErrorMessage(Primme_zprimme, Primme_main_iter, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MAIN_ITER_FAILURE; } /*----------------------------------------------------------------------*/ /*----------------------------------------------------------------------*/ /* If locking is engaged, the converged Ritz vectors are stored in the */ /* order they converged. They must then be permuted so that they */ /* correspond to the sorted Ritz values in evals. */ /*----------------------------------------------------------------------*/ permute_vecs_zprimme(&evecs[primme->numOrthoConst], primme->nLocal, primme->initSize, primme->nLocal, perm, (Complex_Z*)primme->realWork, (int*)primme->intWork); free(perm); primme->stats.elapsedTime = primme_wTimer(0); return(0); }
int dprimme(double *evals, double *evecs, double *resNorms, primme_params *primme) { int ret; int *perm; double machEps; /* ------------------ */ /* zero out the timer */ /* ------------------ */ primme_wTimer(1); /* ---------------------------- */ /* Clear previous error reports */ /* ---------------------------- */ primme_DeleteStackTrace(primme); /* ----------------------- */ /* Find machine precision */ /* ----------------------- */ machEps = Num_dlamch_primme("E"); /* ----------------------------------------- */ /* Set some defaults for sequential programs */ /* ----------------------------------------- */ if (primme->numProcs == 1) { primme->nLocal = primme->n; primme->procID = 0; if (primme->globalSumDouble == NULL) primme->globalSumDouble = primme_seq_globalSumDouble; } /* --------------------------------------------------------------------- */ /* Decide on whether to use locking (hard locking), or not (soft locking)*/ /* --------------------------------------------------------------------- */ if (primme->target != primme_smallest && primme->target != primme_largest ) { /* Locking is necessary as interior Ritz values can cross shifts */ primme->locking = 1; } else { if (primme->locking == 0) { /* use locking when not enough vectors to restart with */ primme->locking = (primme->numEvals > primme->minRestartSize); } } /* -------------------------------------------------------------- */ /* If needed, we are ready to estimate required memory and return */ /* -------------------------------------------------------------- */ if (evals == NULL && evecs == NULL && resNorms == NULL) return allocate_workspace(primme, FALSE); /* ----------------------------------------------------- */ /* Reset random number seed if inappropriate for DLARENV */ /* Yields unique quadruples per proc if procID < 4096^3 */ /* ----------------------------------------------------- */ if (primme->iseed[0]<0 || primme->iseed[0]>4095) primme->iseed[0] = primme->procID % 4096; if (primme->iseed[1]<0 || primme->iseed[1]>4095) primme->iseed[1] = (int)(primme->procID/4096+1) % 4096; if (primme->iseed[2]<0 || primme->iseed[2]>4095) primme->iseed[2] = (int)((primme->procID/4096)/4096+2) % 4096; if (primme->iseed[3]<0 || primme->iseed[3]>4095) primme->iseed[3] = (2*(int)(((primme->procID/4096)/4096)/4096)+1) % 4096; /* ------------------------------------------------------- */ /* Check primme input data for bounds, correct values etc. */ /* ------------------------------------------------------- */ ret = check_input(evals, evecs, resNorms, primme); if (ret != 0) { primme_PushErrorMessage(Primme_dprimme, Primme_check_input, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ret; } /* ----------------------------------------------------------------------- */ /* Compute AND allocate memory requirements for main_iter and subordinates */ /* ----------------------------------------------------------------------- */ ret = allocate_workspace(primme, TRUE); if (ret != 0) { primme_PushErrorMessage(Primme_dprimme, Primme_allocate_workspace, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return ALLOCATE_WORKSPACE_FAILURE; } /* --------------------------------------------------------- */ /* Allocate workspace that will be needed locally by dprimme */ /* --------------------------------------------------------- */ perm = (int *)primme_calloc((primme->numEvals), sizeof(int), "Perm array"); if (perm == NULL) { primme_PushErrorMessage(Primme_dprimme, Primme_malloc, 0, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MALLOC_FAILURE; } /*----------------------------------------------------------------------*/ /* Call the solver */ /*----------------------------------------------------------------------*/ ret = main_iter_dprimme(evals, perm, evecs, resNorms, machEps, primme->intWork, primme->realWork, primme); if (ret < 0) { primme_PushErrorMessage(Primme_dprimme, Primme_main_iter, ret, __FILE__, __LINE__, primme); primme->stats.elapsedTime = primme_wTimer(0); return MAIN_ITER_FAILURE; } /*----------------------------------------------------------------------*/ /*----------------------------------------------------------------------*/ /* If locking is engaged, the converged Ritz vectors are stored in the */ /* order they converged. They must then be permuted so that they */ /* correspond to the sorted Ritz values in evals. */ /*----------------------------------------------------------------------*/ permute_evecs_dprimme(&evecs[primme->numOrthoConst], perm, (double *) primme->realWork, primme->numEvals, primme->nLocal); free(perm); primme->stats.elapsedTime = primme_wTimer(0); return(0); }
int main (int argc, char *argv[]) { /* Timing vars */ // double ut1,ut2,st1,st2,wt1,wt2; double wt1,wt2; /* Matrix */ int n, nnz; double fnorm; CSRMatrix matrix; /* Preconditioner */ CSRMatrix Factors; /* Files */ char *DriverConfigFileName, *SolverConfigFileName; /* Driver and solver I/O arrays and parameters */ double *evals, *evecs, *rnorms; driver_params driver; primme_params primme; primme_preset_method method; #ifdef Cplusplus /* C++ has a stricter type checking */ void (*precond_function)(void *, void *, int *, primme_params *); #else void *precond_function; #endif /* Other miscellaneous items */ int i; int ret; /* --------------------------------------------------------------------- */ /* Read matrix and driver setup */ /* --------------------------------------------------------------------- */ /* ------------------------------------------------------- */ /* Get from command line the names for the 2 config files */ /* ------------------------------------------------------- */ if (argc == 3) { DriverConfigFileName = argv[1]; SolverConfigFileName = argv[2]; } /* ----------------------------- */ /* Read in the driver parameters */ /* ----------------------------- */ if (read_driver_params(DriverConfigFileName, &driver) < 0) { fprintf(stderr, "Reading driver parameters failed\n"); return(-1); } /* ------------------------------------------ */ /* Read the matrix and store it in CSR format */ /* ------------------------------------------ */ fprintf(stderr," Matrix: %s\n",driver.matrixFileName); if (!strcmp("mtx", &driver.matrixFileName[strlen(driver.matrixFileName)-3])) { // coordinate format storing both lower and upper triangular parts ret = readfullMTX(driver.matrixFileName, &matrix.AElts, &matrix.JA, &matrix.IA, &n, &nnz); if (ret < 0) { fprintf(stderr, "ERROR: Could not read matrix file\n"); return(-1); } } else if (driver.matrixFileName[strlen(driver.matrixFileName)-1] == 'U') { // coordinate format storing only upper triangular part ret = readUpperMTX(driver.matrixFileName, &matrix.AElts, &matrix.JA, &matrix.IA, &n, &nnz); if (ret < 0) { fprintf(stderr, "ERROR: Could not read matrix file\n"); return(-1); } } else { //Harwell Boeing format NOT IMPLEMENTED //ret = readmt() ret = -1; if (ret < 0) { fprintf(stderr, "ERROR: Could not read matrix file\n"); return(-1); } } fnorm = frobeniusNorm(n, matrix.IA, matrix.AElts); /* ------------------------------------------------------------------------- */ /* Set up preconditioner if needed. We provide these sample options */ /* in driver.PrecChoice: (driver.PrecChoice is read in read_driver_params) */ /* choice = 0 no preconditioner */ /* choice = 1 K=Diag(A-shift), shift provided once by user */ /* choice = 2 K=Diag(A-shift_i), shifts provided by primme every step */ /* choice = 3 K=ILUT(A-shift) , shift provided once by user */ /* ------------------------------------------------------------------------- */ ret = create_preconditioner (matrix, &Factors, &precond_function, n, nnz, driver); if (ret < 0) { fprintf(stderr, "ERROR: Could not create requested preconditioner \n"); return(-1); } /* --------------------------------------------------------------------- */ /* Primme solver setup */ /* primme_initialize (not needed if ALL primme struct members set)*/ /* primme_set_method (bypass it to fully customize your solver) */ /* --------------------------------------------------------------------- */ /* ----------------------------- */ /* Initialize defaults in primme */ /* ----------------------------- */ primme_initialize(&primme); /* --------------------------------------- */ /* Read in the primme configuration file */ /* --------------------------------------- */ primme.n = n; primme.aNorm = fnorm; /* ||A||_frobenius. A configFile entry overwrites it */ if (read_solver_params(SolverConfigFileName, driver.outputFileName, &primme, &method) < 0) { fprintf(stderr, "Reading solver parameters failed\n"); return(-1); } /* --------------------------------------- */ /* Pick one of the default methods(if set) */ /* --------------------------------------- */ if (primme_set_method(method, &primme) < 0 ) { fprintf(primme.outputFile, "No preset method. Using custom settings\n"); } /* --------------------------------------- */ /* Optional: report memory requirements */ /* --------------------------------------- */ ret = dprimme(NULL,NULL,NULL,&primme); fprintf(primme.outputFile,"PRIMME will allocate the following memory:\n"); fprintf(primme.outputFile,"real workspace, %ld bytes\n",primme.realWorkSize); fprintf(primme.outputFile,"int workspace, %d bytes\n",primme.intWorkSize); /* --------------------------------------- */ /* Set up matrix vector and preconditioner */ /* --------------------------------------- */ primme.matrixMatvec = MatrixMatvec; primme.applyPreconditioner = precond_function; /* --------------------------------------- */ /* Optional: provide matrix/preconditioner */ /* --------------------------------------- */ primme.matrix = &matrix; primme.preconditioner = &Factors; /* --------------------------------------- */ /* Display given parameter configuration */ /* Place this after the dprimme() to see */ /* any changes dprimme() made to primme */ /* --------------------------------------- */ fprintf(primme.outputFile," Matrix: %s\n",driver.matrixFileName); primme_display_params(primme); /* --------------------------------------------------------------------- */ /* Run the dprimme solver */ /* --------------------------------------------------------------------- */ /* Allocate space for converged Ritz values and residual norms */ evals = (double *)primme_calloc(primme.numEvals, sizeof(double), "evals"); evecs = (double *)primme_calloc(primme.n* (primme.numEvals+primme.maxBlockSize), sizeof(double), "evecs"); rnorms = (double *)primme_calloc(primme.numEvals, sizeof(double), "rnorms"); /* ------------------------ */ /* Initial guess (optional) */ /* ------------------------ */ for (i=0;i<primme.n;i++) evecs[i]=1/sqrt(primme.n); /* ------------- */ /* Call primme */ /* ------------- */ wt1 = primme_wTimer2(1); // primme_get_time(&ut1,&st1); ret = dprimme(evals, evecs, rnorms, &primme); wt2 = primme_wTimer2(0); // primme_get_time(&ut2,&st2); /* --------------------------------------------------------------------- */ /* Reporting */ /* --------------------------------------------------------------------- */ primme_PrintStackTrace(primme); fprintf(primme.outputFile, "Wallclock Runtime : %-f\n", wt2-wt1); // fprintf(primme.outputFile, "User Time : %f seconds\n", ut2-ut1); // fprintf(primme.outputFile, "Syst Time : %f seconds\n", st2-st1); if (primme.procID == 0) { for (i=0; i < primme.numEvals; i++) { fprintf(primme.outputFile, "Eval[%d]: %-22.15E rnorm: %-22.15E\n", i+1, evals[i], rnorms[i]); } fprintf(primme.outputFile, " %d eigenpairs converged\n", primme.initSize); fprintf(primme.outputFile, "Tolerance : %-22.15E\n", primme.aNorm*primme.eps); fprintf(primme.outputFile, "Iterations: %-d\n", primme.stats.numOuterIterations); fprintf(primme.outputFile, "Restarts : %-d\n", primme.stats.numRestarts); fprintf(primme.outputFile, "Matvecs : %-d\n", primme.stats.numMatvecs); fprintf(primme.outputFile, "Preconds : %-d\n", primme.stats.numPreconds); fprintf(primme.outputFile, "\n\n#,%d,%.1f\n\n", primme.stats.numMatvecs, wt2-wt1); switch (primme.dynamicMethodSwitch) { case -1: fprintf(primme.outputFile, "Recommended method for next run: DEFAULT_MIN_MATVECS\n"); break; case -2: fprintf(primme.outputFile, "Recommended method for next run: DEFAULT_MIN_TIME\n"); break; case -3: fprintf(primme.outputFile, "Recommended method for next run: DYNAMIC (close call)\n"); break; } } if (ret != 0) { fprintf(primme.outputFile, "Error: dprimme returned with nonzero exit status\n"); return -1; } fclose(primme.outputFile); primme_Free(&primme); return(0); }
/****************************************************************************** * Creates one of 3 preconditioners depending on driver.PrecChoice: * * Our sample choices are * * choice = 0 no preconditioner * choice = 1 K=Diag(A-shift), shift provided once by user * choice = 2 K=Diag(A-shift_i), shifts provided by primme every step * choice = 3 K=ILUT(A-shift) , shift provided once by user * * on return: * Factors: pointer to the preconditioner CSR structure * precond_function: pointer to the function that applies the preconditioner * * For each choice we have: * * generated by function: Applied by function * ------------------------- ------------------- * choice = 0 - NULL * choice = 1 generate_Inv_Diagonal_Prec Apply_Inv_Diagonal_Prec * choice = 2 generate_Diagonal_Prec Apply_Diagonal_Shifted_Prec * choice = 3 ilut Apply_ILUT_Prec * * The user is cautioned that ILUT is a nonsymmetric preconditioner, * which could potentially prevent the QMR inner solver from converging. * We have not noticed this in our experiments. * ******************************************************************************/ int create_preconditioner(CSRMatrix matrix, CSRMatrix *Factors, #ifdef Cplusplus /* C++ has a stricter type checking */ void (**precond_function)(void *, void *, int *, primme_params *), #else void **precond_function, #endif int n, int nnz, driver_params driver) { int lenFactors; *precond_function = NULL; switch (driver.PrecChoice) { case 0: // No preconditioner created break; case 1: case 2: // Diagonal: K = diag(A-shift I), shift can be primme provided Factors->AElts = (double *)primme_calloc(n, sizeof(double), "Factors.AElts"); Factors->IA = (int *)primme_calloc(n+1, sizeof(int), "Factors.IA"); Factors->JA = (int *)primme_calloc(n, sizeof(int),"Factors.JA"); printf("Generating diagonal preconditioner"); if (driver.PrecChoice == 1) { printf(" with the user provided shift %e\n",driver.shift); generate_Inv_Diagonal_Prec(n, driver.shift, matrix.IA, matrix.JA, matrix.AElts, Factors->IA, Factors->JA, Factors->AElts); *precond_function = Apply_Inv_Diagonal_Prec; break; } else { printf(" that will use solver provided shifts\n"); generate_Diagonal_Prec(n, matrix.IA, matrix.JA, matrix.AElts, Factors->IA, Factors->JA, Factors->AElts); *precond_function = Apply_Diagonal_Shifted_Prec; break; } case 3: { // ILUT(A-shift I) int ierr; int precondlfil = driver.level; double precondTol = driver.threshold; double *W1, *W2; int *iW1, *iW2, *iW3; if (driver.shift != 0.0L) { shiftCSRMatrix(-driver.shift, n, matrix.IA,matrix.JA,matrix.AElts); } // Work arrays W1 = (double *)primme_calloc( n+1, sizeof(double), "W1"); W2 = (double *)primme_calloc( n, sizeof(double), "W2"); iW1 = (int *)primme_calloc( n, sizeof(int), "iW1"); iW2 = (int *)primme_calloc( n, sizeof(int), "iW2"); iW3 = (int *)primme_calloc( n, sizeof(int), "iW2"); //--------------------------------------------------- // Max size of factorization // lenFactors = 9*nnz; // //--------------------------------------------------- Factors->AElts = (double *)primme_calloc(lenFactors, sizeof(double), "iluElts"); Factors->JA = (int *)primme_calloc(lenFactors, sizeof(int), "Jilu"); Factors->IA = (int *)primme_calloc(n+1, sizeof(int), "Iilu"); ilut_(&n,matrix.AElts,matrix.JA,matrix.IA,&precondlfil,&precondTol, Factors->AElts, Factors->JA, Factors->IA, &lenFactors, W1,W2,iW1,iW2,iW3,&ierr); if (ierr != 0) { fprintf(stderr, "ILUT factorization could not be completed\n"); return(-1); } if (driver.shift != 0.0L) { shiftCSRMatrix(driver.shift, n, matrix.IA,matrix.JA,matrix.AElts); } // free workspace free(W1); free(W2); free(iW1); free(iW2); free(iW3); } *precond_function = Apply_ILUT_Prec; break; case 4: // Parasails(A - shift I) // not implemented yet break; } return 0; }
int main (int argc, char *argv[]) { /* Timing vars */ double ut1,ut2,st1,st2,wt1,wt2; /* Matrix */ int n, nLocal, nnz; double fnorm; CSRMatrix matrix; // The matrix in simple SPARSKIT CSR format Matrix *Par_matrix; // Pointer to the matrix in Parasails CSR format /* Permutation/partitioning stuff */ int *mask, *map; int *fg2or, *or2fg; /* Preconditioner */ ParaSails *Par_Factors; // Pointer to the matrix in Parasails CSR format /* Files */ char *DriverConfigFileName, *SolverConfigFileName; char partFileName[512]; FILE *partFile; /* Driver and solver I/O arrays and parameters */ double *evals, *evecs, *rnorms; driver_params driver; primme_params primme; primme_preset_method method; #ifdef Cplusplus /* C++ has a stricter type checking */ void (*precond_function)(void *, void *, int *, primme_params *); #else void *precond_function; #endif /* Other miscellaneous items */ int i,j; int ret, modulo; int rangeStart; int rangeEnd; int numProcs, procID; MPI_Comm comm; /* --------------------------------------------------------------------- */ /* MPI INITIALIZATION */ /* --------------------------------------------------------------------- */ MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numProcs); MPI_Comm_rank(MPI_COMM_WORLD, &procID); comm = MPI_COMM_WORLD; /* --------------------------------------------------------------------- */ /* Read matrix and driver setup */ /* --------------------------------------------------------------------- */ /* ------------------------------------------------------- */ /* Get from command line the names for the 2 config files */ /* ------------------------------------------------------- */ if (argc == 3) { DriverConfigFileName = argv[1]; SolverConfigFileName = argv[2]; } else { MPI_Finalize(); return(-1); } /* ----------------------------- */ /* Read in the driver parameters */ /* ----------------------------- */ if (read_driver_params(DriverConfigFileName, &driver) < 0) { fprintf(stderr, "Reading driver parameters failed\n"); fflush(stderr); MPI_Finalize(); return(-1); } /* ------------------------------------------ */ /* Read the matrix and store it in CSR format */ /* ------------------------------------------ */ if (procID == 0) { fprintf(stdout," Matrix: %s\n",driver.matrixFileName); fflush(stdout); if (!strcmp("mtx", &driver.matrixFileName[strlen(driver.matrixFileName)-3])) { // coordinate format storing both lower and upper triangular parts ret = readfullMTX(driver.matrixFileName, &matrix.AElts, &matrix.JA, &matrix.IA, &n, &nnz); if (ret < 0) { fprintf(stderr, "ERROR: Could not read matrix file\n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); return(-1); } } else if (driver.matrixFileName[strlen(driver.matrixFileName)-1] == 'U') { // coordinate format storing only upper triangular part ret = readUpperMTX(driver.matrixFileName, &matrix.AElts, &matrix.JA, &matrix.IA, &n, &nnz); if (ret < 0) { fprintf(stderr, "ERROR: Could not read matrix file\n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); return(-1); } } else { //Harwell Boeing format NOT IMPLEMENTED //ret = readmt() ret = -1; if (ret < 0) { fprintf(stderr, "ERROR: Could not read matrix file\n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); return(-1); } } } // if procID == 0 /* ----------------------------------------------------------- */ // Allocate space on other processors and broadcast the matrix /* ----------------------------------------------------------- */ MPI_Bcast(&nnz, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); if (procID != 0) { matrix.AElts = (double *)primme_calloc(nnz, sizeof(double), "A"); matrix.JA = (int *)primme_calloc(nnz, sizeof(int), "JA"); matrix.IA = (int *)primme_calloc(n+1, sizeof(int), "IA"); } else { // Proc 0 converts CSR to C indexing for (i=0; i < n+1; i++) { matrix.IA[i]--; } for (i=0; i < nnz; i++) { matrix.JA[i]--; } } MPI_Bcast(matrix.AElts, nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(matrix.IA, n+1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(matrix.JA, nnz, MPI_INT, 0, MPI_COMM_WORLD); fnorm = frobeniusNorm(n, matrix.IA, matrix.AElts); /* ---------------------------------------------------------------------- */ /* Partitioning of the matrix among the processors */ /* ---------------------------------------------------------------------- */ mask = (int *)primme_calloc(n, sizeof(int), "mask"); map = (int *)primme_calloc(numProcs+1, sizeof(int), "map"); fg2or = (int *)primme_calloc(n, sizeof(int), "fg2or"); or2fg = (int *)primme_calloc(n, sizeof(int), "or2fg"); // /* * * * * * * * * * * * * * * * * * // * Read the partition from a file // * * * * * * * * * * * * * * * * * */ // sprintf(partFileName, "%s/%s", driver.partDir, driver.partId); // partFile = fopen(partFileName, "r"); // // if (partFile == 0) { // fprintf(stderr, "ERROR: Could not open '%s'\n", partFileName); // MPI_Finalize(); // return(-1); // } // // for (i = 0; i < n; i++) { // fscanf(partFile, "%d", &mask[i]); // } // // fclose(partFile); /* * * * * * * * * * * * * * * * * * * * * * * */ /* Simplistic assignment of processors to rows */ /* * * * * * * * * * * * * * * * * * * * * * * */ nLocal = n / numProcs; modulo = n % numProcs; rangeStart = 0; for (i=0; i<numProcs; i++) { rangeEnd = rangeStart + nLocal; if (i < modulo) rangeEnd = rangeEnd + 1; for (j = rangeStart; j< rangeEnd; j++) mask[j] = i; rangeStart = rangeEnd; } /* * * * * * * * * * * * * * * * * * * * * * * */ generatePermutations(n, numProcs, mask, or2fg, fg2or, map); rangeStart = map[procID]; rangeEnd = map[procID+1]-1; nLocal = rangeEnd - rangeStart+1; Par_matrix = csrToParaSails(procID, map, fg2or, or2fg, matrix.IA, matrix.JA, matrix.AElts, comm); /* ------------------------------------------------------------------------- */ /* Set up preconditioner if needed. For parallel programs the only choice */ /* in driver.PrecChoice: (driver.PrecChoice is read in read_driver_params) */ /* choice = 4 Parallel ParaSails preconditioners */ /* with parameters (level, threshold, filter, isymm) */ /* as read in read_driver_params(). */ /* ------------------------------------------------------------------------- */ if (driver.PrecChoice == 4) { Par_Factors = generate_precond(&matrix, driver.shift, n, procID, map, fg2or, or2fg, rangeStart, rangeEnd, driver.isymm, driver.level, driver.threshold, driver.filter, comm); precond_function = par_ApplyParasailsPrec; } else { Par_Factors = NULL; precond_function = NULL; // Free A as it is not further needed free(matrix.AElts); free(matrix.IA); free(matrix.JA); } free(mask); free(map); free(fg2or); free(or2fg); /* --------------------------------------------------------------------- */ /* Primme solver setup */ /* primme_initialize (not needed if ALL primme struct members set)*/ /* primme_set_method (bypass it to fully customize your solver) */ /* --------------------------------------------------------------------- */ /* ----------------------------- */ /* Initialize defaults in primme */ /* ----------------------------- */ primme_initialize(&primme); /* --------------------------------------- */ /* Read in the primme configuration file */ /* --------------------------------------- */ primme.n = n; primme.aNorm = fnorm; /* ||A||_frobenius. A configFile entry overwrites it */ if (procID == 0) { if (read_solver_params(SolverConfigFileName, driver.outputFileName, &primme, &method) < 0) { fprintf(stderr, "Reading solver parameters failed\n"); MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); return(-1); } } /* ------------------------------------------------- */ // Send read common primme members to all processors // Setup the primme members local to this processor /* ------------------------------------------------- */ broadCast(&primme, &method, comm); primme.procID = procID; primme.numProcs = numProcs; primme.nLocal = nLocal; primme.commInfo = &comm; primme.globalSumDouble = par_GlobalSumDouble; /* --------------------------------------- */ /* Pick one of the default methods(if set) */ /* --------------------------------------- */ if (primme_set_method(method, &primme) < 0 ) { fprintf(primme.outputFile, "No preset method. Using custom settings\n"); } /* --------------------------------------- */ /* Optional: report memory requirements */ /* --------------------------------------- */ ret = dprimme(NULL,NULL,NULL,&primme); fprintf(primme.outputFile,"PRIMME will allocate the following memory:\n"); fprintf(primme.outputFile," processor %d, real workspace, %ld bytes\n", procID, primme.realWorkSize); fprintf(primme.outputFile," processor %d, int workspace, %d bytes\n", procID, primme.intWorkSize); /* --------------------------------------- */ /* Set up matrix vector and preconditioner */ /* --------------------------------------- */ primme.matrixMatvec = par_MatrixMatvec; primme.applyPreconditioner = precond_function; /* --------------------------------------- */ /* Optional: provide matrix/preconditioner */ /* --------------------------------------- */ primme.matrix = Par_matrix; primme.preconditioner = Par_Factors; /* --------------------------------------- */ /* Display given parameter configuration */ /* Place this after the dprimme() to see */ /* any changes dprimme() made to primme */ /* --------------------------------------- */ if (procID >= 0) { fprintf(primme.outputFile," Matrix: %s\n",driver.matrixFileName); primme_display_params(primme); } /* --------------------------------------------------------------------- */ /* Run the dprimme solver */ /* --------------------------------------------------------------------- */ /* Allocate space for converged Ritz values and residual norms */ evals = (double *)primme_calloc(primme.numEvals, sizeof(double), "evals"); evecs = (double *)primme_calloc(primme.nLocal* (primme.numEvals+primme.maxBlockSize), sizeof(double), "evecs"); rnorms = (double *)primme_calloc(primme.numEvals, sizeof(double), "rnorms"); /* ------------------------ */ /* Initial guess (optional) */ /* ------------------------ */ for (i=0;i<primme.nLocal;i++) evecs[i]=1/sqrt(primme.n); /* ------------- */ /* Call primme */ /* ------------- */ wt1 = primme_get_wtime(); primme_get_time(&ut1,&st1); ret = dprimme(evals, evecs, rnorms, &primme); wt2 = primme_get_wtime(); primme_get_time(&ut2,&st2); /* --------------------------------------------------------------------- */ /* Reporting */ /* --------------------------------------------------------------------- */ if (procID == 0) { primme_PrintStackTrace(primme); } fprintf(primme.outputFile, "Wallclock Runtime : %-f\n", wt2-wt1); fprintf(primme.outputFile, "User Time : %f seconds\n", ut2-ut1); fprintf(primme.outputFile, "Syst Time : %f seconds\n", st2-st1); if (primme.procID == 0) { for (i=0; i < primme.numEvals; i++) { fprintf(primme.outputFile, "Eval[%d]: %-22.15E rnorm: %-22.15E\n", i+1, evals[i], rnorms[i]); } fprintf(primme.outputFile, " %d eigenpairs converged\n", primme.initSize); fprintf(primme.outputFile, "Tolerance : %-22.15E\n", primme.aNorm*primme.eps); fprintf(primme.outputFile, "Iterations: %-d\n", primme.stats.numOuterIterations); fprintf(primme.outputFile, "Restarts : %-d\n", primme.stats.numRestarts); fprintf(primme.outputFile, "Matvecs : %-d\n", primme.stats.numMatvecs); fprintf(primme.outputFile, "Preconds : %-d\n", primme.stats.numPreconds); fprintf(primme.outputFile, "\n\n#,%d,%.1f\n\n", primme.stats.numMatvecs, wt2-wt1); switch (primme.dynamicMethodSwitch) { case -1: fprintf(primme.outputFile, "Recommended method for next run: DEFAULT_MIN_MATVECS\n"); break; case -2: fprintf(primme.outputFile, "Recommended method for next run: DEFAULT_MIN_TIME\n"); break; case -3: fprintf(primme.outputFile, "Recommended method for next run: DYNAMIC (close call)\n"); break; } } if (ret != 0 && procID == 0) { fprintf(primme.outputFile, "Error: dprimme returned with nonzero exit status: %d \n",ret); return -1; } fflush(primme.outputFile); fclose(primme.outputFile); primme_Free(&primme); fflush(stdout); fflush(stderr); MPI_Barrier(comm); MPI_Finalize(); return(0); }
void primme_initialize_f90(primme_params **primme){ *primme = (primme_params *)primme_calloc(1,sizeof(primme_params),"primme"); primme_initialize(*primme); }