// START-C-CLIENT-1 /// \brief main entry-point for the tool /// \param argc the number of command-line arguments /// \param argv the command-line arguments as an array of strings /// \return the exit number int ClientMain ( int argc, MQ_CST argv[] ) { struct MqBufferS * buf; // a object for return values // the commandline-arguments (before and after the first MQ_ALFA) struct MqBufferLS * largv = MqBufferLCreateArgs(argc, argv); struct MqBufferLS * parentArgv = MqBufferLDup(largv); // what should be tested ? MQ_BOL sendB; // test MqSendEND time? MQ_BOL sendAndWait; // test MqSendEND_AND_WAIT round-trip time? MQ_BOL sendAndCall; // test MqSendEND_AND_CALLBACK round-trip time? MQ_BOL sendPersistent; // test MqSendEND_AND_WAIT together with persistent transactions round-trip time? MQ_BOL parent; // test parent-context creation time? MQ_BOL child; // test child-context creation time? MQ_BOL all; // test all MQ_INT num = -1; #define SIZE 1000 MQ_BIN data = MqSysMalloc (MQ_ERROR_PANIC, (SIZE)); // make a backup of largv and lalfa for every test // create the client context and start the server struct MqS * mqctx = MqContextCreate(0, NULL); mqctx->setup.fHelp = ClientHelp; MqErrorCheck (MqLinkCreate (mqctx, &largv)); // read application specific arguments MqBufferLCheckOptionO (mqctx, largv, "--send-perf", &sendB); MqBufferLCheckOptionO (mqctx, largv, "--send-and-wait-perf", &sendAndWait); MqBufferLCheckOptionO (mqctx, largv, "--send-and-callback-perf", &sendAndCall); MqBufferLCheckOptionO (mqctx, largv, "--send-persistent-perf", &sendPersistent); MqBufferLCheckOptionO (mqctx, largv, "--parent-perf", &parent); MqBufferLCheckOptionO (mqctx, largv, "--child-perf", &child); MqBufferLCheckOptionO (mqctx, largv, "--all", &all); if (all) { sendB = sendAndWait = sendAndCall = sendPersistent = parent = child = MQ_YES; } else if (sendB == MQ_NO && sendAndWait == MQ_NO && sendAndCall == MQ_NO && sendPersistent == MQ_NO && parent == MQ_NO && child == MQ_NO ) { sendAndWait = MQ_YES; } // the user can supply --num to change the number of iterations MqBufferLCheckOptionI(mqctx, largv,"--num",&num); // check for wrong arguments MqErrorCheck (MqCheckForLeftOverArguments(mqctx, &largv)); // initialize memory, just run one test-case to initialize dynamic data memset (data, 'A', SIZE); MqSendSTART (mqctx); MqSendB (mqctx, data, SIZE); MqErrorCheck (MqSendEND_AND_WAIT (mqctx, "ECOU", MQ_TIMEOUT10)); MqErrorCheck (MqReadU (mqctx, &buf)); // start the MqSendEND_AND_WAIT transaction-performance test MqDLogC (mqctx, 0, "start: --------------------------------------\n"); if (sendB) { // if necessary apply the default number of transactions MQ_INT const lnum = (num == -1 ? NUM_TRANS : num); StatTimerSP itemT = StatCreate (mqctx); { StatCtxSP stat = StatCtxCreate (mqctx, "MqSendEND", lnum); const MQ_BYT stepY = 1; const MQ_SRT stepS = ((SHRT_MAX/lnum)*2); const MQ_INT stepI = ((INT_MAX/lnum)*2); const MQ_WID stepW = ((LLONG_MAX/lnum)*2); MQ_BYT valY = SCHAR_MIN; MQ_SRT valS = SHRT_MIN; MQ_INT valI = INT_MIN; MQ_WID valW = LLONG_MIN; int i; StatInit (itemT); for (i=0; i<lnum; i++) { MqSendSTART (mqctx); MqSendY (mqctx, valY); MqSendS (mqctx, valS); MqSendI (mqctx, valI); MqSendW (mqctx, valW); MqSendB (mqctx, data, ((i%SIZE)+1)); MqErrorCheck (MqSendEND (mqctx, "RDUL")); valY += stepY; valS += stepS; valI += stepI; valW += stepW; }; // just sync with the server MqSendSTART (mqctx); MqSendI(mqctx, valI); MqErrorCheck (MqSendEND_AND_WAIT (mqctx, "ECOI", 10)); MqErrorCheck (MqReadI(mqctx,&valI)); StatCtxCalc (stat, itemT); StatCtxPrint (stat); StatCtxDelete (&stat); } // cleanup StatDelete (&itemT); } /* finish the MqSendEND test */ if (sendAndCall) { // if necessary apply the default number of transactions MQ_INT const lnum = (num == -1 ? NUM_TRANS : num); StatTimerSP itemT = StatCreate (mqctx); { StatCtxSP stat = StatCtxCreate (mqctx, "MqSendEND_AND_CALLBACK", lnum); const MQ_BYT stepY = 1; const MQ_SRT stepS = ((SHRT_MAX/lnum)*2); const MQ_INT stepI = ((INT_MAX/lnum)*2); const MQ_WID stepW = ((LLONG_MAX/lnum)*2); MQ_BYT valY = SCHAR_MIN; MQ_SRT valS = SHRT_MIN; MQ_INT valI = INT_MIN; MQ_WID valW = LLONG_MIN; int i; callnum = 0; StatInit (itemT); for (i=0; i<lnum; i++) { MqSendSTART (mqctx); MqSendY (mqctx, valY); MqSendS (mqctx, valS); MqSendI (mqctx, valI); MqSendW (mqctx, valW); MqSendB (mqctx, data, ((i%SIZE)+1)); MqErrorCheck (MqSendEND_AND_CALLBACK (mqctx, "ECUL", RET_ECUL, NULL, NULL)); valY += stepY; valS += stepS; valI += stepI; valW += stepW; // don't flood the socket buffer with unread messages if ((i % 7) == 0) { while (MqProcessEvent(mqctx, 3, MQ_WAIT_NO) == MQ_OK); } MqErrorCheck(MqErrorGetCode(mqctx)); }; // wait untill all callbacks are processed while (callnum != lnum) MqErrorCheck(MqProcessEvent(mqctx, 3, MQ_WAIT_ONCE)); StatCtxCalc (stat, itemT); StatCtxPrint (stat); StatCtxDelete (&stat); } // cleanup StatDelete (&itemT); //MqSysSleep (MQ_ERROR_IGNORE, 2); } /* finish the MqSendEND_AND_CALLBACK test */ if (sendAndWait) { // if necessary apply the default number of transactions MQ_INT const lnum = (num == -1 ? NUM_TRANS : num); StatTimerSP itemT = StatCreate (mqctx); { StatCtxSP stat = StatCtxCreate (mqctx, "MqSendEND_AND_WAIT", lnum); const MQ_BYT stepY = 1; const MQ_SRT stepS = ((SHRT_MAX/lnum)*2); const MQ_INT stepI = ((INT_MAX/lnum)*2); const MQ_WID stepW = ((LLONG_MAX/lnum)*2); MQ_BYT valY = SCHAR_MIN; MQ_SRT valS = SHRT_MIN; MQ_INT valI = INT_MIN; MQ_WID valW = LLONG_MIN; MQ_BUF buf; int i; StatInit (itemT); for (i=0; i<lnum; i++) { MqSendSTART (mqctx); MqSendY (mqctx, valY); MqSendS (mqctx, valS); MqSendI (mqctx, valI); MqSendW (mqctx, valW); MqSendB (mqctx, data, ((i%SIZE)+1)); MqErrorCheck (MqSendEND_AND_WAIT (mqctx, "ECUL", MQ_TIMEOUT10)); MqErrorCheck (MqReadY (mqctx, &valY)); MqErrorCheck (MqReadS (mqctx, &valS)); MqErrorCheck (MqReadI (mqctx, &valI)); MqErrorCheck (MqReadW (mqctx, &valW)); MqErrorCheck (MqReadU (mqctx, &buf)); valY += stepY; valS += stepS; valI += stepI; valW += stepW; }; StatCtxCalc (stat, itemT); StatCtxPrint (stat); StatCtxDelete (&stat); } // cleanup StatDelete (&itemT); } /* finish the MqSendEND_AND_WAIT performance test */ if (sendPersistent) { // if necessary apply the default number of transactions MQ_INT const lnum = (num == -1 ? NUM_TRANS / 5000 : num); StatTimerSP itemT = StatCreate (mqctx); { StatCtxSP stat = StatCtxCreate (mqctx, "MqSendPERSISTENT", lnum); const MQ_BYT stepY = 1; const MQ_SRT stepS = ((SHRT_MAX/lnum)*2); const MQ_INT stepI = ((INT_MAX/lnum)*2); const MQ_WID stepW = ((LLONG_MAX/lnum)*2); MQ_BYT valY = SCHAR_MIN; MQ_SRT valS = SHRT_MIN; MQ_INT valI = INT_MIN; MQ_WID valW = LLONG_MIN; int i; // setup the callback MqErrorCheck(MqServiceCreate(mqctx, "SDTR", RET_SDTR, NULL ,NULL)); unlink("testDb"); // set transaction-database name MqErrorCheck(MqSendSTART(mqctx)); MqErrorCheck(MqSendC(mqctx,"testDb")); MqErrorCheck(MqSendEND_AND_WAIT(mqctx,"STDB",MQ_TIMEOUT_USER)); // prepare sql queries MqSendSTART (mqctx); MqSendT_START(mqctx); MqSendI (mqctx, 999); MqSendT_END(mqctx, "SDTR"); MqSendY (mqctx, valY); MqSendS (mqctx, valS); MqSendI (mqctx, valI); MqSendW (mqctx, valW); MqSendB (mqctx, data, 10); MqErrorCheck (MqSendEND_AND_WAIT (mqctx, "ECUL", MQ_TIMEOUT10)); MqErrorCheck(MqProcessEvent(mqctx, 3, MQ_WAIT_ONCE)); // start the test callnum = 0; StatInit (itemT); for (i=0; i<lnum; i++) { MqSendSTART (mqctx); MqSendT_START(mqctx); MqSendI (mqctx, 999); MqSendT_END(mqctx, "SDTR"); MqSendY (mqctx, valY); MqSendS (mqctx, valS); MqSendI (mqctx, valI); MqSendW (mqctx, valW); MqSendB (mqctx, data, ((i%SIZE)+1)); MqErrorCheck (MqSendEND_AND_WAIT (mqctx, "ECUL", MQ_TIMEOUT10)); valY += stepY; valS += stepS; valI += stepI; valW += stepW; }; // wait untill all callbacks are processed while (callnum != lnum) { MqErrorCheck(MqProcessEvent(mqctx, 3, MQ_WAIT_ONCE)); } StatCtxCalc (stat, itemT); StatCtxPrint (stat); StatCtxDelete (&stat); MqErrorCheck(MqServiceDelete(mqctx, "SDTR")); } // cleanup StatDelete (&itemT); unlink("testDb"); } /* finish the MqSendTRANSACTION performance test */ // start the parent-context creation test if (parent) { int n; StatTimerSP itemT; // if necessary apply the default number of transactions MQ_INT const lnum = (num == -1 ? NUM_PARENT : num); struct MqS** msgqueA = (struct MqS**) MqSysMalloc(MQ_ERROR_PANIC, lnum * sizeof(struct MqS*)); struct MqBufferLS** largvA = (struct MqBufferLS**) MqSysMalloc(MQ_ERROR_PANIC, lnum * sizeof(struct MqBufferLS*)); // copy the argument vector to create 'num' parent context for (n=0; n<lnum; n++) { largvA[n] = MqBufferLDup(parentArgv); } itemT = StatCreate (mqctx); { // loop to create the parent context StatCtxSP stat = StatCtxCreate (mqctx, "parent create", lnum); StatInit (itemT); for (n=0; n<lnum; n++) { msgqueA[n] = MqContextCreate(0, mqctx); if (MqLinkCreate (msgqueA[n], &largvA[n]) == MQ_ERROR) { MqContextDelete (&mqctx); mqctx = msgqueA[n]; for (n=0; n < n-1; n++) { MqContextDelete (&msgqueA[n]); } goto error; } MqBufferLDelete(&largvA[n]); } StatCtxCalc (stat, itemT); StatCtxPrint (stat); StatCtxDelete (&stat); // loop to delete the parent context stat = StatCtxCreate (mqctx, "parent delete", lnum); StatInit (itemT); for (n=0; n<lnum; n++) { MqContextDelete (&msgqueA[n]); } StatCtxCalc (stat, itemT); StatCtxPrint (stat); StatCtxDelete (&stat); } // cleanup MqSysFree(largvA); MqSysFree(msgqueA); StatDelete (&itemT); } // start the child-context creation test if (child) { int n; StatTimerSP itemT; // if necessary apply the default number of transactions MQ_INT const lnum = (num == -1 ? NUM_CHILD : num); struct MqS** contextA = (struct MqS**) MqSysMalloc(MQ_ERROR_PANIC, lnum * sizeof(struct MqS*)); // fill template configuration struct MqS *template = MqContextCreate(0, NULL);
main(int argc, char *argv[]) { char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; float *a; int *asub, *xa; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, m, n, nnz; float *rhsb, *rhsx, *xact; float *R, *C; float *ferr, *berr; float u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; /* Add more functionalities that the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ options.IterRefine = SINGLE; /* Perform single-precision refinement */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("SLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ sreadhb(&m, &n, &nnz, &a, &asub, &xa); sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); ldx = n; sGenXtrue(n, nrhs, xact, ldx); sFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* Solve the system and compute the condition number and error bounds using dgssvx. */ sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("sgssvx(): info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ float *sol = (float*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); if ( options.IterRefine != NOREFINE ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
void psgssvx(int nprocs, superlumt_options_t *superlumt_options, SuperMatrix *A, int *perm_c, int *perm_r, equed_t *equed, float *R, float *C, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, float *rcond, float *ferr, float *berr, superlu_memusage_t *superlu_memusage, int *info) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * * psgssvx() solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from sgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. If fact = EQUILIBRATE, scaling factors are computed to equilibrate * the system: * trans = NOTRANS: diag(R)*A*diag(C)*inv(diag(C))*X = diag(R)*B * trans = TRANS: (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = CONJ: (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B * (if trans = NOTRANS) or diag(C)*B (if trans = TRANS or CONJ). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation matrix * that usually preserves sparsity. * For more details of this step, see ssp_colorder.c. * * 1.3. If fact = DOFACT or EQUILIBRATE, the LU decomposition is used to * factor the matrix A (after equilibration if fact = EQUILIBRATE) as * Pr*A*Pc = L*U, with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the routine * returns with info = i. Otherwise, the factored form of A is used to * estimate the condition number of the matrix A. If the reciprocal of * the condition number is less than machine precision, * info = A->ncol+1 is returned as a warning, but the routine still * goes on to solve for X and computes error bounds as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = NOTRANS) or diag(R) (if trans = TRANS or CONJ) * so that it solves the original system before equilibration. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the tranpose of A: * * 2.1. If fact = EQUILIBRATE, scaling factors are computed to equilibrate * the system: * trans = NOTRANS:diag(R)*A'*diag(C)*inv(diag(C))*X = diag(R)*B * trans = TRANS: (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = CONJ: (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans = NOTRANS) or diag(C)*B (if trans = TRANS or CONJ). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see ssp_colorder.c. * * 2.3. If fact = DOFACT or EQUILIBRATE, the LU decomposition is used to * factor the matrix A (after equilibration if fact = EQUILIBRATE) as * Pr*transpose(A)*Pc = L*U, with the permutation Pr determined by * partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the routine * returns with info = i. Otherwise, the factored form of transpose(A) * is used to estimate the condition number of the matrix A. * If the reciprocal of the condition number is less than machine * precision, info = A->nrow+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = NOTRANS) or diag(R) (if trans = TRANS or CONJ) * so that it solves the original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * nprocs (input) int * Number of processes (or threads) to be spawned and used to perform * the LU factorization by psgstrf(). There is a single thread of * control to call psgstrf(), and all threads spawned by psgstrf() * are terminated before returning from psgstrf(). * * superlumt_options (input) superlumt_options_t* * The structure defines the input parameters and data structure * to control how the LU factorization will be performed. * The following fields should be defined for this structure: * * o fact (fact_t) * Specifies whether or not the factored form of the matrix * A is supplied on entry, and if not, whether the matrix A should * be equilibrated before it is factored. * = FACTORED: On entry, L, U, perm_r and perm_c contain the * factored form of A. If equed is not NOEQUIL, the matrix A has * been equilibrated with scaling factors R and C. * A, L, U, perm_r are not modified. * = DOFACT: The matrix A will be factored, and the factors will be * stored in L and U. * = EQUILIBRATE: The matrix A will be equilibrated if necessary, * then factored into L and U. * * o trans (trans_t) * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A**T * X = B (Transpose) * = CONJ: A**H * X = B (Transpose) * * o refact (yes_no_t) * Specifies whether this is first time or subsequent factorization. * = NO: this factorization is treated as the first one; * = YES: it means that a factorization was performed prior to this * one. Therefore, this factorization will re-use some * existing data structures, such as L and U storage, column * elimination tree, and the symbolic information of the * Householder matrix. * * o panel_size (int) * A panel consists of at most panel_size consecutive columns. * * o relax (int) * To control degree of relaxing supernodes. If the number * of nodes (columns) in a subtree of the elimination tree is less * than relax, this subtree is considered as one supernode, * regardless of the row structures of those columns. * * o diag_pivot_thresh (float) * Diagonal pivoting threshold. At step j of the Gaussian * elimination, if * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)), * use A_jj as pivot, else use A_ij with maximum magnitude. * 0 <= diag_pivot_thresh <= 1. The default value is 1, * corresponding to partial pivoting. * * o usepr (yes_no_t) * Whether the pivoting will use perm_r specified by the user. * = YES: use perm_r; perm_r is input, unchanged on exit. * = NO: perm_r is determined by partial pivoting, and is output. * * o drop_tol (double) (NOT IMPLEMENTED) * Drop tolerance parameter. At step j of the Gaussian elimination, * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij. * 0 <= drop_tol <= 1. The default value of drop_tol is 0, * corresponding to not dropping any entry. * * o work (void*) of size lwork * User-supplied work space and space for the output data structures. * Not referenced if lwork = 0; * * o lwork (int) * Specifies the length of work array. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * superlu_memusage->total_needed; no other side effects. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol), where * A->nrow = A->ncol. Currently, the type of A can be: * Stype = NC or NR, Dtype = _D, Mtype = GE. In the future, * more general A will be handled. * * On entry, If superlumt_options->fact = FACTORED and equed is not * NOEQUIL, then A must have been equilibrated by the scaling factors * in R and/or C. On exit, A is not modified * if superlumt_options->fact = FACTORED or DOFACT, or * if superlumt_options->fact = EQUILIBRATE and equed = NOEQUIL. * * On exit, if superlumt_options->fact = EQUILIBRATE and equed is not * NOEQUIL, A is scaled as follows: * If A->Stype = NC: * equed = ROW: A := diag(R) * A * equed = COL: A := A * diag(C) * equed = BOTH: A := diag(R) * A * diag(C). * If A->Stype = NR: * equed = ROW: transpose(A) := diag(R) * transpose(A) * equed = COL: transpose(A) := transpose(A) * diag(C) * equed = BOTH: transpose(A) := diag(R) * transpose(A) * diag(C). * * perm_c (input/output) int* * If A->Stype = NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = NR, column permutation vector of size A->nrow, * which describes permutation of columns of tranpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If superlumt_options->usepr = NO, perm_r is output argument; * If superlumt_options->usepr = YES, the pivoting routine will try * to use the input perm_r, unless a certain threshold criterion * is violated. In that case, perm_r is overwritten by a new * permutation determined by partial pivoting or diagonal * threshold pivoting. * * equed (input/output) equed_t* * Specifies the form of equilibration that was done. * = NOEQUIL: No equilibration. * = ROW: Row equilibration, i.e., A was premultiplied by diag(R). * = COL: Column equilibration, i.e., A was postmultiplied by diag(C). * = BOTH: Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If superlumt_options->fact = FACTORED, equed is an input argument, * otherwise it is an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = ROW or BOTH, A (if A->Stype = NC) or transpose(A) * (if A->Stype = NR) is multiplied on the left by diag(R). * If equed = NOEQUIL or COL, R is not accessed. * If fact = FACTORED, R is an input argument; otherwise, R is output. * If fact = FACTORED and equed = ROW or BOTH, each element of R must * be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = COL or BOTH, A (if A->Stype = NC) or trnspose(A) * (if A->Stype = NR) is multiplied on the right by diag(C). * If equed = NOEQUIL or ROW, C is not accessed. * If fact = FACTORED, C is an input argument; otherwise, C is output. * If fact = FACTORED and equed = COL or BOTH, each element of C must * be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = NCP, Dtype = _D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the right hand side matrix. * On exit, * if equed = NOEQUIL, B is not modified; otherwise * if A->Stype = NC: * if trans = NOTRANS and equed = ROW or BOTH, B is overwritten * by diag(R)*B; * if trans = TRANS or CONJ and equed = COL of BOTH, B is * overwritten by diag(C)*B; * if A->Stype = NR: * if trans = NOTRANS and equed = COL or BOTH, B is overwritten * by diag(C)*B; * if trans = TRANS or CONJ and equed = ROW of BOTH, B is * overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = DN, Dtype = _D, Mtype = GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not NOEQUIL, and the solution to the * equilibrated system is inv(diag(C))*X if trans = NOTRANS and * equed = COL or BOTH, or inv(diag(R))*X if trans = TRANS or CONJ * and equed = ROW or BOTH. * * recip_pivot_growth (output) float* * The reciprocal pivot growth factor computed as * max_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) ). * If recip_pivot_growth is much less than 1, the stability of the * LU factorization could be poor. * * rcond (output) float* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * ferr (output) float*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * berr (output) float*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * superlu_memusage (output) superlu_memusage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ NCformat *Astore; DNformat *Bstore, *Xstore; float *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, dofact, notran, rowequ; char norm[1]; trans_t trant; int i, j, info1; float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int n, relax, panel_size; Gstat_t Gstat; double t0; /* temporary time */ double *utime; flops_t *ops, flopcnt; /* External functions */ extern float slangs(char *, SuperMatrix *); extern double slamch_(char *); Astore = A->Store; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; n = A->ncol; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; superlumt_options->perm_c = perm_c; superlumt_options->perm_r = perm_r; *info = 0; dofact = (superlumt_options->fact == DOFACT); equil = (superlumt_options->fact == EQUILIBRATE); notran = (superlumt_options->trans == NOTRANS); if (dofact || equil) { *equed = NOEQUIL; rowequ = FALSE; colequ = FALSE; } else { rowequ = (*equed == ROW) || (*equed == BOTH); colequ = (*equed == COL) || (*equed == BOTH); smlnum = slamch_("Safe minimum"); bignum = 1. / smlnum; } /* ------------------------------------------------------------ Test the input parameters. ------------------------------------------------------------*/ if ( nprocs <= 0 ) *info = -1; else if ( (!dofact && !equil && (superlumt_options->fact != FACTORED)) || (!notran && (superlumt_options->trans != TRANS) && (superlumt_options->trans != CONJ)) || (superlumt_options->refact != YES && superlumt_options->refact != NO) || (superlumt_options->usepr != YES && superlumt_options->usepr != NO) || superlumt_options->lwork < -1 ) *info = -2; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_S || A->Mtype != SLU_GE ) *info = -3; else if ((superlumt_options->fact == FACTORED) && !(rowequ || colequ || (*equed == NOEQUIL))) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE ) *info = -11; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || B->ncol != X->ncol || X->Stype != SLU_DN || X->Dtype != SLU_S || X->Mtype != SLU_GE ) *info = -12; } } if (*info != 0) { i = -(*info); xerbla_("psgssvx", &i); return; } /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ panel_size = superlumt_options->panel_size; relax = superlumt_options->relax; StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); utime = Gstat.utime; ops = Gstat.ops; /* ------------------------------------------------------------ Convert A to NC format when necessary. ------------------------------------------------------------*/ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == NC */ trant = superlumt_options->trans; AA = A; } /* ------------------------------------------------------------ Diagonal scaling to equilibrate the matrix. ------------------------------------------------------------*/ if ( equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ slaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = (*equed == ROW) || (*equed == BOTH); colequ = (*equed == COL) || (*equed == BOTH); } utime[EQUIL] = SuperLU_timer_() - t0; } /* ------------------------------------------------------------ Scale the right hand side. ------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } /* ------------------------------------------------------------ Perform the LU factorization. ------------------------------------------------------------*/ if ( dofact || equil ) { /* Obtain column etree, the column count (colcnt_h) and supernode partition (part_super_h) for the Householder matrix. */ t0 = SuperLU_timer_(); sp_colorder(AA, perm_c, superlumt_options, &AC); utime[ETREE] = SuperLU_timer_() - t0; #if ( PRNTlevel >= 2 ) printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); #endif /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); psgstrf(superlumt_options, &AC, perm_r, L, U, &Gstat, info); utime[FACT] = SuperLU_timer_() - t0; flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; ops[FACT] = flopcnt; if ( superlumt_options->lwork == -1 ) { superlu_memusage->total_needed = *info - A->ncol; return; } } if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = sPivotGrowth(*info, AA, perm_c, L, U); } } else { /* ------------------------------------------------------------ Compute the reciprocal pivot growth factor *recip_pivot_growth. ------------------------------------------------------------*/ *recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U); /* ------------------------------------------------------------ Estimate the reciprocal of the condition number of A. ------------------------------------------------------------*/ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = slangs(norm, AA); sgscon(norm, L, U, anorm, rcond, info); utime[RCOND] = SuperLU_timer_() - t0; /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); sgstrs(trant, L, U, perm_r, perm_c, X, &Gstat, info); utime[SOLVE] = SuperLU_timer_() - t0; ops[SOLVE] = ops[TRISOLVE]; /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ t0 = SuperLU_timer_(); sgsrfs(trant, AA, L, U, perm_r, perm_c, *equed, R, C, B, X, ferr, berr, &Gstat, info); utime[REFINE] = SuperLU_timer_() - t0; /* ------------------------------------------------------------ Transform the solution matrix X to a solution of the original system. ------------------------------------------------------------*/ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } /* Set INFO = A->ncol+1 if the matrix is singular to working precision.*/ if ( *rcond < slamch_("E") ) *info = A->ncol + 1; } superlu_sQuerySpace(nprocs, L, U, panel_size, superlu_memusage); /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ if ( superlumt_options->refact == NO ) { SUPERLU_FREE(superlumt_options->etree); SUPERLU_FREE(superlumt_options->colcnt_h); SUPERLU_FREE(superlumt_options->part_super_h); } if ( dofact || equil ) { Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* ------------------------------------------------------------ Print timings, then deallocate statistic variables. ------------------------------------------------------------*/ #ifdef PROFILE { SCPformat *Lstore = (SCPformat *) L->Store; ParallelProfile(n, Lstore->nsuper+1, Gstat.num_panels, nprocs, &Gstat); } #endif PrintStat(&Gstat); StatFree(&Gstat); }
int main(int argc, char *argv[]) { /* * Purpose * ======= * * The driver program CLINSOLX2. * * This example illustrates how to use CGSSVX to solve systems repeatedly * with the same sparsity pattern of matrix A. * In this case, the column permutation vector perm_c is computed once. * The following data structures will be reused in the subsequent call to * CGSSVX: perm_c, etree * */ char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, A1, L, U; SuperMatrix B, B1, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; complex *a, *a1; int *asub, *xa, *asub1, *xa1; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, j, m, n, nnz; complex *rhsb, *rhsb1, *rhsx, *xact; float *R, *C; float *ferr, *berr; float u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("DLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ creadhb(&m, &n, &nnz, &a, &asub, &xa); if ( !(a1 = complexMalloc(nnz)) ) ABORT("Malloc fails for a1[]."); if ( !(asub1 = intMalloc(nnz)) ) ABORT("Malloc fails for asub1[]."); if ( !(xa1 = intMalloc(n+1)) ) ABORT("Malloc fails for xa1[]."); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; cCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_C, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsb1 = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb1[]."); if ( !(rhsx = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); cCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_C, SLU_GE); cCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_C, SLU_GE); xact = complexMalloc(n * nrhs); ldx = n; cGenXtrue(n, nrhs, xact, ldx); cFillRHS(trans, nrhs, xact, ldx, &A, &B); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) rhsb1[i+j*m] = rhsb[i+j*m]; if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME: AX = B ------------------------------------------------------------*/ cgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("First system: cgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ complex *sol = (complex*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); Destroy_CompCol_Matrix(&A); Destroy_Dense_Matrix(&B); if ( lwork >= 0 ) { /* Deallocate storage associated with L and U. */ Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM: A1*X = B1 ONLY THE SPARSITY PATTERN OF A1 IS THE SAME AS THAT OF A. ------------------------------------------------------------*/ options.Fact = SamePattern; StatInit(&stat); /* Initialize the statistics variables. */ cCreate_CompCol_Matrix(&A1, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_C, SLU_GE); cCreate_Dense_Matrix(&B1, m, nrhs, rhsb1, m, SLU_DN, SLU_C, SLU_GE); cgssvx(&options, &A1, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B1, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("\nSecond system: cgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ complex *sol = (complex*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A1); Destroy_Dense_Matrix(&B1); Destroy_Dense_Matrix(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
void c_fortran_zgssv_(int *iopt, int *n, int *nnz, int *nrhs, doublecomplex *values, int *rowind, int *colptr, doublecomplex *b, int *ldb, fptr *f_factors, /* a handle containing the address pointing to the factored matrices */ int *info) { /* * This routine can be called from Fortran. * * iopt (input) int * Specifies the operation: * = 1, performs LU decomposition for the first time * = 2, performs triangular solve * = 3, free all the storage in the end * * f_factors (input/output) fptr* * If iopt == 1, it is an output and contains the pointer pointing to * the structure of the factored matrices. * Otherwise, it it an input. * */ SuperMatrix A, AC, B; SuperMatrix *L, *U; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; /* column elimination tree */ SCformat *Lstore; NCformat *Ustore; int i, panel_size, permc_spec, relax; trans_t trans; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; factors_t *LUfactors; trans = TRANS; if ( *iopt == 1 ) { /* LU decomposition */ /* Set the default input options. */ set_default_options(&options); /* Initialize the statistics variables. */ StatInit(&stat); /* Adjust to 0-based indexing */ for (i = 0; i < *nnz; ++i) --rowind[i]; for (i = 0; i <= *n; ++i) --colptr[i]; zCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr, SLU_NC, SLU_Z, SLU_GE); L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[]."); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = 0: natural ordering * permc_spec = 1: minimum degree on structure of A'*A * permc_spec = 2: minimum degree on structure of A'+A * permc_spec = 3: approximate minimum degree for unsymmetric matrices */ permc_spec = options.ColPerm; get_perm_c(permc_spec, &A, perm_c); sp_preorder(&options, &A, perm_c, etree, &AC); panel_size = sp_ienv(1); relax = sp_ienv(2); zgstrf(&options, &AC, relax, panel_size, etree, NULL, 0, perm_c, perm_r, L, U, &stat, info); if ( *info == 0 ) { Lstore = (SCformat *) L->Store; Ustore = (NCformat *) U->Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); zQuerySpace(L, U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } else { printf("zgstrf() error returns INFO= %d\n", *info); if ( *info <= *n ) { /* factorization completes */ zQuerySpace(L, U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } } /* Restore to 1-based indexing */ for (i = 0; i < *nnz; ++i) ++rowind[i]; for (i = 0; i <= *n; ++i) ++colptr[i]; /* Save the LU factors in the factors handle */ LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); LUfactors->L = L; LUfactors->U = U; LUfactors->perm_c = perm_c; LUfactors->perm_r = perm_r; *f_factors = (fptr) LUfactors; /* Free un-wanted storage */ SUPERLU_FREE(etree); Destroy_SuperMatrix_Store(&A); Destroy_CompCol_Permuted(&AC); StatFree(&stat); } else if ( *iopt == 2 ) { /* Triangular solve */ /* Initialize the statistics variables. */ StatInit(&stat); /* Extract the LU factors in the factors handle */ LUfactors = (factors_t*) *f_factors; L = LUfactors->L; U = LUfactors->U; perm_c = LUfactors->perm_c; perm_r = LUfactors->perm_r; zCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_Z, SLU_GE); /* Solve the system A*X=B, overwriting B with X. */ zgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info); Destroy_SuperMatrix_Store(&B); StatFree(&stat); } else if ( *iopt == 3 ) { /* Free storage */ /* Free the LU factors in the factors handle */ LUfactors = (factors_t*) *f_factors; SUPERLU_FREE (LUfactors->perm_r); SUPERLU_FREE (LUfactors->perm_c); Destroy_SuperNode_Matrix(LUfactors->L); Destroy_CompCol_Matrix(LUfactors->U); SUPERLU_FREE (LUfactors->L); SUPERLU_FREE (LUfactors->U); SUPERLU_FREE (LUfactors); } else { fprintf(stderr,"Invalid iopt=%d passed to c_fortran_zgssv()\n",*iopt); exit(-1); } }
main(int argc, char *argv[]) { /* * Purpose * ======= * * The driver program ZLINSOLX1. * * This example illustrates how to use ZGSSVX to solve systems with the same * A but different right-hand side. * In this case, we factorize A only once in the first call to DGSSVX, * and reuse the following data structures in the subsequent call to ZGSSVX: * perm_c, perm_r, R, C, L, U. * */ char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; doublecomplex *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, m, n, nnz; doublecomplex *rhsb, *rhsx, *xact; double *R, *C; double *ferr, *berr; double u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default values for options argument: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("ZLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ zreadhb(&m, &n, &nnz, &a, &asub, &xa); zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); zCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_Z, SLU_GE); zCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_Z, SLU_GE); xact = doublecomplexMalloc(n * nrhs); ldx = n; zGenXtrue(n, nrhs, xact, ldx); zFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("LU factorization: zgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM USING THE FACTORED FORM OF A. ------------------------------------------------------------*/ options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ B.ncol = nrhs; /* Set the number of right-hand side */ /* Initialize the statistics variables. */ StatInit(&stat); zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("Triangular solve: zgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ doublecomplex *sol = (doublecomplex*) ((DNformat*) X.Store)->nzval; if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
static PyObject * Py_gssv(PyObject *self, PyObject *args, PyObject *kwdict) { PyObject *Py_B=NULL, *Py_X=NULL; PyArrayObject *nzvals=NULL; PyArrayObject *colind=NULL, *rowptr=NULL; int N, nnz; int info; int csc=0; int *perm_r=NULL, *perm_c=NULL; SuperMatrix A, B, L, U; superlu_options_t options; SuperLUStat_t stat; PyObject *option_dict = NULL; int type; int ssv_finished = 0; static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "options",NULL}; /* Get input arguments */ if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|iO", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &option_dict)) { return NULL; } if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); return NULL; } type = PyArray_TYPE(nzvals); if (!CHECK_SLU_TYPE(type)) { PyErr_SetString(PyExc_TypeError, "nzvals is not of a type supported by SuperLU"); return NULL; } if (!set_superlu_options_from_dict(&options, 0, option_dict, NULL, NULL)) { return NULL; } /* Create Space for output */ Py_X = PyArray_CopyFromObject(Py_B, type, 1, 2); if (Py_X == NULL) return NULL; if (csc) { if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, type)) { Py_DECREF(Py_X); return NULL; } } else { if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, type)) { Py_DECREF(Py_X); return NULL; } } if (DenseSuper_from_Numeric(&B, Py_X)) { Destroy_SuperMatrix_Store(&A); Py_DECREF(Py_X); return NULL; } /* B and Py_X share same data now but Py_X "owns" it */ /* Setup options */ if (setjmp(_superlu_py_jmpbuf)) { goto fail; } else { perm_c = intMalloc(N); perm_r = intMalloc(N); StatInit(&stat); /* Compute direct inverse of sparse Matrix */ gssv(type, &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); } ssv_finished = 1; SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); StatFree(&stat); return Py_BuildValue("Ni", Py_X, info); fail: SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ Destroy_SuperMatrix_Store(&B); if (ssv_finished) { /* Avoid trying to free partially initialized matrices; might leak some memory, but avoids a crash */ Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } StatFree(&stat); Py_XDECREF(Py_X); return NULL; }
static PyObject *SuperLU_solve(SuperLUObject * self, PyObject * args, PyObject * kwds) { PyArrayObject *b, *x = NULL; SuperMatrix B = { 0 }; #ifndef NPY_PY3K char itrans = 'N'; #else int itrans = 'N'; #endif int info; trans_t trans; SuperLUStat_t stat = { 0 }; static char *kwlist[] = { "rhs", "trans", NULL }; if (!CHECK_SLU_TYPE(self->type)) { PyErr_SetString(PyExc_ValueError, "unsupported data type"); return NULL; } #ifndef NPY_PY3K if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|c", kwlist, &PyArray_Type, &b, &itrans)) #else if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|C", kwlist, &PyArray_Type, &b, &itrans)) #endif return NULL; /* solve transposed system: matrix was passed row-wise instead of * column-wise */ if (itrans == 'n' || itrans == 'N') trans = NOTRANS; else if (itrans == 't' || itrans == 'T') trans = TRANS; else if (itrans == 'h' || itrans == 'H') trans = CONJ; else { PyErr_SetString(PyExc_ValueError, "trans must be N, T, or H"); return NULL; } x = (PyArrayObject*)PyArray_FROMANY( (PyObject*)b, self->type, 1, 2, NPY_F_CONTIGUOUS | NPY_ENSURECOPY); if (x == NULL) { goto fail; } if (x->dimensions[0] != self->n) { PyErr_SetString(PyExc_ValueError, "b is of incompatible size"); goto fail; } if (setjmp(_superlu_py_jmpbuf)) goto fail; if (DenseSuper_from_Numeric(&B, (PyObject *)x)) goto fail; StatInit(&stat); /* Solve the system, overwriting vector x. */ gstrs(self->type, trans, &self->L, &self->U, self->perm_c, self->perm_r, &B, &stat, &info); if (info) { PyErr_SetString(PyExc_SystemError, "gstrs was called with invalid arguments"); goto fail; } /* free memory */ Destroy_SuperMatrix_Store(&B); StatFree(&stat); return (PyObject *) x; fail: XDestroy_SuperMatrix_Store(&B); XStatFree(&stat); Py_XDECREF(x); return NULL; }
PyObject *newSuperLUObject(SuperMatrix * A, PyObject * option_dict, int intype, int ilu) { /* A must be in SLU_NC format used by the factorization routine. */ SuperLUObject *self; SuperMatrix AC = { 0 }; /* Matrix postmultiplied by Pc */ int lwork = 0; int *etree = NULL; int info; int n; superlu_options_t options; SuperLUStat_t stat = { 0 }; int panel_size, relax; n = A->ncol; if (!set_superlu_options_from_dict(&options, ilu, option_dict, &panel_size, &relax)) { return NULL; } /* Create SLUObject */ self = PyObject_New(SuperLUObject, &SuperLUType); if (self == NULL) return PyErr_NoMemory(); self->m = A->nrow; self->n = n; self->perm_r = NULL; self->perm_c = NULL; self->L.Store = NULL; self->U.Store = NULL; self->cached_U = NULL; self->cached_L = NULL; self->type = intype; if (setjmp(_superlu_py_jmpbuf)) goto fail; /* Calculate and apply minimum degree ordering */ etree = intMalloc(n); self->perm_r = intMalloc(n); self->perm_c = intMalloc(n); StatInit(&stat); get_perm_c(options.ColPerm, A, self->perm_c); /* calc column permutation */ sp_preorder(&options, A, self->perm_c, etree, &AC); /* apply column * permutation */ /* Perform factorization */ if (!CHECK_SLU_TYPE(SLU_TYPECODE_TO_NPY(A->Dtype))) { PyErr_SetString(PyExc_ValueError, "Invalid type in SuperMatrix."); goto fail; } if (ilu) { gsitrf(SLU_TYPECODE_TO_NPY(A->Dtype), &options, &AC, relax, panel_size, etree, NULL, lwork, self->perm_c, self->perm_r, &self->L, &self->U, &stat, &info); } else { gstrf(SLU_TYPECODE_TO_NPY(A->Dtype), &options, &AC, relax, panel_size, etree, NULL, lwork, self->perm_c, self->perm_r, &self->L, &self->U, &stat, &info); } if (info) { if (info < 0) PyErr_SetString(PyExc_SystemError, "gstrf was called with invalid arguments"); else { if (info <= n) PyErr_SetString(PyExc_RuntimeError, "Factor is exactly singular"); else PyErr_NoMemory(); } goto fail; } /* free memory */ SUPERLU_FREE(etree); Destroy_CompCol_Permuted(&AC); StatFree(&stat); return (PyObject *) self; fail: SUPERLU_FREE(etree); XDestroy_CompCol_Permuted(&AC); XStatFree(&stat); Py_DECREF(self); return NULL; }
main(int argc, char *argv[]) { SuperMatrix A, AC, L, U, B; NCformat *Astore; SCPformat *Lstore; NCPformat *Ustore; superlumt_options_t superlumt_options; pxgstrf_shared_t pxgstrf_shared; pdgstrf_threadarg_t *pdgstrf_threadarg; int nprocs; fact_t fact; trans_t trans; yes_no_t refact, usepr; double u, drop_tol; double *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ void *work; int info, lwork, nrhs, ldx; int m, n, nnz, permc_spec, panel_size, relax; int i, firstfact; double *rhsb, *xact; Gstat_t Gstat; flops_t flopcnt; void parse_command_line(); /* Default parameters to control factorization. */ nprocs = 1; fact = EQUILIBRATE; trans = NOTRANS; panel_size = sp_ienv(1); relax = sp_ienv(2); u = 1.0; usepr = NO; drop_tol = 0.0; work = NULL; lwork = 0; nrhs = 1; /* Get the number of processes from command line. */ parse_command_line(argc, argv, &nprocs); /* Read the input matrix stored in Harwell-Boeing format. */ dreadhb(&m, &n, &nnz, &a, &asub, &xa); /* Set up the sparse matrix data structure for A. */ dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); if (!(rhsb = doubleMalloc(m * nrhs))) SUPERLU_ABORT("Malloc fails for rhsb[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(trans, nrhs, xact, ldx, &A, &B); if (!(perm_r = intMalloc(m))) SUPERLU_ABORT("Malloc fails for perm_r[]."); if (!(perm_c = intMalloc(n))) SUPERLU_ABORT("Malloc fails for perm_c[]."); /******************************** * THE FIRST TIME FACTORIZATION * ********************************/ /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); /* ------------------------------------------------------------ Get column permutation vector perm_c[], according to permc_spec: permc_spec = 0: natural ordering permc_spec = 1: minimum degree ordering on structure of A'*A permc_spec = 2: minimum degree ordering on structure of A'+A permc_spec = 3: approximate minimum degree for unsymmetric matrices ------------------------------------------------------------*/ permc_spec = 1; get_perm_c(permc_spec, &A, perm_c); /* ------------------------------------------------------------ Initialize the option structure superlumt_options using the user-input parameters; Apply perm_c to the columns of original A to form AC. ------------------------------------------------------------*/ refact= NO; pdgstrf_init(nprocs, fact, trans, refact, panel_size, relax, u, usepr, drop_tol, perm_c, perm_r, work, lwork, &A, &AC, &superlumt_options, &Gstat); /* ------------------------------------------------------------ Compute the LU factorization of A. The following routine will create nprocs threads. ------------------------------------------------------------*/ pdgstrf(&superlumt_options, &AC, perm_r, &L, &U, &Gstat, &info); flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; Gstat.ops[FACT] = flopcnt; /* ------------------------------------------------------------ Solve the system A*X=B, overwriting B with X. ------------------------------------------------------------*/ dgstrs(trans, &L, &U, perm_r, perm_c, &B, &Gstat, &info); printf("\n** Result of sparse LU **\n"); dinf_norm_error(nrhs, &B, xact); /* Check inf. norm of the error */ Destroy_CompCol_Permuted(&AC); /* Free extra arrays in AC. */ /********************************* * THE SUBSEQUENT FACTORIZATIONS * *********************************/ /* ------------------------------------------------------------ Re-initialize statistics variables and options used by the factorization routine pdgstrf(). ------------------------------------------------------------*/ StatInit(n, nprocs, &Gstat); refact= YES; pdgstrf_init(nprocs, fact, trans, refact, panel_size, relax, u, usepr, drop_tol, perm_c, perm_r, work, lwork, &A, &AC, &superlumt_options, &Gstat); /* ------------------------------------------------------------ Compute the LU factorization of A. The following routine will create nprocs threads. ------------------------------------------------------------*/ pdgstrf(&superlumt_options, &AC, perm_r, &L, &U, &Gstat, &info); flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; Gstat.ops[FACT] = flopcnt; /* ------------------------------------------------------------ Re-generate right-hand side B, then solve A*X= B. ------------------------------------------------------------*/ dFillRHS(trans, nrhs, xact, ldx, &A, &B); dgstrs(trans, &L, &U, perm_r, perm_c, &B, &Gstat, &info); /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ pxgstrf_finalize(&superlumt_options, &AC); printf("\n** Result of sparse LU **\n"); dinf_norm_error(nrhs, &B, xact); /* Check inf. norm of the error */ Lstore = (SCPformat *) L.Store; Ustore = (NCPformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); fflush(stdout); SUPERLU_FREE (rhsb); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); if ( lwork >= 0 ) { Destroy_SuperNode_SCP(&L); Destroy_CompCol_NCP(&U); } StatFree(&Gstat); }
EXTERN_C_END /*MC MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices via the external package SuperLU. Use ./configure --download-superlu to have PETSc installed with SuperLU Options Database Keys: + -mat_superlu_equil <FALSE> - Equil (None) . -mat_superlu_colperm <COLAMD> - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD . -mat_superlu_iterrefine <NOREFINE> - (choose one of) NOREFINE SINGLE DOUBLE EXTRA . -mat_superlu_symmetricmode: <FALSE> - SymmetricMode (None) . -mat_superlu_diagpivotthresh <1> - DiagPivotThresh (None) . -mat_superlu_pivotgrowth <FALSE> - PivotGrowth (None) . -mat_superlu_conditionnumber <FALSE> - ConditionNumber (None) . -mat_superlu_rowperm <NOROWPERM> - (choose one of) NOROWPERM LargeDiag . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None) . -mat_superlu_printstat <FALSE> - PrintStat (None) . -mat_superlu_lwork <0> - size of work array in bytes used by factorization (None) . -mat_superlu_ilu_droptol <0> - ILU_DropTol (None) . -mat_superlu_ilu_filltol <0> - ILU_FillTol (None) . -mat_superlu_ilu_fillfactor <0> - ILU_FillFactor (None) . -mat_superlu_ilu_droprull <0> - ILU_DropRule (None) . -mat_superlu_ilu_norm <0> - ILU_Norm (None) - -mat_superlu_ilu_milu <0> - ILU_MILU (None) Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves Level: beginner .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, MATSOLVERSPOOLES, PCFactorSetMatSolverPackage(), MatSolverPackage M*/ EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "MatGetFactor_seqaij_superlu" PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F) { Mat B; Mat_SuperLU *lu; PetscErrorCode ierr; PetscInt indx,m=A->rmap->n,n=A->cmap->n; PetscBool flg; const char *colperm[]={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */ const char *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"}; const char *rowperm[]={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */ PetscFunctionBegin; ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU){ B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU; B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU; } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); B->ops->destroy = MatDestroy_SuperLU; B->ops->view = MatView_SuperLU; B->factortype = ftype; B->assembled = PETSC_TRUE; /* required by -ksp_view */ B->preallocated = PETSC_TRUE; ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr); if (ftype == MAT_FACTOR_LU){ set_default_options(&lu->options); /* Comments from SuperLU_4.0/SRC/dgssvx.c: "Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A is overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)." We set 'options.Equil = NO' as default because additional space is needed for it. */ lu->options.Equil = NO; } else if (ftype == MAT_FACTOR_ILU){ /* Set the default input options of ilu: */ ilu_set_default_options(&lu->options); } lu->options.PrintStat = NO; /* Initialize the statistics variables. */ StatInit(&lu->stat); lu->lwork = 0; /* allocate space internally by system malloc */ ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr); ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr); if (flg) {lu->options.ColPerm = (colperm_t)indx;} ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr); if (flg) { lu->options.IterRefine = (IterRefine_t)indx;} ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr); if (flg) lu->options.SymmetricMode = YES; ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&lu->options.DiagPivotThresh,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr); if (flg) lu->options.PivotGrowth = YES; ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr); if (flg) lu->options.ConditionNumber = YES; ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr); if (flg) {lu->options.RowPerm = (rowperm_t)indx;} ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr); if (flg) lu->options.ReplaceTinyPivot = YES; ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr); if (flg) lu->options.PrintStat = YES; ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr); if (lu->lwork > 0 ){ ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr); } else if (lu->lwork != 0 && lu->lwork != -1){ ierr = PetscPrintf(PETSC_COMM_SELF," Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork); lu->lwork = 0; } /* ilu options */ ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&lu->options.ILU_DropTol,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&lu->options.ILU_FillTol,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&lu->options.ILU_FillFactor,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,PETSC_NULL);CHKERRQ(ierr); ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr); if (flg){ lu->options.ILU_Norm = (norm_t)indx; } ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr); if (flg){ lu->options.ILU_MILU = (milu_t)indx; } PetscOptionsEnd(); if (lu->options.Equil == YES) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */ ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr); } /* Allocate spaces (notice sizes are for the transpose) */ ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr); ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr); ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr); /* create rhs and solution x without allocate space for .Store */ #if defined(PETSC_USE_COMPLEX) zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE); zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE); #else dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE); #endif #ifdef SUPERLU2 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr); #endif ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSuperluSetILUDropTol_C","MatSuperluSetILUDropTol_SuperLU",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr); B->spptr = lu; *F = B; PetscFunctionReturn(0); }
bool SparseMatrix::solveSLU (Vector& B) { int ierr = ncol+1; if (!factored) this->optimiseSLU(); #ifdef HAS_SUPERLU_MT if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata; slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Get column permutation vector perm_c[], according to permc_spec: // permc_spec = 0: natural ordering // permc_spec = 1: minimum degree ordering on structure of A'*A // permc_spec = 2: minimum degree ordering on structure of A'+A // permc_spec = 3: approximate minimum degree for unsymmetric matrices int permc_spec = 1; get_perm_c(permc_spec, &slu->A, slu->perm_c); // Create right-hand-side/solution vector(s) size_t nrhs = B.size() / nrow; SuperMatrix Bmat; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); // Invoke the simple driver pdgssv(numThreads, &slu->A, slu->perm_c, slu->perm_r, &slu->L, &slu->U, &Bmat, &ierr); if (ierr > 0) std::cerr <<"SuperLU_MT Failure "<< ierr << std::endl; Destroy_SuperMatrix_Store(&Bmat); #elif defined(HAS_SUPERLU) if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(1); slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else if (factored) slu->opts->Fact = FACTORED; // Re-use previous factorization else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Create right-hand-side/solution vector(s) size_t nrhs = B.size() / nrow; SuperMatrix Bmat; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); SuperLUStat_t stat; StatInit(&stat); // Invoke the simple driver dgssv(slu->opts, &slu->A, slu->perm_c, slu->perm_r, &slu->L, &slu->U, &Bmat, &stat, &ierr); if (ierr > 0) std::cerr <<"SuperLU Failure "<< ierr << std::endl; else factored = true; if (printSLUstat) StatPrint(&stat); StatFree(&stat); Destroy_SuperMatrix_Store(&Bmat); #else std::cerr <<"SparseMatrix::solve: SuperLU solver not available"<< std::endl; #endif return ierr == 0; }
bool SparseMatrix::solveSLUx (Vector& B, Real* rcond) { int ierr = ncol+1; if (!factored) this->optimiseSLU(); #ifdef HAS_SUPERLU_MT if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(numThreads); slu->equed = NOEQUIL; slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; slu->C = new Real[ncol]; slu->R = new Real[nrow]; slu->opts->etree = new int[ncol]; slu->opts->colcnt_h = new int[ncol]; slu->opts->part_super_h = new int[ncol]; memset(slu->opts->colcnt_h, 0, ncol*sizeof(int)); memset(slu->opts->part_super_h, 0, ncol*sizeof(int)); memset(slu->opts->etree, 0, ncol*sizeof(int)); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); // Get column permutation vector perm_c[], according to permc_spec: // permc_spec = 0: natural ordering // permc_spec = 1: minimum degree ordering on structure of A'*A // permc_spec = 2: minimum degree ordering on structure of A'+A // permc_spec = 3: approximate minimum degree for unsymmetric matrices int permc_spec = 1; get_perm_c(permc_spec, &slu->A, slu->perm_c); } else if (factored) slu->opts->fact = FACTORED; // Re-use previous factorization else slu->opts->refact = YES; // Re-use previous ordering // Create right-hand-side and solution vector(s) Vector X(B.size()); SuperMatrix Bmat, Xmat; const size_t nrhs = B.size() / nrow; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&Xmat, nrow, nrhs, X.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); Real ferr[nrhs], berr[nrhs]; superlu_memusage_t mem_usage; // Invoke the expert driver pdgssvx(numThreads, slu->opts, &slu->A, slu->perm_c, slu->perm_r, &slu->equed, slu->R, slu->C, &slu->L, &slu->U, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &mem_usage, &ierr); B.swap(X); if (ierr > 0) std::cerr <<"SuperLU_MT Failure "<< ierr << std::endl; else if (!factored) { factored = true; if (rcond) *rcond = slu->rcond; } Destroy_SuperMatrix_Store(&Bmat); Destroy_SuperMatrix_Store(&Xmat); #elif defined(HAS_SUPERLU) if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(1); slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; slu->etree = new int[ncol]; slu->C = new Real[ncol]; slu->R = new Real[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else if (factored) slu->opts->Fact = FACTORED; // Re-use previous factorization else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Create right-hand-side vector and solution vector Vector X(B.size()); SuperMatrix Bmat, Xmat; const size_t nrhs = B.size() / nrow; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&Xmat, nrow, nrhs, X.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); slu->opts->ConditionNumber = printSLUstat || rcond ? YES : NO; slu->opts->PivotGrowth = printSLUstat ? YES : NO; void* work = 0; int lwork = 0; Real ferr[nrhs], berr[nrhs]; mem_usage_t mem_usage; SuperLUStat_t stat; StatInit(&stat); // Invoke the expert driver #if SUPERLU_VERSION == 5 GlobalLU_t Glu; dgssvx(slu->opts, &slu->A, slu->perm_c, slu->perm_r, slu->etree, slu->equed, slu->R, slu->C, &slu->L, &slu->U, work, lwork, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &Glu, &mem_usage, &stat, &ierr); #else dgssvx(slu->opts, &slu->A, slu->perm_c, slu->perm_r, slu->etree, slu->equed, slu->R, slu->C, &slu->L, &slu->U, work, lwork, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &mem_usage, &stat, &ierr); #endif B.swap(X); if (ierr > 0) std::cerr <<"SuperLU Failure "<< ierr << std::endl; else if (!factored) { factored = true; if (rcond) *rcond = slu->rcond; } if (printSLUstat) { StatPrint(&stat); IFEM::cout <<"Reciprocal condition number = "<< slu->rcond <<"\nReciprocal pivot growth = "<< slu->rpg << std::endl; } StatFree(&stat); Destroy_SuperMatrix_Store(&Bmat); Destroy_SuperMatrix_Store(&Xmat); #else std::cerr <<"SparseMatrix::solve: SuperLU solver not available"<< std::endl; #endif return ierr == 0; }
int main ( ) /******************************************************************************/ /* Purpose: D_SAMPLE_ST tests the SUPERLU solver with a 5x5 double precision real matrix. Discussion: The general (GE) representation of the matrix is: [ 19 0 21 21 0 12 21 0 0 0 0 12 16 0 0 0 0 0 5 21 12 12 0 0 18 ] The (0-based) compressed column (CC) representation of this matrix is: I CC A -- -- -- 0 0 19 1 12 4 12 1 3 21 2 12 4 12 0 6 21 2 16 0 8 21 3 5 3 10 21 4 18 * 12 * The right hand side B and solution X are # B X -- -- ---------- 0 1 -0.03125 1 1 0.0654762 2 1 0.0133929 3 1 0.0625 4 1 0.0327381 Licensing: This code is distributed under the GNU LGPL license. Modified: 18 July 2014 Author: John Burkardt Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide. */ { SuperMatrix A; double *acc; double *b; double *b2; SuperMatrix B; int *ccc; int i; int *icc; int info; int j; SuperMatrix L; int m; int n; int nrhs = 1; int ncc; superlu_options_t options; int *perm_c; int permc_spec; int *perm_r; SuperLUStat_t stat; SuperMatrix U; timestamp ( ); printf ( "\n" ); printf ( "D_SAMPLE_ST:\n" ); printf ( " C version\n" ); printf ( " SUPERLU solves a double precision real linear system.\n" ); printf ( " The matrix is read from a Sparse Triplet (ST) file.\n" ); /* Read the matrix from a file associated with standard input, in sparse triplet (ST) format, into compressed column (CC) format. */ dreadtriple ( &m, &n, &ncc, &acc, &icc, &ccc ); /* Print the matrix. */ cc_print ( m, n, ncc, icc, ccc, acc, " CC Matrix:" ); /* Convert the compressed column (CC) matrix into a SuperMatrix A. */ dCreate_CompCol_Matrix ( &A, m, n, ncc, acc, icc, ccc, SLU_NC, SLU_D, SLU_GE ); /* Create the right-hand side matrix. */ b = ( double * ) malloc ( m * sizeof ( double ) ); for ( i = 0; i < m; i++ ) { b[i] = 1.0; } printf ( "\n" ); printf ( " Right hand side:\n" ); printf ( "\n" ); for ( i = 0; i < m; i++ ) { printf ( "%g\n", b[i] ); } /* Create Super Right Hand Side. */ dCreate_Dense_Matrix ( &B, m, nrhs, b, m, SLU_DN, SLU_D, SLU_GE ); /* Set space for the permutations. */ perm_r = ( int * ) malloc ( m * sizeof ( int ) ); perm_c = ( int * ) malloc ( n * sizeof ( int ) ); /* Set the input options. */ set_default_options ( &options ); options.ColPerm = NATURAL; /* Initialize the statistics variables. */ StatInit ( &stat ); /* Solve the linear system. */ dgssv ( &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info ); dPrint_CompCol_Matrix ( ( char * ) "A", &A ); dPrint_CompCol_Matrix ( ( char * ) "U", &U ); dPrint_SuperNode_Matrix ( ( char * ) "L", &L ); print_int_vec ( ( char * ) "\nperm_r", m, perm_r ); /* By some miracle involving addresses, the solution has been put into the B vector. */ printf ( "\n" ); printf ( " Computed solution:\n" ); printf ( "\n" ); for ( i = 0; i < m; i++ ) { printf ( "%g\n", b[i] ); } /* Demonstrate that RHS is really the solution now. Multiply it by the matrix. */ b2 = cc_mv ( m, n, ncc, icc, ccc, acc, b ); printf ( "\n" ); printf ( " Product A*X:\n" ); printf ( "\n" ); for ( i = 0; i < m; i++ ) { printf ( "%g\n", b2[i] ); } /* Free memory. */ free ( b ); free ( b2 ); free ( perm_c ); free ( perm_r ); Destroy_SuperMatrix_Store ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); StatFree ( &stat ); /* Terminate. */ printf ( "\n" ); printf ( "D_SAMPLE_ST:\n" ); printf ( " Normal end of execution.\n" ); printf ( "\n" ); timestamp ( ); return 0; }
void dgssvx(char *fact, char *trans, char *refact, SuperMatrix *A, factor_param_t *factor_params, int *perm_c, int *perm_r, int *etree, char *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, double *ferr, double *berr, mem_usage_t *mem_usage, int *info ) { /* * Purpose * ======= * * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from dgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. If fact = 'E', scaling factors are computed to equilibrate the * system: * trans = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * trans = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if trans='N') * or diag(C)*B (if trans = 'T' or 'C'). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation * matrix that usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 1.3. If fact = 'N' or 'E', the LU decomposition is used to factor the * matrix A (after equilibration if fact = 'E') as Pr*A*Pc = L*U, * with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form of * A is used to estimate the condition number of the matrix A. If * the reciprocal of the condition number is less than machine * precision, info = A->ncol+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so * that it solves the original system before equilibration. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the transpose of A: * * 2.1. If fact = 'E', scaling factors are computed to equilibrate the * system: * trans = 'N': diag(R)*A'*diag(C) *inv(diag(C))*X = diag(R)*B * trans = 'T': (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = 'C': (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans='N') or diag(C)*B (if trans = 'T' or 'C'). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 2.3. If fact = 'N' or 'E', the LU decomposition is used to factor the * transpose(A) (after equilibration if fact = 'E') as * Pr*transpose(A)*Pc = L*U with the permutation Pr determined by * partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form * of transpose(A) is used to estimate the condition number of the * matrix A. If the reciprocal of the condition number * is less than machine precision, info = A->nrow+1 is returned as * a warning, but the routine still goes on to solve for X and * computes error bounds as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so * that it solves the original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * fact (input) char* * Specifies whether or not the factored form of the matrix * A is supplied on entry, and if not, whether the matrix A should * be equilibrated before it is factored. * = 'F': On entry, L, U, perm_r and perm_c contain the factored * form of A. If equed is not 'N', the matrix A has been * equilibrated with scaling factors R and C. * A, L, U, perm_r are not modified. * = 'N': The matrix A will be factored, and the factors will be * stored in L and U. * = 'E': The matrix A will be equilibrated if necessary, then * factored into L and U. * * trans (input) char* * Specifies the form of the system of equations: * = 'N': A * X = B (No transpose) * = 'T': A**T * X = B (Transpose) * = 'C': A**H * X = B (Transpose) * * refact (input) char* * Specifies whether we want to re-factor the matrix. * = 'N': Factor the matrix A. * = 'Y': Matrix A was factored before, now we want to re-factor * matrix A with perm_r and etree as inputs. Use * the same storage for the L\U factors previously allocated, * expand it if necessary. User should insure to use the same * memory model. In this case, perm_r may be modified due to * different pivoting determined by diagonal threshold. * If fact = 'F', then refact is not accessed. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of the linear equations is A->nrow. Currently, the type of A can be: * Stype = NC or NR, Dtype = D_, Mtype = GE. In the future, * more general A can be handled. * * On entry, If fact = 'F' and equed is not 'N', then A must have * been equilibrated by the scaling factors in R and/or C. * A is not modified if fact = 'F' or 'N', or if fact = 'E' and * equed = 'N' on exit. * * On exit, if fact = 'E' and equed is not 'N', A is scaled as follows: * If A->Stype = NC: * equed = 'R': A := diag(R) * A * equed = 'C': A := A * diag(C) * equed = 'B': A := diag(R) * A * diag(C). * If A->Stype = NR: * equed = 'R': transpose(A) := diag(R) * transpose(A) * equed = 'C': transpose(A) := transpose(A) * diag(C) * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C). * * factor_params (input) factor_param_t* * The structure defines the input scalar parameters, consisting of * the following fields. If factor_params = NULL, the default * values are used for all the fields; otherwise, the values * are given by the user. * - panel_size (int): Panel size. A panel consists of at most * panel_size consecutive columns. If panel_size = -1, use * default value 8. * - relax (int): To control degree of relaxing supernodes. If the * number of nodes (columns) in a subtree of the elimination * tree is less than relax, this subtree is considered as one * supernode, regardless of the row structures of those columns. * If relax = -1, use default value 8. * - diag_pivot_thresh (double): Diagonal pivoting threshold. * At step j of the Gaussian elimination, if * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)), * then use A_jj as pivot. 0 <= diag_pivot_thresh <= 1. * If diag_pivot_thresh = -1, use default value 1.0, * which corresponds to standard partial pivoting. * - drop_tol (double): Drop tolerance threshold. (NOT IMPLEMENTED) * At step j of the Gaussian elimination, if * abs(A_ij)/(max_i abs(A_ij)) < drop_tol, * then drop entry A_ij. 0 <= drop_tol <= 1. * If drop_tol = -1, use default value 0.0, which corresponds to * standard Gaussian elimination. * * perm_c (input/output) int* * If A->Stype = NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = NR, column permutation vector of size A->nrow, * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If refact is not 'Y', perm_r is output argument; * If refact = 'Y', the pivoting routine will try to use the input * perm_r, unless a certain threshold criterion is violated. * In that case, perm_r is overwritten by a new permutation * determined by partial pivoting or diagonal threshold pivoting. * * etree (input/output) int*, dimension (A->ncol) * Elimination tree of Pc'*A'*A*Pc. * If fact is not 'F' and refact = 'Y', etree is an input argument, * otherwise it is an output argument. * Note: etree is a vector of parent pointers for a forest whose * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. * * equed (input/output) char* * Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If fact = 'F', equed is an input argument, otherwise it is * an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = 'R' or 'B', A (if A->Stype = NC) or transpose(A) (if * A->Stype = NR) is multiplied on the left by diag(R). * If equed = 'N' or 'C', R is not accessed. * If fact = 'F', R is an input argument; otherwise, R is output. * If fact = 'F' and equed = 'R' or 'B', each element of R must * be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = 'C' or 'B', A (if A->Stype = NC) or transpose(A) (if * A->Stype = NR) is multiplied on the right by diag(C). * If equed = 'N' or 'R', C is not accessed. * If fact = 'F', C is an input argument; otherwise, C is output. * If fact = 'F' and equed = 'C' or 'B', each element of C must * be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SC, Dtype = D_, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = NC, Dtype = D_, Mtype = TRU. * * work (workspace/output) void*, size (lwork) (in bytes) * User supplied workspace, should be large enough * to hold data structures for factors L and U. * On exit, if fact is not 'F', L and U point to this array. * * lwork (input) int * Specifies the size of work array in bytes. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * mem_usage->total_needed; no other side effects. * * See argument 'mem_usage' for memory usage statistics. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = D_, Mtype = GE. * On entry, the right hand side matrix. * On exit, * if equed = 'N', B is not modified; otherwise * if A->Stype = NC: * if trans = 'N' and equed = 'R' or 'B', B is overwritten by * diag(R)*B; * if trans = 'T' or 'C' and equed = 'C' of 'B', B is * overwritten by diag(C)*B; * if A->Stype = NR: * if trans = 'N' and equed = 'C' or 'B', B is overwritten by * diag(C)*B; * if trans = 'T' or 'C' and equed = 'R' of 'B', B is * overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = DN, Dtype = D_, Mtype = GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not 'N', and the solution to the equilibrated * system is inv(diag(C))*X if trans = 'N' and equed = 'C' or 'B', * or inv(diag(R))*X if trans = 'T' or 'C' and equed = 'R' or 'B'. * * recip_pivot_growth (output) double* * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). * The infinity norm is used. If recip_pivot_growth is much less * than 1, the stability of the LU factorization could be poor. * * rcond (output) double* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * mem_usage (output) mem_usage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ; char trant[1], norm[1]; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; double diag_pivot_thresh, drop_tol; double t0; /* temporary time */ double *utime; extern SuperLUStat_t SuperLUStat; /* External functions */ extern double dlangs(char *, SuperMatrix *); extern double dlamch_(char *); Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; #if 0 printf("dgssvx: fact=%c, trans=%c, refact=%c, equed=%c\n", *fact, *trans, *refact, *equed); #endif *info = 0; nofact = lsame_(fact, "N"); equil = lsame_(fact, "E"); notran = lsame_(trans, "N"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); smlnum = dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters */ if (!nofact && !equil && !lsame_(fact, "F")) *info = -1; else if (!notran && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -2; else if ( !(lsame_(refact,"Y") || lsame_(refact, "N")) ) *info = -3; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != NC && A->Stype != NR) || A->Dtype != D_ || A->Mtype != GE ) *info = -4; else if (lsame_(fact, "F") && !(rowequ || colequ || lsame_(equed, "N"))) *info = -9; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -10; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -11; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -15; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != DN || B->Dtype != D_ || B->Mtype != GE ) *info = -16; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || B->ncol != X->ncol || X->Stype != DN || X->Dtype != D_ || X->Mtype != GE ) *info = -17; } } if (*info != 0) { i = -(*info); xerbla_("dgssvx", &i); return; } /* Default values for factor_params */ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = 1.0; drop_tol = 0.0; if ( factor_params != NULL ) { if ( factor_params->panel_size != -1 ) panel_size = factor_params->panel_size; if ( factor_params->relax != -1 ) relax = factor_params->relax; if ( factor_params->diag_pivot_thresh != -1 ) diag_pivot_thresh = factor_params->diag_pivot_thresh; if ( factor_params->drop_tol != -1 ) drop_tol = factor_params->drop_tol; } StatInit(panel_size, relax); utime = SuperLUStat.utime; /* Convert A to NC format when necessary. */ if ( A->Stype == NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ *trant = 'T'; notran = 0; } else { *trant = 'N'; notran = 1; } } else { /* A->Stype == NC */ *trant = *trans; AA = A; } if ( equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); } utime[EQUIL] = SuperLU_timer_() - t0; } /* Scale the right hand side if equilibration was performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } if ( nofact || equil ) { t0 = SuperLU_timer_(); sp_preorder(refact, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); */ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size, etree, work, lwork, perm_r, perm_c, L, U, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } } if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U); } return; } /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, info); utime[RCOND] = SuperLU_timer_() - t0; /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs (trant, L, U, perm_r, perm_c, X, info); utime[SOLVE] = SuperLU_timer_() - t0; /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ t0 = SuperLU_timer_(); dgsrfs(trant, AA, L, U, perm_r, perm_c, equed, R, C, B, X, ferr, berr, info); utime[REFINE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ if ( *rcond < dlamch_("E") ) *info = A->ncol + 1; dQuerySpace(L, U, panel_size, mem_usage); if ( nofact || equil ) Destroy_CompCol_Permuted(&AC); if ( A->Stype == NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* PrintStat( &SuperLUStat ); */ StatFree(); }
SolveSuperLU (const MatriceMorse<R> &AA, int strategy, double ttgv, double epsilon, double pivot, double pivot_sym, string ¶m_char, KN<long> pperm_r, KN<long> pperm_c): eps(epsilon), epsr(0), tgv(ttgv), etree(0), string_option(param_char), perm_r(pperm_r), perm_c(pperm_c), RR(0), CC(0), tol_pivot_sym(pivot_sym), tol_pivot(pivot) { SuperMatrix B, X; SuperLUStat_t stat; void *work = 0; int info, lwork = 0/*, nrhs = 1*/; int i; double ferr[1]; double berr[1]; double rpg, rcond; R *bb; R *xx; A.Store = 0; B.Store = 0; X.Store = 0; L.Store = 0; U.Store = 0; int status; n = AA.n; m = AA.m; nnz = AA.nbcoef; arow = AA.a; asubrow = AA.cl; xarow = AA.lg; /* FreeFem++ use Morse Format */ // FFCS - "this->" required by g++ 4.7 this->CompRow_to_CompCol(m, n, nnz, arow, asubrow, xarow, &a, &asub, &xa); /* Defaults */ lwork = 0; // nrhs = 0; /* Set the default values for options argument: * options.Fact = DOFACT; * options.Equil = YES; * options.ColPerm = COLAMD; * options.DiagPivotThresh = 1.0; * options.Trans = NOTRANS; * options.IterRefine = NOREFINE; * options.SymmetricMode = NO; * options.PivotGrowth = NO; * options.ConditionNumber = NO; * options.PrintStat = YES; */ set_default_options(&options); printf(".. default options:\n"); printf("\tFact\t %8d\n", options.Fact); printf("\tEquil\t %8d\n", options.Equil); printf("\tColPerm\t %8d\n", options.ColPerm); printf("\tDiagPivotThresh %8.4f\n", options.DiagPivotThresh); printf("\tTrans\t %8d\n", options.Trans); printf("\tIterRefine\t%4d\n", options.IterRefine); printf("\tSymmetricMode\t%4d\n", options.SymmetricMode); printf("\tPivotGrowth\t%4d\n", options.PivotGrowth); printf("\tConditionNumber\t%4d\n", options.ConditionNumber); printf("..\n"); if (!string_option.empty()) {read_options_freefem(string_option, &options);} printf(".. options:\n"); printf("\tFact\t %8d\n", options.Fact); printf("\tEquil\t %8d\n", options.Equil); printf("\tColPerm\t %8d\n", options.ColPerm); printf("\tDiagPivotThresh %8.4f\n", options.DiagPivotThresh); printf("\tTrans\t %8d\n", options.Trans); printf("\tIterRefine\t%4d\n", options.IterRefine); printf("\tSymmetricMode\t%4d\n", options.SymmetricMode); printf("\tPivotGrowth\t%4d\n", options.PivotGrowth); printf("\tConditionNumber\t%4d\n", options.ConditionNumber); printf("..\n"); Dtype_t R_SLU = SuperLUDriver<R>::R_SLU_T(); // FFCS - "this->" required by g++ 4.7 this->Create_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, R_SLU, SLU_GE); this->Create_Dense_Matrix(&B, m, 0, (R *)0, m, SLU_DN, R_SLU, SLU_GE); this->Create_Dense_Matrix(&X, m, 0, (R *)0, m, SLU_DN, R_SLU, SLU_GE); if (etree.size() == 0) {etree.resize(n);} if (perm_r.size() == 0) {perm_r.resize(n);} if (perm_c.size() == 0) {perm_c.resize(n);} if (!(RR = new double[n])) { ABORT("SUPERLU_MALLOC fails for R[]."); } for (int ii = 0; ii < n; ii++) { RR[ii] = 1.; } if (!(CC = new double[m])) { ABORT("SUPERLU_MALLOC fails for C[]."); } for (int ii = 0; ii < n; ii++) { CC[ii] = 1.; } ferr[0] = 0; berr[0] = 0; /* Initialize the statistics variables. */ StatInit(&stat); /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ SuperLUDriver<R>::gssvx(&options, &A, perm_c, perm_r, etree, equed, RR, CC, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if (verbosity > 2) { printf("LU factorization: dgssvx() returns info %d\n", info); } if (verbosity > 3) { if (info == 0 || info == n + 1) { if (options.PivotGrowth) {printf("Recip. pivot growth = %e\n", rpg);} if (options.ConditionNumber) { printf("Recip. condition number = %e\n", rcond); } Lstore = (SCformat *)L.Store; Ustore = (NCformat *)U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu / 1e6, mem_usage.total_needed / 1e6, stat.expansions ); fflush(stdout); } else if (info > 0 && lwork == -1) { printf("** Estimated memory: %d bytes\n", info - n); } } if (verbosity > 5) {StatPrint(&stat);} StatFree(&stat); if (B.Store) {Destroy_SuperMatrix_Store(&B);} if (X.Store) {Destroy_SuperMatrix_Store(&X);} options.Fact = FACTORED;/* Indicate the factored form of A is supplied. */ }
int HYPRE_ParCSR_SuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector b, HYPRE_ParVector x ) { #ifdef HAVE_SUPERLU int startRow, endRow, nrows, *partition, *AdiagI, *AdiagJ, nnz; int irow, colNum, index, *cscI, *cscJ, jcol, *colLengs; int *etree, permcSpec, lwork, panelSize, relax, info; double *AdiagA, *cscA, diagPivotThresh, dropTol; char refact[1]; hypre_CSRMatrix *Adiag; HYPRE_SuperLU *sluPtr; SuperMatrix sluAmat, auxAmat; superlu_options_t slu_options; SuperLUStat_t slu_stat; /* ---------------------------------------------------------------- */ /* get matrix information */ /* ---------------------------------------------------------------- */ sluPtr = (HYPRE_SuperLU *) solver; assert ( sluPtr != NULL ); HYPRE_ParCSRMatrixGetRowPartitioning( A_csr, &partition ); startRow = partition[0]; endRow = partition[1] - 1; nrows = endRow - startRow + 1; free( partition ); if ( startRow != 0 ) { printf("HYPRE_ParCSR_SuperLUSetup ERROR - start row != 0.\n"); return -1; } /* ---------------------------------------------------------------- */ /* get hypre matrix */ /* ---------------------------------------------------------------- */ Adiag = hypre_ParCSRMatrixDiag((hypre_ParCSRMatrix *) A_csr); AdiagI = hypre_CSRMatrixI(Adiag); AdiagJ = hypre_CSRMatrixJ(Adiag); AdiagA = hypre_CSRMatrixData(Adiag); nnz = AdiagI[nrows]; /* ---------------------------------------------------------------- */ /* convert the csr matrix into csc matrix */ /* ---------------------------------------------------------------- */ colLengs = (int *) malloc(nrows * sizeof(int)); for ( irow = 0; irow < nrows; irow++ ) colLengs[irow] = 0; for ( irow = 0; irow < nrows; irow++ ) for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ ) colLengs[AdiagJ[jcol]]++; cscJ = (int *) malloc( (nrows+1) * sizeof(int) ); cscI = (int *) malloc( nnz * sizeof(int) ); cscA = (double *) malloc( nnz * sizeof(double) ); cscJ[0] = 0; nnz = 0; for ( jcol = 1; jcol <= nrows; jcol++ ) { nnz += colLengs[jcol-1]; cscJ[jcol] = nnz; } for ( irow = 0; irow < nrows; irow++ ) { for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ ) { colNum = AdiagJ[jcol]; index = cscJ[colNum]++; cscI[index] = irow; cscA[index] = AdiagA[jcol]; } } cscJ[0] = 0; nnz = 0; for ( jcol = 1; jcol <= nrows; jcol++ ) { nnz += colLengs[jcol-1]; cscJ[jcol] = nnz; } free(colLengs); /* ---------------------------------------------------------------- */ /* create SuperMatrix */ /* ---------------------------------------------------------------- */ dCreate_CompCol_Matrix(&sluAmat,nrows,nrows,cscJ[nrows],cscA,cscI, cscJ, SLU_NC, SLU_D, SLU_GE); etree = (int *) malloc(nrows * sizeof(int)); sluPtr->permC_ = (int *) malloc(nrows * sizeof(int)); sluPtr->permR_ = (int *) malloc(nrows * sizeof(int)); permcSpec = 0; get_perm_c(permcSpec, &sluAmat, sluPtr->permC_); slu_options.Fact = DOFACT; slu_options.SymmetricMode = NO; sp_preorder(&slu_options, &sluAmat, sluPtr->permC_, etree, &auxAmat); diagPivotThresh = 1.0; dropTol = 0.0; panelSize = sp_ienv(1); relax = sp_ienv(2); StatInit(&slu_stat); lwork = 0; slu_options.ColPerm = MY_PERMC; slu_options.DiagPivotThresh = diagPivotThresh; dgstrf(&slu_options, &auxAmat, dropTol, relax, panelSize, etree, NULL, lwork, sluPtr->permC_, sluPtr->permR_, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), &slu_stat, &info); Destroy_CompCol_Permuted(&auxAmat); Destroy_CompCol_Matrix(&sluAmat); free(etree); sluPtr->factorized_ = 1; StatFree(&slu_stat); return 0; #else printf("HYPRE_ParCSR_SuperLUSetup ERROR - SuperLU not enabled.\n"); *solver = (HYPRE_Solver) NULL; return -1; #endif }
void Solver (const MatriceMorse<R> &AA, KN_<R> &x, const KN_<R> &b) const { SuperMatrix B, X; SuperLUStat_t stat; int info = 0, lwork = 0; double ferr[1], berr[1]; double rpg, rcond; double *xx; B.Store = 0; X.Store = 0; ffassert(&x[0] != &b[0]); epsr = (eps < 0) ? (epsr > 0 ? -epsr : -eps) : eps; Dtype_t R_SLU = SuperLUDriver<R>::R_SLU_T(); { void *work = 0; int nrhs = 1; KN_2Ptr<R> xx(x), bb(b); // cout << " xx #### " << xx.c.N() << " "<< xx.ca.N() << " " << xx.ca.step << endl; // cout << " bb #### " << bb.c.N() << " "<< bb.ca.N() << " " << bb.ca.step <<endl; // FFCS - "this->" required by g++ 4.7 this->Create_Dense_Matrix(&B, m, 1, bb, m, SLU_DN, R_SLU, SLU_GE); this->Create_Dense_Matrix(&X, m, 1, xx, m, SLU_DN, R_SLU, SLU_GE); B.ncol = nrhs; /* Set the number of right-hand side */ /* Initialize the statistics variables. */ StatInit(&stat); SuperLUDriver<R>::gssvx(&options, &A, perm_c, perm_r, etree, equed, RR, CC, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if (verbosity > 2) { printf("Triangular solve: dgssvx() returns info %d\n", info); } } if (verbosity > 3) { if (info == 0 || info == n + 1) { /* This is how you could access the solution matrix. */ // R *sol = (R *)((DNformat *)X.Store)->nzval; if (options.IterRefine) { int i = 0; printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); printf("%8d%8d%16e%16e\n", i + 1, stat.RefineSteps, ferr[0], berr[0]); } fflush(stdout); } else if (info > 0 && lwork == -1) { printf("** Estimated memory: %d bytes\n", info - n); } } // cout << " x min max " << x.min() << " " <<x.max() << endl; // cout << "=========================================" << endl; if (B.Store) {Destroy_SuperMatrix_Store(&B);} if (X.Store) {Destroy_SuperMatrix_Store(&X);} }
main(int argc, char *argv[]) { SuperMatrix A; NCformat *Astore; double *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ SuperMatrix L; /* factor L */ SCformat *Lstore; SuperMatrix U; /* factor U */ NCformat *Ustore; SuperMatrix B; int nrhs, ldx, info, m, n, nnz; double *xact, *rhs; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; #if 1 /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); dreadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); dreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); dreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } #else /* Read the matrix in Harwell-Boeing format. */ dreadhb(&m, &n, &nnz, &a, &asub, &xa); #endif dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); nrhs = 1; if ( !(rhs = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(options.Trans, nrhs, xact, ldx, &A, &B); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); /* Initialize the statistics variables. */ StatInit(&stat); dgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); if ( info == 0 ) { /* This is how you could access the solution matrix. */ double *sol = (double*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ dinf_norm_error(nrhs, &B, xact); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } else { printf("dgssv() error returns INFO= %d\n", info); if ( info <= n ) { /* factorization completes */ dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhs); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
/* Here is a driver inspired by A. Sheffer's "cow flattener". */ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) { /* OpenNL Context */ __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M; NLuint n = context->n; NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */ /* Compressed Row Storage matrix representation */ NLint *xa = __NL_NEW_ARRAY(NLint, n+1); NLfloat *rhs = __NL_NEW_ARRAY(NLfloat, n); NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz); NLint *asub = __NL_NEW_ARRAY(NLint, nnz); NLint *etree = __NL_NEW_ARRAY(NLint, n); /* SuperLU variables */ SuperMatrix At, AtP; NLint info, panel_size, relax; superlu_options_t options; /* Temporary variables */ NLuint i, jj, count; __nl_assert(!(M->storage & __NL_SYMMETRIC)); __nl_assert(M->storage & __NL_ROWS); __nl_assert(M->m == M->n); /* Convert M to compressed column format */ for(i=0, count=0; i<n; i++) { __NLRowColumn *Ri = M->row + i; xa[i] = count; for(jj=0; jj<Ri->size; jj++, count++) { a[count] = Ri->coeff[jj].value; asub[count] = Ri->coeff[jj].index; } } xa[n] = nnz; /* Free M, don't need it anymore at this point */ __nlSparseMatrixClear(M); /* Create superlu A matrix transposed */ sCreate_CompCol_Matrix( &At, n, n, nnz, a, asub, xa, SLU_NC, /* Colum wise, no supernode */ SLU_S, /* floats */ SLU_GE /* general storage */ ); /* Set superlu options */ set_default_options(&options); options.ColPerm = MY_PERMC; options.Fact = DOFACT; StatInit(&(context->slu.stat)); panel_size = sp_ienv(1); /* sp_ienv give us the defaults */ relax = sp_ienv(2); /* Compute permutation and permuted matrix */ context->slu.perm_r = __NL_NEW_ARRAY(NLint, n); context->slu.perm_c = __NL_NEW_ARRAY(NLint, n); if ((permutation == NULL) || (*permutation == -1)) { get_perm_c(3, &At, context->slu.perm_c); if (permutation) memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n); } else memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n); sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP); /* Decompose into L and U */ sgstrf(&options, &AtP, relax, panel_size, etree, NULL, 0, context->slu.perm_c, context->slu.perm_r, &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info); /* Cleanup */ Destroy_SuperMatrix_Store(&At); Destroy_CompCol_Permuted(&AtP); __NL_DELETE_ARRAY(etree); __NL_DELETE_ARRAY(xa); __NL_DELETE_ARRAY(rhs); __NL_DELETE_ARRAY(a); __NL_DELETE_ARRAY(asub); context->slu.alloc_slu = NL_TRUE; return (info == 0); }
void dgssv(SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, int *info ) { /* * Purpose * ======= * * DGSSV solves the system of linear equations A*X=B, using the * LU factorization from DGSTRF. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = SLU_NC): * * 1.1. Permute the columns of A, forming A*Pc, where Pc * is a permutation matrix. For more details of this step, * see sp_preorder.c. * * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined * by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 1.3. Solve the system of equations A*X=B using the factored * form of A. * * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the * above algorithm to the transpose of A: * * 2.1. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix. * For more details of this step, see sp_preorder.c. * * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr * determined by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 2.3. Solve the system of equations A*X=B using the factored * form of A. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * A (input) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of linear equations is A->nrow. Currently, the type of A can be: * Stype = SLU_NC or SLU_NR; Dtype = SLU_D; Mtype = SLU_GE. * In the future, more general A may be handled. * * perm_c (input/output) int* * If A->Stype = SLU_NC, column permutation vector of size A->ncol * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = SLU_NR, column permutation vector of size A->nrow * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (output) int* * If A->Stype = SLU_NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = SLU_NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ double t1; /* Temporary time */ char refact[1], trans[1]; DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ double diag_pivot_thresh = 1.0; double drop_tol = 0; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ double *utime; extern SuperLUStat_t SuperLUStat; /* Test the input parameters ... */ *info = 0; Bstore = B->Store; if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -1; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -6; if ( *info != 0 ) { i = -(*info); xerbla_("dgssv", &i); return; } *refact = 'N'; *trans = 'N'; panel_size = sp_ienv(1); relax = sp_ienv(2); StatInit(panel_size, relax); utime = SuperLUStat.utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); *trans = 'T'; } else if ( A->Stype == SLU_NC ) AA = A; etree = intMalloc(A->ncol); t1 = SuperLU_timer_(); sp_preorder(refact, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t1; /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t1 = SuperLU_timer_(); /* Compute the LU factorization of A. */ dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size, etree, NULL, lwork, perm_r, perm_c, L, U, info); utime[FACT] = SuperLU_timer_() - t1; t1 = SuperLU_timer_(); if ( *info == 0 ) { /* Solve the system A*X=B, overwriting B with X. */ dgstrs (trans, L, U, perm_r, perm_c, B, info); } utime[SOLVE] = SuperLU_timer_() - t1; SUPERLU_FREE (etree); Destroy_CompCol_Permuted(&AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /*PrintStat( &SuperLUStat );*/ StatFree(); }
static PyObject *Py_sgssv (PyObject *self, PyObject *args, PyObject *kwdict) { PyObject *Py_B=NULL, *Py_X=NULL; PyArrayObject *nzvals=NULL; PyArrayObject *colind=NULL, *rowptr=NULL; int N, nnz; int info; int csc=0, permc_spec=2; int *perm_r=NULL, *perm_c=NULL; SuperMatrix A, B, L, U; superlu_options_t options; SuperLUStat_t stat; static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "permc_spec",NULL}; /* Get input arguments */ if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &permc_spec)) return NULL; if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); return NULL; } /* Create Space for output */ Py_X = PyArray_CopyFromObject(Py_B,PyArray_FLOAT,1,2); if (Py_X == NULL) return NULL; if (csc) { if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_FLOAT)) { Py_DECREF(Py_X); return NULL; } } else { if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_FLOAT)) { Py_DECREF(Py_X); return NULL; } } if (DenseSuper_from_Numeric(&B, Py_X)) { Destroy_SuperMatrix_Store(&A); Py_DECREF(Py_X); return NULL; } /* B and Py_X share same data now but Py_X "owns" it */ /* Setup options */ if (setjmp(_superlu_py_jmpbuf)) goto fail; else { perm_c = intMalloc(N); perm_r = intMalloc(N); set_default_options(&options); options.ColPerm=superlu_module_getpermc(permc_spec); StatInit(&stat); /* Compute direct inverse of sparse Matrix */ sgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); } SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); StatFree(&stat); return Py_BuildValue("Ni", Py_X, info); fail: SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); StatFree(&stat); Py_XDECREF(Py_X); return NULL; }
int main(int argc, char *argv[]) { void zmatvec_mult(doublecomplex alpha, doublecomplex x[], doublecomplex beta, doublecomplex y[]); void zpsolve(int n, doublecomplex x[], doublecomplex y[]); extern int zfgmr( int n, void (*matvec_mult)(doublecomplex, doublecomplex [], doublecomplex, doublecomplex []), void (*psolve)(int n, doublecomplex [], doublecomplex[]), doublecomplex *rhs, doublecomplex *sol, double tol, int restrt, int *itmax, FILE *fits); extern int zfill_diag(int n, NCformat *Astore); char equed[1] = {'B'}; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; doublecomplex *a; int *asub, *xa; int *etree; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int nrhs, ldx, lwork, info, m, n, nnz; doublecomplex *rhsb, *rhsx, *xact; doublecomplex *work = NULL; double *R, *C; double u, rpg, rcond; doublecomplex zero = {0.0, 0.0}; doublecomplex one = {1.0, 0.0}; doublecomplex none = {-1.0, 0.0}; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; int restrt, iter, maxit, i; double resid; doublecomplex *x, *b; #ifdef DEBUG extern int num_drop_L, num_drop_U; #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 0.1; //different from complete LU options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; options.RowPerm = LargeDiag; options.ILU_DropTol = 1e-4; options.ILU_FillTol = 1e-2; options.ILU_FillFactor = 10.0; options.ILU_DropRule = DROP_BASIC | DROP_AREA; options.ILU_Norm = INF_NORM; options.ILU_MILU = SILU; */ ilu_set_default_options(&options); /* Modify the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) ABORT("Malloc fails for work[]."); } /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); zreadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); zreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); zreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); Astore = A.Store; zfill_diag(n, Astore); printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); fflush(stdout); /* Generate the right-hand side */ if ( !(rhsb = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); zCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_Z, SLU_GE); zCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_Z, SLU_GE); xact = doublecomplexMalloc(n * nrhs); ldx = n; zGenXtrue(n, nrhs, xact, ldx); zFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C[]."); info = 0; #ifdef DEBUG num_drop_L = 0; num_drop_U = 0; #endif /* Initialize the statistics variables. */ StatInit(&stat); /* Compute the incomplete factorization and compute the condition number and pivot growth using dgsisx. */ B.ncol = 0; /* not to perform triangular solution */ zgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, &mem_usage, &stat, &info); /* Set RHS for GMRES. */ if (!(b = doublecomplexMalloc(m))) ABORT("Malloc fails for b[]."); if (*equed == 'R' || *equed == 'B') { for (i = 0; i < n; ++i) zd_mult(&b[i], &rhsb[i], R[i]); } else { for (i = 0; i < m; i++) b[i] = rhsb[i]; } printf("zgsisx(): info %d, equed %c\n", info, equed[0]); if (info > 0 || rcond < 1e-8 || rpg > 1e8) printf("WARNING: This preconditioner might be unstable.\n"); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz); printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n", ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n) / (double)Astore->nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); /* Set the global variables. */ GLOBAL_A = &A; GLOBAL_L = &L; GLOBAL_U = &U; GLOBAL_STAT = &stat; GLOBAL_PERM_C = perm_c; GLOBAL_PERM_R = perm_r; GLOBAL_OPTIONS = &options; GLOBAL_R = R; GLOBAL_C = C; GLOBAL_MEM_USAGE = &mem_usage; /* Set the options to do solve-only. */ options.Fact = FACTORED; options.PivotGrowth = NO; options.ConditionNumber = NO; /* Set the variables used by GMRES. */ restrt = SUPERLU_MIN(n / 3 + 1, 50); maxit = 1000; iter = maxit; resid = 1e-8; if (!(x = doublecomplexMalloc(n))) ABORT("Malloc fails for x[]."); if (info <= n + 1) { int i_1 = 1; double maxferr = 0.0, nrmA, nrmB, res, t; doublecomplex temp; extern double dznrm2_(int *, doublecomplex [], int *); extern void zaxpy_(int *, doublecomplex *, doublecomplex [], int *, doublecomplex [], int *); /* Initial guess */ for (i = 0; i < n; i++) x[i] = zero; t = SuperLU_timer_(); /* Call GMRES */ zfgmr(n, zmatvec_mult, zpsolve, b, x, resid, restrt, &iter, stdout); t = SuperLU_timer_() - t; /* Output the result. */ nrmA = dznrm2_(&(Astore->nnz), (doublecomplex *)((DNformat *)A.Store)->nzval, &i_1); nrmB = dznrm2_(&m, b, &i_1); sp_zgemv("N", none, &A, x, 1, one, b, 1); res = dznrm2_(&m, b, &i_1); resid = res / nrmB; printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, " "relres = %.1e\n", nrmA, nrmB, res, resid); if (iter >= maxit) { if (resid >= 1.0) iter = -180; else if (resid > 1e-8) iter = -111; } printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n", iter, resid, t); /* Scale the solution back if equilibration was performed. */ if (*equed == 'C' || *equed == 'B') for (i = 0; i < n; i++) zd_mult(&x[i], &x[i], C[i]); for (i = 0; i < m; i++) { z_sub(&temp, &x[i], &xact[i]); maxferr = SUPERLU_MAX(maxferr, z_abs1(&temp)); } printf("||X-X_true||_oo = %.1e\n", maxferr); } #ifdef DEBUG printf("%d entries in L and %d entries in U dropped.\n", num_drop_L, num_drop_U); #endif fflush(stdout); if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork >= 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } SUPERLU_FREE(b); SUPERLU_FREE(x); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif return 0; }
int main ( int argc, char *argv[] ) /**********************************************************************/ /* Purpose: SUPER_LU_D0 runs a small 5 by 5 example of the use of SUPER_LU. Modified: 23 April 2004 Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide, Sections 1 and 2. */ { double *a; SuperMatrix A; int *asub; SuperMatrix B; int i; int info; SuperMatrix L; int m; int n; int nnz; int nrhs; superlu_options_t options; int *perm_c; int *perm_r; int permc_spec; double *rhs; double sol[5]; SuperLUStat_t stat; SuperMatrix U; int *xa; /* Say hello. */ printf ( "\n" ); printf ( "SUPER_LU_D0:\n" ); printf ( " Simple 5 by 5 example of SUPER_LU solver.\n" ); /* Initialize parameters. */ m = 5; n = 5; nnz = 12; /* Set aside space for the arrays. */ a = doubleMalloc ( nnz ); if ( !a ) { ABORT ( "Malloc fails for a[]." ); } asub = intMalloc ( nnz ); if ( !asub ) { ABORT ( "Malloc fails for asub[]." ); } xa = intMalloc ( n+1 ); if ( !xa ) { ABORT ( "Malloc fails for xa[]." ); } /* Initialize matrix A. */ a[0] = 19.0; a[1] = 12.0; a[2] = 12.0; a[3] = 21.0; a[4] = 12.0; a[5] = 12.0; a[6] = 21.0; a[7] = 16.0; a[8] = 21.0; a[9] = 5.0; a[10]= 21.0; a[11]= 18.0; asub[0] = 0; asub[1] = 1; asub[2] = 4; asub[3] = 1; asub[4] = 2; asub[5] = 4; asub[6] = 0; asub[7] = 2; asub[8] = 0; asub[9] = 3; asub[10]= 3; asub[11]= 4; xa[0] = 0; xa[1] = 3; xa[2] = 6; xa[3] = 8; xa[4] = 10; xa[5] = 12; sol[0] = -0.031250000; sol[1] = 0.065476190; sol[2] = 0.013392857; sol[3] = 0.062500000; sol[4] = 0.032738095; /* Create matrix A in the format expected by SuperLU. */ dCreate_CompCol_Matrix ( &A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE ); /* Create the right-hand side matrix B. */ nrhs = 1; rhs = doubleMalloc ( m * nrhs ); if ( !rhs ) { ABORT("Malloc fails for rhs[]."); } for ( i = 0; i < m; i++ ) { rhs[i] = 1.0; } dCreate_Dense_Matrix ( &B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE ); /* Set up the arrays for the permutations. */ perm_r = intMalloc ( m ); if ( !perm_r ) { ABORT ( "Malloc fails for perm_r[]." ); } perm_c = intMalloc ( n ); if ( !perm_c ) { ABORT ( "Malloc fails for perm_c[]." ); } /* Set the default input options, and then adjust some of them. */ set_default_options ( &options ); options.ColPerm = NATURAL; /* Initialize the statistics variables. */ StatInit ( &stat ); /* Factor the matrix and solve the linear system. */ dgssv ( &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info ); /* Print some of the results. */ dPrint_CompCol_Matrix ( "Matrix A", &A ); dPrint_SuperNode_Matrix ( "Factor L", &L ); dPrint_CompCol_Matrix ( "Factor U", &U ); dPrint_Dense_Matrix ( "Solution X", &B ); printf ( "\n" ); printf ( " The exact solution:\n" ); printf ( "\n" ); for ( i = 0; i < n; i++ ) { printf ( "%d %f\n", i, sol[i] ); } printf ( "\n" ); print_int_vec ( "perm_r", m, perm_r ); /* De-allocate storage. */ SUPERLU_FREE ( rhs ); SUPERLU_FREE ( perm_r ); SUPERLU_FREE ( perm_c ); Destroy_CompCol_Matrix ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); StatFree ( &stat ); printf ( "\n" ); printf ( "SUPER_LU_D0:\n" ); printf ( " Normal end of execution.\n" ); return 0; }
main(int argc, char *argv[]) { /* * Purpose * ======= * * SDRIVE is the main test program for the FLOAT linear * equation driver routines SGSSV and SGSSVX. * * The program is invoked by a shell script file -- stest.csh. * The output from the tests are written into a file -- stest.out. * * ===================================================================== */ float *a, *a_save; int *asub, *asub_save; int *xa, *xa_save; SuperMatrix A, B, X, L, U; SuperMatrix ASAV, AC; GlobalLU_t Glu; /* Not needed on return. */ mem_usage_t mem_usage; int *perm_r; /* row permutation from partial pivoting */ int *perm_c, *pc_save; /* column permutation */ int *etree; float zero = 0.0; float *R, *C; float *ferr, *berr; float *rwork; float *wwork; void *work; int info, lwork, nrhs, panel_size, relax; int m, n, nnz; float *xact; float *rhsb, *solx, *bsav; int ldb, ldx; float rpg, rcond; int i, j, k1; float rowcnd, colcnd, amax; int maxsuper, rowblk, colblk; int prefact, nofact, equil, iequed; int nt, nrun, nfail, nerrs, imat, fimat, nimat; int nfact, ifact, itran; int kl, ku, mode, lda; int zerot, izero, ioff; double u; float anorm, cndnum; float *Afull; float result[NTESTS]; superlu_options_t options; fact_t fact; trans_t trans; SuperLUStat_t stat; static char matrix_type[8]; static char equed[1], path[4], sym[1], dist[1]; FILE *fp; /* Fixed set of parameters */ int iseed[] = {1988, 1989, 1990, 1991}; static char equeds[] = {'N', 'R', 'C', 'B'}; static fact_t facts[] = {FACTORED, DOFACT, SamePattern, SamePattern_SameRowPerm}; static trans_t transs[] = {NOTRANS, TRANS, CONJ}; /* Some function prototypes */ extern int sgst01(int, int, SuperMatrix *, SuperMatrix *, SuperMatrix *, int *, int *, float *); extern int sgst02(trans_t, int, int, int, SuperMatrix *, float *, int, float *, int, float *resid); extern int sgst04(int, int, float *, int, float *, int, float rcond, float *resid); extern int sgst07(trans_t, int, int, SuperMatrix *, float *, int, float *, int, float *, int, float *, float *, float *); extern int slatb4_slu(char *, int *, int *, int *, char *, int *, int *, float *, int *, float *, char *); extern int slatms_slu(int *, int *, char *, int *, char *, float *d, int *, float *, float *, int *, int *, char *, float *, int *, float *, int *); extern int sp_sconvert(int, int, float *, int, int, int, float *a, int *, int *, int *); /* Executable statements */ strcpy(path, "SGE"); nrun = 0; nfail = 0; nerrs = 0; /* Defaults */ lwork = 0; n = 1; nrhs = 1; panel_size = sp_ienv(1); relax = sp_ienv(2); u = 1.0; strcpy(matrix_type, "LA"); parse_command_line(argc, argv, matrix_type, &n, &panel_size, &relax, &nrhs, &maxsuper, &rowblk, &colblk, &lwork, &u, &fp); if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { fprintf(stderr, "expert: cannot allocate %d bytes\n", lwork); exit (-1); } } /* Set the default input options. */ set_default_options(&options); options.DiagPivotThresh = u; options.PrintStat = NO; options.PivotGrowth = YES; options.ConditionNumber = YES; options.IterRefine = SLU_SINGLE; if ( strcmp(matrix_type, "LA") == 0 ) { /* Test LAPACK matrix suite. */ m = n; lda = SUPERLU_MAX(n, 1); nnz = n * n; /* upper bound */ fimat = 1; nimat = NTYPES; Afull = floatCalloc(lda * n); sallocateA(n, nnz, &a, &asub, &xa); } else { /* Read a sparse matrix */ fimat = nimat = 0; sreadhb(fp, &m, &n, &nnz, &a, &asub, &xa); } sallocateA(n, nnz, &a_save, &asub_save, &xa_save); rhsb = floatMalloc(m * nrhs); bsav = floatMalloc(m * nrhs); solx = floatMalloc(n * nrhs); ldb = m; ldx = n; sCreate_Dense_Matrix(&B, m, nrhs, rhsb, ldb, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, n, nrhs, solx, ldx, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); etree = intMalloc(n); perm_r = intMalloc(n); perm_c = intMalloc(n); pc_save = intMalloc(n); R = (float *) SUPERLU_MALLOC(m*sizeof(float)); C = (float *) SUPERLU_MALLOC(n*sizeof(float)); ferr = (float *) SUPERLU_MALLOC(nrhs*sizeof(float)); berr = (float *) SUPERLU_MALLOC(nrhs*sizeof(float)); j = SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs); rwork = (float *) SUPERLU_MALLOC(j*sizeof(float)); for (i = 0; i < j; ++i) rwork[i] = 0.; if ( !R ) ABORT("SUPERLU_MALLOC fails for R"); if ( !C ) ABORT("SUPERLU_MALLOC fails for C"); if ( !ferr ) ABORT("SUPERLU_MALLOC fails for ferr"); if ( !berr ) ABORT("SUPERLU_MALLOC fails for berr"); if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork"); wwork = floatCalloc( SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs) ); for (i = 0; i < n; ++i) perm_c[i] = pc_save[i] = i; options.ColPerm = MY_PERMC; for (imat = fimat; imat <= nimat; ++imat) { /* All matrix types */ if ( imat ) { /* Skip types 5, 6, or 7 if the matrix size is too small. */ zerot = (imat >= 5 && imat <= 7); if ( zerot && n < imat-4 ) continue; /* Set up parameters with SLATB4 and generate a test matrix with SLATMS. */ slatb4_slu(path, &imat, &n, &n, sym, &kl, &ku, &anorm, &mode, &cndnum, dist); slatms_slu(&n, &n, dist, iseed, sym, &rwork[0], &mode, &cndnum, &anorm, &kl, &ku, "No packing", Afull, &lda, &wwork[0], &info); if ( info ) { printf(FMT3, "SLATMS", info, izero, n, nrhs, imat, nfail); continue; } /* For types 5-7, zero one or more columns of the matrix to test that INFO is returned correctly. */ if ( zerot ) { if ( imat == 5 ) izero = 1; else if ( imat == 6 ) izero = n; else izero = n / 2 + 1; ioff = (izero - 1) * lda; if ( imat < 7 ) { for (i = 0; i < n; ++i) Afull[ioff + i] = zero; } else { for (j = 0; j < n - izero + 1; ++j) for (i = 0; i < n; ++i) Afull[ioff + i + j*lda] = zero; } } else { izero = 0; } /* Convert to sparse representation. */ sp_sconvert(n, n, Afull, lda, kl, ku, a, asub, xa, &nnz); } else { izero = 0; zerot = 0; } sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); /* Save a copy of matrix A in ASAV */ sCreate_CompCol_Matrix(&ASAV, m, n, nnz, a_save, asub_save, xa_save, SLU_NC, SLU_S, SLU_GE); sCopy_CompCol_Matrix(&A, &ASAV); /* Form exact solution. */ sGenXtrue(n, nrhs, xact, ldx); StatInit(&stat); for (iequed = 0; iequed < 4; ++iequed) { *equed = equeds[iequed]; if (iequed == 0) nfact = 4; else nfact = 1; /* Only test factored, pre-equilibrated matrix */ for (ifact = 0; ifact < nfact; ++ifact) { fact = facts[ifact]; options.Fact = fact; for (equil = 0; equil < 2; ++equil) { options.Equil = equil; prefact = ( options.Fact == FACTORED || options.Fact == SamePattern_SameRowPerm ); /* Need a first factor */ nofact = (options.Fact != FACTORED); /* Not factored */ /* Restore the matrix A. */ sCopy_CompCol_Matrix(&ASAV, &A); if ( zerot ) { if ( prefact ) continue; } else if ( options.Fact == FACTORED ) { if ( equil || iequed ) { /* Compute row and column scale factors to equilibrate matrix A. */ sgsequ(&A, R, C, &rowcnd, &colcnd, &amax, &info); /* Force equilibration. */ if ( !info && n > 0 ) { if ( strncmp(equed, "R", 1)==0 ) { rowcnd = 0.; colcnd = 1.; } else if ( strncmp(equed, "C", 1)==0 ) { rowcnd = 1.; colcnd = 0.; } else if ( strncmp(equed, "B", 1)==0 ) { rowcnd = 0.; colcnd = 0.; } } /* Equilibrate the matrix. */ slaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } } if ( prefact ) { /* Need a factor for the first time */ /* Save Fact option. */ fact = options.Fact; options.Fact = DOFACT; /* Preorder the matrix, obtain the column etree. */ sp_preorder(&options, &A, perm_c, etree, &AC); /* Factor the matrix AC. */ sgstrf(&options, &AC, relax, panel_size, etree, work, lwork, perm_c, perm_r, &L, &U, &Glu, &stat, &info); if ( info ) { printf("** First factor: info %d, equed %c\n", info, *equed); if ( lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); exit(0); } } Destroy_CompCol_Permuted(&AC); /* Restore Fact option. */ options.Fact = fact; } /* if .. first time factor */ for (itran = 0; itran < NTRAN; ++itran) { trans = transs[itran]; options.Trans = trans; /* Restore the matrix A. */ sCopy_CompCol_Matrix(&ASAV, &A); /* Set the right hand side. */ sFillRHS(trans, nrhs, xact, ldx, &A, &B); sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, bsav, ldb); /*---------------- * Test sgssv *----------------*/ if ( options.Fact == DOFACT && itran == 0) { /* Not yet factored, and untransposed */ sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, solx, ldx); sgssv(&options, &A, perm_c, perm_r, &L, &U, &X, &stat, &info); if ( info && info != izero ) { printf(FMT3, "sgssv", info, izero, n, nrhs, imat, nfail); } else { /* Reconstruct matrix from factors and compute residual. */ sgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); nt = 1; if ( izero == 0 ) { /* Compute residual of the computed solution. */ sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, wwork, ldb); sgst02(trans, m, n, nrhs, &A, solx, ldx, wwork,ldb, &result[1]); nt = 2; } /* Print information about the tests that did not pass the threshold. */ for (i = 0; i < nt; ++i) { if ( result[i] >= THRESH ) { printf(FMT1, "sgssv", n, i, result[i]); ++nfail; } } nrun += nt; } /* else .. info == 0 */ /* Restore perm_c. */ for (i = 0; i < n; ++i) perm_c[i] = pc_save[i]; if (lwork == 0) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* if .. end of testing sgssv */ /*---------------- * Test sgssvx *----------------*/ /* Equilibrate the matrix if fact = FACTORED and equed = 'R', 'C', or 'B'. */ if ( options.Fact == FACTORED && (equil || iequed) && n > 0 ) { slaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } /* Solve the system and compute the condition number and error bounds using sgssvx. */ sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if ( info && info != izero ) { printf(FMT3, "sgssvx", info, izero, n, nrhs, imat, nfail); if ( lwork == -1 ) { printf("** Estimated memory: %.0f bytes\n", mem_usage.total_needed); exit(0); } } else { if ( !prefact ) { /* Reconstruct matrix from factors and compute residual. */ sgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); k1 = 0; } else { k1 = 1; } if ( !info ) { /* Compute residual of the computed solution.*/ sCopy_Dense_Matrix(m, nrhs, bsav, ldb, wwork, ldb); sgst02(trans, m, n, nrhs, &ASAV, solx, ldx, wwork, ldb, &result[1]); /* Check solution from generated exact solution. */ sgst04(n, nrhs, solx, ldx, xact, ldx, rcond, &result[2]); /* Check the error bounds from iterative refinement. */ sgst07(trans, n, nrhs, &ASAV, bsav, ldb, solx, ldx, xact, ldx, ferr, berr, &result[3]); /* Print information about the tests that did not pass the threshold. */ for (i = k1; i < NTESTS; ++i) { if ( result[i] >= THRESH ) { printf(FMT2, "sgssvx", options.Fact, trans, *equed, n, imat, i, result[i]); ++nfail; } } nrun += NTESTS; } /* if .. info == 0 */ } /* else .. end of testing sgssvx */ } /* for itran ... */ if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* for equil ... */ } /* for ifact ... */ } /* for iequed ... */ #if 0 if ( !info ) { PrintPerf(&L, &U, &mem_usage, rpg, rcond, ferr, berr, equed); } #endif Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&ASAV); StatFree(&stat); } /* for imat ... */ /* Print a summary of the results. */ PrintSumm("SGE", nfail, nrun, nerrs); if ( strcmp(matrix_type, "LA") == 0 ) SUPERLU_FREE (Afull); SUPERLU_FREE (rhsb); SUPERLU_FREE (bsav); SUPERLU_FREE (solx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (pc_save); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); SUPERLU_FREE (rwork); SUPERLU_FREE (wwork); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); #if 0 Destroy_CompCol_Matrix(&A); Destroy_CompCol_Matrix(&ASAV); #else SUPERLU_FREE(a); SUPERLU_FREE(asub); SUPERLU_FREE(xa); SUPERLU_FREE(a_save); SUPERLU_FREE(asub_save); SUPERLU_FREE(xa_save); #endif if ( lwork > 0 ) { SUPERLU_FREE (work); Destroy_SuperMatrix_Store(&L); Destroy_SuperMatrix_Store(&U); } return 0; }
int main ( int argc, char *argv[] ) /**********************************************************************/ /* Purpose: SUPER_LU_S3 solves a sparse system read from a file using SGSSVX. Discussion: The sparse matrix is stored in a file using the Harwell-Boeing sparse matrix format. The file should be assigned to the standard input of this program. For instance, if the matrix is stored in the file "g10_rua.txt", the execution command might be: super_lu_s3 < g10_rua.txt Modified: 25 April 2004 Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide, Sections 1 and 2. Local parameters: SuperMatrix L, the computed L factor. int *perm_c, the column permutation vector. int *perm_r, the row permutations from partial pivoting. SuperMatrix U, the computed U factor. */ { SuperMatrix A; NCformat *Astore; float *a; int *asub; SuperMatrix B; float *berr; float *C; char equed[1]; yes_no_t equil; int *etree; float *ferr; int i; int info; SuperMatrix L; int ldx; SCformat *Lstore; int lwork; int m; mem_usage_t mem_usage; int n; int nnz; int nrhs; superlu_options_t options; int *perm_c; int *perm_r; float *R; float rcond; float *rhsb; float *rhsx; float rpg; float *sol; SuperLUStat_t stat; trans_t trans; SuperMatrix U; float u; NCformat *Ustore; void *work; SuperMatrix X; int *xa; float *xact; /* Say hello. */ printf ( "\n" ); printf ( "SUPER_LU_S3:\n" ); printf ( " Read a sparse matrix A from standard input,\n"); printf ( " stored in Harwell-Boeing Sparse Matrix format.\n" ); printf ( "\n" ); printf ( " Solve a linear system A * X = B using SGSSVX.\n" ); /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options ( &options ); /* Can use command line input to modify the defaults. */ parse_command_line ( argc, argv, &lwork, &u, &equil, &trans ); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; printf ( "\n" ); printf ( " Length of work array LWORK = %d\n", lwork ); printf ( " Equilibration option EQUIL = %d\n", equil ); printf ( " Diagonal pivot threshhold value U = %f\n", u ); printf ( " Tranpose option TRANS = %d\n", trans ); /* Add more functionalities that the defaults. Compute reciprocal pivot growth */ options.PivotGrowth = YES; /* Compute reciprocal condition number */ options.ConditionNumber = YES; /* Perform single-precision refinement */ options.IterRefine = SINGLE; if ( 0 < lwork ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT ( "SUPERLU_MALLOC cannot allocate work[]" ); } } /* Read matrix A from a file in Harwell-Boeing format. */ sreadhb ( &m, &n, &nnz, &a, &asub, &xa ); /* Create storage for a compressed column matrix. */ sCreate_CompCol_Matrix ( &A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE ); Astore = A.Store; printf ( "\n" ); printf ( " Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz ); rhsb = floatMalloc ( m * nrhs ); if ( !rhsb ) { ABORT ( "Malloc fails for rhsb[]." ); } rhsx = floatMalloc ( m * nrhs ); if ( !rhsx ) { ABORT ( "Malloc fails for rhsx[]." ); } sCreate_Dense_Matrix ( &B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE ); sCreate_Dense_Matrix ( &X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE ); xact = floatMalloc ( n * nrhs ); if ( !xact ) { ABORT ( "SUPERLU_MALLOC cannot allocate xact[]" ); } ldx = n; sGenXtrue ( n, nrhs, xact, ldx ); sFillRHS ( trans, nrhs, xact, ldx, &A, &B ); etree = intMalloc ( n ); if ( !etree ) { ABORT ( "Malloc fails for etree[]." ); } perm_c = intMalloc ( n ); if ( !perm_c ) { ABORT ( "Malloc fails for perm_c[]." ); } perm_r = intMalloc ( m ); if ( !perm_r ) { ABORT ( "Malloc fails for perm_r[]." ); } R = (float *) SUPERLU_MALLOC ( A.nrow * sizeof(float) ); if ( !R ) { ABORT ( "SUPERLU_MALLOC fails for R[]." ); } C = (float *) SUPERLU_MALLOC ( A.ncol * sizeof(float) ); if ( !C ) { ABORT ( "SUPERLU_MALLOC fails for C[]." ); } ferr = (float *) SUPERLU_MALLOC ( nrhs * sizeof(float) ); if ( !ferr ) { ABORT ( "SUPERLU_MALLOC fails for ferr[]." ); } berr = (float *) SUPERLU_MALLOC ( nrhs * sizeof(float) ); if ( !berr ) { ABORT ( "SUPERLU_MALLOC fails for berr[]." ); } /* Initialize the statistics variables. */ StatInit(&stat); /* Solve the system and compute the condition number and error bounds using SGSSVX. */ sgssvx ( &options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info ); printf ( "\n" ); printf ( " SGSSVX returns INFO = %d\n", info ); if ( info == 0 || info == n+1 ) { sol = (float*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth == YES ) { printf ( "\n" ); printf ( " Reciprocal pivot growth = %e\n", rpg); } if ( options.ConditionNumber == YES ) { printf ( "\n" ); printf ( " Reciprocal condition number = %e\n", rcond); } if ( options.IterRefine != NOREFINE ) { printf ( "\n" ); printf ( " Iterative Refinement:\n"); printf ( "%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for ( i = 0; i < nrhs; i++ ) { printf ( "%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf ( "\n" ); printf ( " Number of nonzeros in factor L = %d\n", Lstore->nnz ); printf ( " Number of nonzeros in factor U = %d\n", Ustore->nnz ); printf ( " Number of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n ); printf ( "\n" ); printf ( " L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions ); fflush ( stdout ); } else if ( info > 0 && lwork == -1 ) { printf ( "\n" ); printf ( " Estimated memory: %d bytes\n", info - n ); } if ( options.PrintStat ) { StatPrint ( &stat ); } StatFree ( &stat ); SUPERLU_FREE ( rhsb ); SUPERLU_FREE ( rhsx ); SUPERLU_FREE ( xact ); SUPERLU_FREE ( etree ); SUPERLU_FREE ( perm_r ); SUPERLU_FREE ( perm_c ); SUPERLU_FREE ( R ); SUPERLU_FREE ( C ); SUPERLU_FREE ( ferr ); SUPERLU_FREE ( berr ); Destroy_CompCol_Matrix ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperMatrix_Store ( &X ); if ( 0 <= lwork ) { Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); } /* Say goodbye. */ printf ( "\n" ); printf ( "SUPER_LU_S3:\n" ); printf ( " Normal end of execution.\n"); return 0; }
bool SuperLUSolver::Solve(SparseMatrixType& rA, VectorType& rX, VectorType& rB) { //std::cout << "matrix size in solver: " << rA.size1() << std::endl; //std::cout << "RHS size in solver SLU: " << rB.size() << std::endl; // typedef ublas::compressed_matrix<double, ublas::row_major, 0, // ublas::unbounded_array<int>, ublas::unbounded_array<double> > cm_t; //make a copy of the RHS VectorType rC = rB; superlu_options_t options; SuperLUStat_t stat; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); options.IterRefine = SLU_DOUBLE; // options.ColPerm = MMD_AT_PLUS_A; //Fill the SuperLU matrices SuperMatrix Aslu, B, L, U; //create a copy of the matrix int *index1_vector = new (std::nothrow) int[rA.index1_data().size()]; int *index2_vector = new (std::nothrow) int[rA.index2_data().size()]; // double *values_vector = new (std::nothrow) double[rA.value_data().size()]; for( int unsigned i = 0; i < rA.index1_data().size(); i++ ) index1_vector[i] = (int)rA.index1_data()[i]; for( unsigned int i = 0; i < rA.index2_data().size(); i++ ) index2_vector[i] = (int)rA.index2_data()[i]; /* for( unsigned int i = 0; i < rA.value_data().size(); i++ ) values_vector[i] = (double)rA.value_data()[i];*/ //create a copy of the rhs vector (it will be overwritten with the solution) /* double *b_vector = new (std::nothrow) double[rB.size()]; for( unsigned int i = 0; i < rB.size(); i++ ) b_vector[i] = rB[i];*/ /* dCreate_CompCol_Matrix (&Aslu, rA.size1(), rA.size2(), rA.nnz(), values_vector, index2_vector, index1_vector, SLU_NR, SLU_D, SLU_GE );*/ //works also with dCreate_CompCol_Matrix dCreate_CompRow_Matrix (&Aslu, rA.size1(), rA.size2(), rA.nnz(), rA.value_data().begin(), index2_vector, //can not avoid a copy as ublas uses unsigned int internally index1_vector, //can not avoid a copy as ublas uses unsigned int internally SLU_NR, SLU_D, SLU_GE ); dCreate_Dense_Matrix (&B, rB.size(), 1,&rB[0],rB.size(),SLU_DN, SLU_D, SLU_GE); //allocate memory for permutation arrays int* perm_c; int* perm_r; if ( !(perm_c = intMalloc(rA.size1())) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(rA.size2())) ) ABORT("Malloc fails for perm_r[]."); //initialize container for statistical data StatInit(&stat); //call solver routine int info; dgssv(&options, &Aslu, perm_c, perm_r, &L, &U, &B, &stat, &info); //print output if (options.PrintStat) { StatPrint(&stat); } //resubstitution of results #pragma omp parallel for for(int i=0; i<static_cast<int>(rB.size()); i++ ) rX[i] = rB[i]; // B(i,0); //recover the RHS rB=rC; //deallocate memory used StatFree(&stat); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_SuperMatrix_Store(&Aslu); //note that by using the "store" function we will take care of deallocation ourselves Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); delete [] index1_vector; delete [] index2_vector; // delete [] b_vector; //CHECK WITH VALGRIND IF THIS IS NEEDED ...or if it is done by the lines above //deallocate tempory storage used for the matrix // if(b_vector!=NULL) delete [] index1_vector; // // if(b_vector!=NULL) delete [] index2_vector; // if(b_vector!=NULL) delete [] values_vector; // if(b_vector!=NULL) delete [] b_vector; return true; }
main(int argc, char *argv[]) { SuperMatrix A; NCformat *Astore; doublecomplex *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ SuperMatrix L; /* factor L */ SCformat *Lstore; SuperMatrix U; /* factor U */ NCformat *Ustore; SuperMatrix B; int nrhs, ldx, info, m, n, nnz; doublecomplex *xact, *rhs; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; /* Read the matrix in Harwell-Boeing format. */ zreadhb(&m, &n, &nnz, &a, &asub, &xa); zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); nrhs = 1; if ( !(rhs = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[]."); zCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_Z, SLU_GE); xact = doublecomplexMalloc(n * nrhs); ldx = n; zGenXtrue(n, nrhs, xact, ldx); zFillRHS(options.Trans, nrhs, xact, ldx, &A, &B); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); /* Initialize the statistics variables. */ StatInit(&stat); zgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); if ( info == 0 ) { /* This is how you could access the solution matrix. */ doublecomplex *sol = (doublecomplex*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ zinf_norm_error(nrhs, &B, xact); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); zQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions); } else { printf("zgssv() error returns INFO= %d\n", info); if ( info <= n ) { /* factorization completes */ zQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions); } } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhs); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
void pzgssv(int nprocs, SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, int *info ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * * PZGSSV solves the system of linear equations A*X=B, using the parallel * LU factorization routine PZGSTRF. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. Permute the columns of A, forming A*Pc, where Pc is a * permutation matrix. * For more details of this step, see sp_preorder.c. * * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined * by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 1.3. Solve the system of equations A*X=B using the factored * form of A. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the tranpose of A: * * 2.1. Permute columns of tranpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix. * For more details of this step, see sp_preorder.c. * * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr * determined by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 2.3. Solve the system of equations A*X=B using the factored * form of A. * * See supermatrix.h for the definition of "SuperMatrix" structure. * * * Arguments * ========= * * nprocs (input) int * Number of processes (or threads) to be spawned and used to perform * the LU factorization by pzgstrf(). There is a single thread of * control to call pzgstrf(), and all threads spawned by pzgstrf() * are terminated before returning from pzgstrf(). * * A (input) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol), where * A->nrow = A->ncol. Currently, the type of A can be: * Stype = NC or NR; Dtype = _D; Mtype = GE. In the future, * more general A will be handled. * * perm_c (input/output) int* * If A->Stype=NC, column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype=NR, column permutation vector of size A->nrow * which describes permutation of columns of tranpose(A) * (rows of A) as described above. * * perm_r (output) int*, * If A->Stype=NR, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype=NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype=NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype=NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype=NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype=NR). * Use column-wise storage scheme, i.e., U has types: * Stype = NCP, Dtype = _D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ trans_t trans; NCformat *Astore; DNformat *Bstore; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int i, n, panel_size, relax; fact_t fact; yes_no_t refact, usepr; double diag_pivot_thresh, drop_tol; void *work; int lwork; superlumt_options_t superlumt_options; Gstat_t Gstat; double t; /* Temporary time */ double *utime; flops_t *ops, flopcnt; /* ------------------------------------------------------------ Test the input parameters. ------------------------------------------------------------*/ Astore = A->Store; Bstore = B->Store; *info = 0; if ( nprocs <= 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(1, A->nrow) )*info = -7; if ( *info != 0 ) { i = -(*info); xerbla_("pzgssv", &i); return; } #if 0 /* Use the best sequential code. if this part is commented out, we will use the parallel code run on one processor. */ if ( nprocs == 1 ) { return; } #endif fact = EQUILIBRATE; refact = NO; trans = NOTRANS; panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = 1.0; usepr = NO; drop_tol = 0.0; work = NULL; lwork = 0; /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ n = A->ncol; StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); utime = Gstat.utime; ops = Gstat.ops; /* ------------------------------------------------------------ Convert A to NC format when necessary. ------------------------------------------------------------*/ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); trans = TRANS; } else if ( A->Stype == SLU_NC ) AA = A; /* ------------------------------------------------------------ Initialize the option structure superlumt_options using the user-input parameters; Apply perm_c to the columns of original A to form AC. ------------------------------------------------------------*/ pzgstrf_init(nprocs, fact, trans, refact, panel_size, relax, diag_pivot_thresh, usepr, drop_tol, perm_c, perm_r, work, lwork, AA, &AC, &superlumt_options, &Gstat); /* ------------------------------------------------------------ Compute the LU factorization of A. The following routine will create nprocs threads. ------------------------------------------------------------*/ pzgstrf(&superlumt_options, &AC, perm_r, L, U, &Gstat, info); flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; ops[FACT] = flopcnt; #if ( PRNTlevel==1 ) printf("nprocs = %d, flops %e, Mflops %.2f\n", nprocs, flopcnt, flopcnt/utime[FACT]*1e-6); printf("Parameters: w %d, relax %d, maxsuper %d, rowblk %d, colblk %d\n", sp_ienv(1), sp_ienv(2), sp_ienv(3), sp_ienv(4), sp_ienv(5)); fflush(stdout); #endif /* ------------------------------------------------------------ Solve the system A*X=B, overwriting B with X. ------------------------------------------------------------*/ if ( *info == 0 ) { t = SuperLU_timer_(); zgstrs (trans, L, U, perm_r, perm_c, B, &Gstat, info); utime[SOLVE] = SuperLU_timer_() - t; ops[SOLVE] = ops[TRISOLVE]; } /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ pxgstrf_finalize(&superlumt_options, &AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* ------------------------------------------------------------ Print timings, then deallocate statistic variables. ------------------------------------------------------------*/ #ifdef PROFILE { SCPformat *Lstore = (SCPformat *) L->Store; ParallelProfile(n, Lstore->nsuper+1, Gstat.num_panels, nprocs, &Gstat); } #endif PrintStat(&Gstat); StatFree(&Gstat); }
/* Here is a driver inspired by A. Sheffer's "cow flattener". */ static NLboolean __nlSolve_SUPERLU( NLboolean do_perm) { /* OpenNL Context */ __NLSparseMatrix* M = &(__nlCurrentContext->M); NLfloat* b = __nlCurrentContext->b; NLfloat* x = __nlCurrentContext->x; /* Compressed Row Storage matrix representation */ NLuint n = __nlCurrentContext->n; NLuint nnz = __nlSparseMatrixNNZ(M); /* Number of Non-Zero coeffs */ NLint* xa = __NL_NEW_ARRAY(NLint, n+1); NLfloat* rhs = __NL_NEW_ARRAY(NLfloat, n); NLfloat* a = __NL_NEW_ARRAY(NLfloat, nnz); NLint* asub = __NL_NEW_ARRAY(NLint, nnz); /* Permutation vector */ NLint* perm_r = __NL_NEW_ARRAY(NLint, n); NLint* perm = __NL_NEW_ARRAY(NLint, n); /* SuperLU variables */ SuperMatrix A, B; /* System */ SuperMatrix L, U; /* Inverse of A */ NLint info; /* status code */ DNformat *vals = NULL; /* access to result */ float *rvals = NULL; /* access to result */ /* SuperLU options and stats */ superlu_options_t options; SuperLUStat_t stat; /* Temporary variables */ __NLRowColumn* Ri = NULL; NLuint i,jj,count; __nl_assert(!(M->storage & __NL_SYMMETRIC)); __nl_assert(M->storage & __NL_ROWS); __nl_assert(M->m == M->n); /* * Step 1: convert matrix M into SuperLU compressed column * representation. * ------------------------------------------------------- */ count = 0; for(i=0; i<n; i++) { Ri = &(M->row[i]); xa[i] = count; for(jj=0; jj<Ri->size; jj++) { a[count] = Ri->coeff[jj].value; asub[count] = Ri->coeff[jj].index; count++; } } xa[n] = nnz; /* Save memory for SuperLU */ __nlSparseMatrixClear(M); /* * Rem: symmetric storage does not seem to work with * SuperLU ... (->deactivated in main SLS::Solver driver) */ sCreate_CompCol_Matrix( &A, n, n, nnz, a, asub, xa, SLU_NR, /* Row_wise, no supernode */ SLU_S, /* floats */ SLU_GE /* general storage */ ); /* Step 2: create vector */ sCreate_Dense_Matrix( &B, n, 1, b, n, SLU_DN, /* Fortran-type column-wise storage */ SLU_S, /* floats */ SLU_GE /* general */ ); /* Step 3: get permutation matrix * ------------------------------ * com_perm: 0 -> no re-ordering * 1 -> re-ordering for A^t.A * 2 -> re-ordering for A^t+A * 3 -> approximate minimum degree ordering */ get_perm_c(do_perm ? 3 : 0, &A, perm); /* Step 4: call SuperLU main routine * --------------------------------- */ set_default_options(&options); options.ColPerm = MY_PERMC; StatInit(&stat); sgssv(&options, &A, perm, perm_r, &L, &U, &B, &stat, &info); /* Step 5: get the solution * ------------------------ * Fortran-type column-wise storage */ vals = (DNformat*)B.Store; rvals = (float*)(vals->nzval); if(info == 0) { for(i = 0; i < n; i++){ x[i] = rvals[i]; } } /* Step 6: cleanup * --------------- */ /* * For these two ones, only the "store" structure * needs to be deallocated (the arrays have been allocated * by us). */ Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&B); /* * These ones need to be fully deallocated (they have been * allocated by SuperLU). */ Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); StatFree(&stat); __NL_DELETE_ARRAY(xa); __NL_DELETE_ARRAY(rhs); __NL_DELETE_ARRAY(a); __NL_DELETE_ARRAY(asub); __NL_DELETE_ARRAY(perm_r); __NL_DELETE_ARRAY(perm); return (info == 0); }