/*
 * Sets up the sparse matrix M with m rows and n columns.
 *
 * storage is a bit mask: when __NL_ROWS is set, M->row receives m
 * constructed row entries; when __NL_COLUMNS is set, M->column receives n
 * constructed column entries. An array whose bit is not set is left NULL.
 * The diagonal array is always allocated, with MIN(m,n) elements.
 */
static void __nlSparseMatrixConstruct(
    __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
) {
    NLuint k;

    M->m = m;
    M->n = n;
    M->storage = storage;

    /* Row-wise storage, only when requested. */
    M->row = NULL;
    if(storage & __NL_ROWS) {
        M->row = __NL_NEW_ARRAY(__NLRowColumn, m);
        for(k = 0; k < m; ++k) {
            __nlRowColumnConstruct(M->row + k);
        }
    }

    /* Column-wise storage, only when requested. */
    M->column = NULL;
    if(storage & __NL_COLUMNS) {
        M->column = __NL_NEW_ARRAY(__NLRowColumn, n);
        for(k = 0; k < n; ++k) {
            __nlRowColumnConstruct(M->column + k);
        }
    }

    /* The diagonal is as long as the smaller of the two dimensions. */
    M->diag_size = MIN(m,n);
    M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size);
}
static void __nlBeginMatrix() { NLuint i; NLuint n = 0; NLenum storage = __NL_ROWS; __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX); if (!__nlCurrentContext->solve_again) { for(i=0; i<__nlCurrentContext->nb_variables; i++) { if(!__nlCurrentContext->variable[i].locked) __nlCurrentContext->variable[i].index = n++; else __nlCurrentContext->variable[i].index = ~0; } __nlCurrentContext->n = n; /* a least squares problem results in a symmetric matrix */ if(__nlCurrentContext->least_squares) __nlCurrentContext->symmetric = NL_TRUE; if(__nlCurrentContext->symmetric) storage = (storage | __NL_SYMMETRIC); /* SuperLU storage does not support symmetric storage */ storage = (storage & ~__NL_SYMMETRIC); __nlSparseMatrixConstruct(&__nlCurrentContext->M, n, n, storage); __nlCurrentContext->alloc_M = NL_TRUE; __nlCurrentContext->x = __NL_NEW_ARRAY(NLfloat, n); __nlCurrentContext->alloc_x = NL_TRUE; __nlCurrentContext->b = __NL_NEW_ARRAY(NLfloat, n); __nlCurrentContext->alloc_b = NL_TRUE; } else { /* need to recompute b only, A is not constructed anymore */ __NL_CLEAR_ARRAY(NLfloat, __nlCurrentContext->b, __nlCurrentContext->n); } __nlVariablesToVector(); __nlRowColumnConstruct(&__nlCurrentContext->af); __nlCurrentContext->alloc_af = NL_TRUE; __nlRowColumnConstruct(&__nlCurrentContext->al); __nlCurrentContext->alloc_al = NL_TRUE; __nlCurrentContext->current_row = 0; }
/*
 * Enlarges the coefficient storage of c.
 *
 * An empty entry (capacity 0) gets a fresh array of 4 coefficients;
 * otherwise the capacity is doubled and the existing array is resized
 * through __NL_RENEW_ARRAY.
 */
static void __nlRowColumnGrow(__NLRowColumn* c) {
    if(c->capacity == 0) {
        /* First allocation. */
        c->capacity = 4;
        c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity);
        return;
    }

    /* Already allocated: double the capacity. */
    c->capacity *= 2;
    c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity);
}
static void __nlBeginMatrix() { NLuint i; NLuint m = 0, n = 0; NLenum storage = __NL_ROWS; __NLContext *context = __nlCurrentContext; __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX); if (!context->solve_again) { for(i=0; i<context->nb_variables; i++) { if(context->variable[i].locked) { context->variable[i].index = ~0; context->variable[i].a = __NL_NEW(__NLRowColumn); __nlRowColumnConstruct(context->variable[i].a); } else context->variable[i].index = n++; } m = (context->nb_rows == 0)? n: context->nb_rows; context->m = m; context->n = n; __nlSparseMatrixConstruct(&context->M, m, n, storage); context->alloc_M = NL_TRUE; context->b = __NL_NEW_ARRAY(NLfloat, m*context->nb_rhs); context->alloc_b = NL_TRUE; context->x = __NL_NEW_ARRAY(NLfloat, n*context->nb_rhs); context->alloc_x = NL_TRUE; } else { /* need to recompute b only, A is not constructed anymore */ __NL_CLEAR_ARRAY(NLfloat, context->b, context->m*context->nb_rhs); } __nlVariablesToVector(); }
/*
 * Enters the SYSTEM state of the current context.
 *
 * Asserts that at least one variable has been declared. When solving
 * again, the transition starts from the SYSTEM_SOLVED state and nothing
 * is allocated; otherwise it starts from the INITIAL state and the
 * variable array (nb_variables entries) is allocated.
 */
static void __nlBeginSystem() {
    __nl_assert(__nlCurrentContext->nb_variables > 0);

    if(__nlCurrentContext->solve_again) {
        __nlTransition(__NL_STATE_SYSTEM_SOLVED, __NL_STATE_SYSTEM);
        return;
    }

    __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM);

    __nlCurrentContext->variable = __NL_NEW_ARRAY(
        __NLVariable, __nlCurrentContext->nb_variables
    );
    __nlCurrentContext->alloc_variable = NL_TRUE;
}
static void __nlEndMatrix() { __NLContext *context = __nlCurrentContext; NLuint i; __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED); if(context->least_squares) { if(!__nlCurrentContext->solve_again) { __nlSparseMatrix_square(&context->MtM, &context->M); context->alloc_MtM = NL_TRUE; context->Mtb = __NL_NEW_ARRAY(NLfloat, context->n*context->nb_rhs); context->alloc_Mtb = NL_TRUE; } } for(i=0; i<context->nb_rhs; i++) __nlEndMatrixRHS(i); }
/* Here is a driver inspired by A. Sheffer's "cow flattener". */
/*
 * Computes the SuperLU LU factorization of the system matrix of context
 * and stores the factors and permutations in context->slu.
 *
 * The factorized matrix is context->MtM for a least squares problem and
 * context->M otherwise. It must be square, stored by rows and without
 * symmetric storage (asserted). Its sparse storage is released once it
 * has been converted to the compressed arrays.
 *
 * permutation: optional column permutation of n entries.
 *   - NULL: a permutation is computed with get_perm_c(3, ...).
 *   - first entry == -1: a permutation is computed the same way and
 *     copied back into permutation.
 *   - otherwise: permutation is used as the column permutation.
 *
 * Returns true when sgstrf reports info == 0.
 */
static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation)
{
    /* OpenNL Context */
    __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M;
    NLuint n = context->n;
    NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */

    /* Compressed Row Storage matrix representation */
    NLint *xa = __NL_NEW_ARRAY(NLint, n+1);
    NLfloat *a = __NL_NEW_ARRAY(NLfloat, nnz);
    NLint *asub = __NL_NEW_ARRAY(NLint, nnz);
    NLint *etree = __NL_NEW_ARRAY(NLint, n);

    /* SuperLU variables */
    SuperMatrix At, AtP;
    NLint info, panel_size, relax;
    superlu_options_t options;

    /* Temporary variables */
    NLuint i, jj, count;

    __nl_assert(!(M->storage & __NL_SYMMETRIC));
    __nl_assert(M->storage & __NL_ROWS);
    __nl_assert(M->m == M->n);

    /*
     * Flatten the rows of M into compressed arrays: xa[i] is the offset of
     * row i in a / asub. Handed to SuperLU as column-wise storage below,
     * these arrays describe the transpose of M.
     */
    for(i=0, count=0; i<n; i++) {
        __NLRowColumn *Ri = M->row + i;
        xa[i] = count;

        for(jj=0; jj<Ri->size; jj++, count++) {
            a[count] = Ri->coeff[jj].value;
            asub[count] = Ri->coeff[jj].index;
        }
    }
    xa[n] = nnz;

    /* Free M, don't need it anymore at this point */
    __nlSparseMatrixClear(M);

    /* Create superlu A matrix transposed */
    sCreate_CompCol_Matrix(
        &At, n, n, nnz, a, asub, xa,
        SLU_NC,     /* Column-wise, no supernode */
        SLU_S,      /* floats */
        SLU_GE      /* general storage */
    );

    /* Set superlu options */
    set_default_options(&options);
    options.ColPerm = MY_PERMC;
    options.Fact = DOFACT;

    StatInit(&(context->slu.stat));

    panel_size = sp_ienv(1); /* sp_ienv give us the defaults */
    relax = sp_ienv(2);

    /* Compute permutation and permuted matrix */
    context->slu.perm_r = __NL_NEW_ARRAY(NLint, n);
    context->slu.perm_c = __NL_NEW_ARRAY(NLint, n);

    if ((permutation == NULL) || (*permutation == -1)) {
        get_perm_c(3, &At, context->slu.perm_c);

        /* Hand the computed permutation back to the caller, if wanted. */
        if (permutation) {
            memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n);
        }
    }
    else {
        memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n);
    }

    sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP);

    /* Decompose into L and U */
    sgstrf(&options, &AtP, relax, panel_size,
        etree, NULL, 0, context->slu.perm_c, context->slu.perm_r,
        &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info);

    /*
     * Cleanup: only the store of At is destroyed through SuperLU, the
     * arrays a / asub / xa it refers to are released below.
     */
    Destroy_SuperMatrix_Store(&At);
    Destroy_CompCol_Permuted(&AtP);

    __NL_DELETE_ARRAY(etree);
    __NL_DELETE_ARRAY(xa);
    __NL_DELETE_ARRAY(a);
    __NL_DELETE_ARRAY(asub);

    context->slu.alloc_slu = NL_TRUE;

    return (info == 0);
}
/* Here is a driver inspired by A. Sheffer's "cow flattener". */
/*
 * Solves the linear system of the current context with SuperLU (sgssv).
 *
 * The matrix is __nlCurrentContext->M, which must be square, stored by
 * rows and without symmetric storage (asserted). Its sparse storage is
 * released once it has been converted to the compressed arrays. The
 * right-hand side is __nlCurrentContext->b; on success the solution is
 * copied into __nlCurrentContext->x.
 *
 * do_perm: when true the column permutation is computed with
 *          get_perm_c option 3 (approximate minimum degree ordering),
 *          otherwise with option 0 (no re-ordering).
 *
 * Returns true when sgssv reports info == 0; x is left untouched
 * otherwise.
 */
static NLboolean __nlSolve_SUPERLU( NLboolean do_perm) {

    /* OpenNL Context */
    __NLSparseMatrix* M = &(__nlCurrentContext->M);
    NLfloat* b = __nlCurrentContext->b;
    NLfloat* x = __nlCurrentContext->x;

    /* Compressed Row Storage matrix representation */
    NLuint n = __nlCurrentContext->n;
    NLuint nnz = __nlSparseMatrixNNZ(M); /* Number of Non-Zero coeffs */
    NLint* xa = __NL_NEW_ARRAY(NLint, n+1);
    NLfloat* a = __NL_NEW_ARRAY(NLfloat, nnz);
    NLint* asub = __NL_NEW_ARRAY(NLint, nnz);

    /* Permutation vector */
    NLint* perm_r = __NL_NEW_ARRAY(NLint, n);
    NLint* perm = __NL_NEW_ARRAY(NLint, n);

    /* SuperLU variables */
    SuperMatrix A, B; /* System */
    SuperMatrix L, U; /* Inverse of A */
    NLint info; /* status code */
    DNformat *vals = NULL; /* access to result */
    float *rvals = NULL; /* access to result */

    /* SuperLU options and stats */
    superlu_options_t options;
    SuperLUStat_t stat;

    /* Temporary variables */
    __NLRowColumn* Ri = NULL;
    NLuint i,jj,count;

    __nl_assert(!(M->storage & __NL_SYMMETRIC));
    __nl_assert(M->storage & __NL_ROWS);
    __nl_assert(M->m == M->n);

    /*
     * Step 1: convert matrix M into SuperLU compressed row
     * representation (xa[i] is the offset of row i in a / asub).
     * -------------------------------------------------------
     */

    count = 0;
    for(i=0; i<n; i++) {
        Ri = &(M->row[i]);
        xa[i] = count;
        for(jj=0; jj<Ri->size; jj++) {
            a[count] = Ri->coeff[jj].value;
            asub[count] = Ri->coeff[jj].index;
            count++;
        }
    }
    xa[n] = nnz;

    /* Save memory for SuperLU */
    __nlSparseMatrixClear(M);

    /*
     * Rem: symmetric storage does not seem to work with
     * SuperLU ... (->deactivated in main SLS::Solver driver)
     */
    sCreate_CompCol_Matrix(
        &A, n, n, nnz, a, asub, xa,
        SLU_NR, /* Row_wise, no supernode */
        SLU_S,  /* floats */
        SLU_GE  /* general storage */
    );

    /* Step 2: create vector */
    sCreate_Dense_Matrix(
        &B, n, 1, b, n,
        SLU_DN, /* Fortran-type column-wise storage */
        SLU_S,  /* floats */
        SLU_GE  /* general */
    );

    /* Step 3: get permutation matrix
     * ------------------------------
     * com_perm: 0 -> no re-ordering
     *           1 -> re-ordering for A^t.A
     *           2 -> re-ordering for A^t+A
     *           3 -> approximate minimum degree ordering
     */
    get_perm_c(do_perm ? 3 : 0, &A, perm);

    /* Step 4: call SuperLU main routine
     * ---------------------------------
     */
    set_default_options(&options);
    options.ColPerm = MY_PERMC;
    StatInit(&stat);

    sgssv(&options, &A, perm, perm_r, &L, &U, &B, &stat, &info);

    /* Step 5: get the solution
     * ------------------------
     * Fortran-type column-wise storage
     */
    vals = (DNformat*)B.Store;
    rvals = (float*)(vals->nzval);
    if(info == 0) {
        for(i = 0; i < n; i++){
            x[i] = rvals[i];
        }
    }

    /* Step 6: cleanup
     * ---------------
     */

    /*
     * For these two ones, only the "store" structure
     * needs to be deallocated (the arrays have been allocated
     * by us).
     */
    Destroy_SuperMatrix_Store(&A);
    Destroy_SuperMatrix_Store(&B);

    /*
     * These ones need to be fully deallocated (they have been
     * allocated by SuperLU).
     */
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);

    StatFree(&stat);

    __NL_DELETE_ARRAY(xa);
    __NL_DELETE_ARRAY(a);
    __NL_DELETE_ARRAY(asub);
    __NL_DELETE_ARRAY(perm_r);
    __NL_DELETE_ARRAY(perm);

    return (info == 0);
}