/* ----------------------------------------------------------------------------- Evaluate a THUNK_SELECTOR if possible. p points to a THUNK_SELECTOR that we want to evaluate. The result of "evaluating" it will be evacuated and a pointer to the to-space closure will be returned. If the THUNK_SELECTOR could not be evaluated (its selectee is still a THUNK, for example), then the THUNK_SELECTOR itself will be evacuated. -------------------------------------------------------------------------- */ static void unchain_thunk_selectors(StgSelector *p, StgClosure *val) { StgSelector *prev; prev = NULL; while (p) { ASSERT(p->header.info == &stg_WHITEHOLE_info); // val must be in to-space. Not always: when we recursively // invoke eval_thunk_selector(), the recursive calls will not // evacuate the value (because we want to select on the value, // not evacuate it), so in this case val is in from-space. // ASSERT(!HEAP_ALLOCED_GC(val) || Bdescr((P_)val)->gen_no > N || (Bdescr((P_)val)->flags & BF_EVACUATED)); prev = (StgSelector*)((StgClosure *)p)->payload[0]; // Update the THUNK_SELECTOR with an indirection to the // value. The value is still in from-space at this stage. // // (old note: Why not do upd_evacuee(q,p)? Because we have an // invariant that an EVACUATED closure always points to an // object in the same or an older generation (required by // the short-cut test in the EVACUATED case, below). if ((StgClosure *)p == val) { // must be a loop; just leave a BLACKHOLE in place. This // can happen when we have a chain of selectors that // eventually loops back on itself. We can't leave an // indirection pointing to itself, and we want the program // to deadlock if it ever enters this closure, so // BLACKHOLE is correct. // XXX we do not have BLACKHOLEs any more; replace with // a THUNK_SELECTOR again. This will go into a loop if it is // entered, and should result in a NonTermination exception. ((StgThunk *)p)->payload[0] = val; write_barrier(); SET_INFO((StgClosure *)p, &stg_sel_0_upd_info); } else { ((StgInd *)p)->indirectee = val; write_barrier(); SET_INFO((StgClosure *)p, &stg_IND_info); } // For the purposes of LDV profiling, we have created an // indirection. LDV_RECORD_CREATE(p); p = prev; } }
void revertCAFs( void ) { StgIndStatic *c; for (c = revertible_caf_list; c != (StgIndStatic *)END_OF_STATIC_LIST; c = (StgIndStatic *)c->static_link) { SET_INFO((StgClosure *)c, c->saved_info); c->saved_info = NULL; // could, but not necessary: c->static_link = NULL; } revertible_caf_list = (StgIndStatic*)END_OF_STATIC_LIST; }
STATIC_INLINE StgInd * lockCAF (StgRegTable *reg, StgIndStatic *caf) { const StgInfoTable *orig_info; Capability *cap = regTableToCapability(reg); StgInd *bh; orig_info = caf->header.info; #ifdef THREADED_RTS const StgInfoTable *cur_info; if (orig_info == &stg_IND_STATIC_info || orig_info == &stg_WHITEHOLE_info) { // already claimed by another thread; re-enter the CAF return NULL; } cur_info = (const StgInfoTable *) cas((StgVolatilePtr)&caf->header.info, (StgWord)orig_info, (StgWord)&stg_WHITEHOLE_info); if (cur_info != orig_info) { // already claimed by another thread; re-enter the CAF return NULL; } // successfully claimed by us; overwrite with IND_STATIC #endif // For the benefit of revertCAFs(), save the original info pointer caf->saved_info = orig_info; // Allocate the blackhole indirection closure bh = (StgInd *)allocate(cap, sizeofW(*bh)); SET_HDR(bh, &stg_CAF_BLACKHOLE_info, caf->header.prof.ccs); bh->indirectee = (StgClosure *)cap->r.rCurrentTSO; caf->indirectee = (StgClosure *)bh; write_barrier(); SET_INFO((StgClosure*)caf,&stg_IND_STATIC_info); return bh; }
/** * \brief handles the supported <track> sub-elements */ static bool add_meta( input_item_t *p_input_item, track_elem_t *p_track ) { /* exit if setting is impossible */ if( !p_input_item || !p_track ) return false; #define SET_INFO( type, prop ) \ if( p_track->prop ) {input_item_Set##type( p_input_item, p_track->prop );} SET_INFO( Title, name ) SET_INFO( Artist, artist ) SET_INFO( Album, album ) SET_INFO( Genre, genre ) SET_INFO( TrackNum, trackNum ) SET_INFO( Duration, duration ) #undef SET_INFO return true; }
STATIC_INLINE StgWord lockCAF (StgClosure *caf, StgClosure *bh) { const StgInfoTable *orig_info; orig_info = caf->header.info; #ifdef THREADED_RTS const StgInfoTable *cur_info; if (orig_info == &stg_IND_STATIC_info || orig_info == &stg_WHITEHOLE_info) { // already claimed by another thread; re-enter the CAF return 0; } cur_info = (const StgInfoTable *) cas((StgVolatilePtr)&caf->header.info, (StgWord)orig_info, (StgWord)&stg_WHITEHOLE_info); if (cur_info != orig_info) { // already claimed by another thread; re-enter the CAF return 0; } // successfully claimed by us; overwrite with IND_STATIC #endif // For the benefit of revertCAFs(), save the original info pointer ((StgIndStatic *)caf)->saved_info = orig_info; ((StgIndStatic*)caf)->indirectee = bh; write_barrier(); SET_INFO(caf,&stg_IND_STATIC_info); return 1; }
/* ----------------------------------------------------------------------------- * Pausing a thread * * We have to prepare for GC - this means doing lazy black holing * here. We also take the opportunity to do stack squeezing if it's * turned on. * -------------------------------------------------------------------------- */ void threadPaused(Capability *cap, StgTSO *tso) { StgClosure *frame; const StgRetInfoTable *info; const StgInfoTable *bh_info; const StgInfoTable *cur_bh_info USED_IF_THREADS; StgClosure *bh; StgPtr stack_end; uint32_t words_to_squeeze = 0; uint32_t weight = 0; uint32_t weight_pending = 0; bool prev_was_update_frame = false; StgWord heuristic_says_squeeze; // Check to see whether we have threads waiting to raise // exceptions, and we're not blocking exceptions, or are blocked // interruptibly. This is important; if a thread is running with // TSO_BLOCKEX and becomes blocked interruptibly, this is the only // place we ensure that the blocked_exceptions get a chance. maybePerformBlockedException (cap, tso); if (tso->what_next == ThreadKilled) { return; } // NB. Updatable thunks *must* be blackholed, either by eager blackholing or // lazy blackholing. See Note [upd-black-hole] in sm/Scav.c. stack_end = tso->stackobj->stack + tso->stackobj->stack_size; frame = (StgClosure *)tso->stackobj->sp; while ((P_)frame < stack_end) { info = get_ret_itbl(frame); switch (info->i.type) { case UPDATE_FRAME: // If we've already marked this frame, then stop here. if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { if (prev_was_update_frame) { words_to_squeeze += sizeofW(StgUpdateFrame); weight += weight_pending; weight_pending = 0; } goto end; } SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info); bh = ((StgUpdateFrame *)frame)->updatee; bh_info = bh->header.info; #if defined(THREADED_RTS) retry: #endif // Note [suspend duplicate work] // // If the info table is a WHITEHOLE or a BLACKHOLE, then // another thread has claimed it (via the SET_INFO() // below), or is in the process of doing so. In that case // we want to suspend the work that the current thread has // done on this thunk and wait until the other thread has // finished. // // If eager blackholing is taking place, it could be the // case that the blackhole points to the current // TSO. e.g.: // // this thread other thread // -------------------------------------------------------- // c->indirectee = other_tso; // c->header.info = EAGER_BH // threadPaused(): // c->header.info = WHITEHOLE // c->indirectee = other_tso // c->indirectee = this_tso; // c->header.info = EAGER_BH // c->header.info = BLACKHOLE // threadPaused() // *** c->header.info is now BLACKHOLE, // c->indirectee points to this_tso // // So in this case do *not* suspend the work of the // current thread, because the current thread will become // deadlocked on itself. See #5226 for an instance of // this bug. // // Note that great care is required when entering computations // suspended by this mechanism. See Note [AP_STACKs must be eagerly // blackholed] for details. if (((bh_info == &stg_BLACKHOLE_info) && ((StgInd*)bh)->indirectee != (StgClosure*)tso) || (bh_info == &stg_WHITEHOLE_info)) { debugTrace(DEBUG_squeeze, "suspending duplicate work: %ld words of stack", (long)((StgPtr)frame - tso->stackobj->sp)); // If this closure is already an indirection, then // suspend the computation up to this point. // NB. check raiseAsync() to see what happens when // we're in a loop (#2783). suspendComputation(cap,tso,(StgUpdateFrame*)frame); // Now drop the update frame, and arrange to return // the value to the frame underneath: tso->stackobj->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2; tso->stackobj->sp[1] = (StgWord)bh; ASSERT(bh->header.info != &stg_TSO_info); tso->stackobj->sp[0] = (W_)&stg_enter_info; // And continue with threadPaused; there might be // yet more computation to suspend. frame = (StgClosure *)(tso->stackobj->sp + 2); prev_was_update_frame = false; continue; } // We should never have made it here in the event of blackholes that // we already own; they should have been marked when we blackholed // them and consequently we should have stopped our stack walk // above. ASSERT(!((bh_info == &stg_BLACKHOLE_info) && (((StgInd*)bh)->indirectee == (StgClosure*)tso))); // zero out the slop so that the sanity checker can tell // where the next closure is. OVERWRITING_CLOSURE(bh); // an EAGER_BLACKHOLE or CAF_BLACKHOLE gets turned into a // BLACKHOLE here. #if defined(THREADED_RTS) // first we turn it into a WHITEHOLE to claim it, and if // successful we write our TSO and then the BLACKHOLE info pointer. cur_bh_info = (const StgInfoTable *) cas((StgVolatilePtr)&bh->header.info, (StgWord)bh_info, (StgWord)&stg_WHITEHOLE_info); if (cur_bh_info != bh_info) { bh_info = cur_bh_info; goto retry; } #endif // The payload of the BLACKHOLE points to the TSO ((StgInd *)bh)->indirectee = (StgClosure *)tso; write_barrier(); SET_INFO(bh,&stg_BLACKHOLE_info); // .. and we need a write barrier, since we just mutated the closure: recordClosureMutated(cap,bh); // We pretend that bh has just been created. LDV_RECORD_CREATE(bh); frame = (StgClosure *) ((StgUpdateFrame *)frame + 1); if (prev_was_update_frame) { words_to_squeeze += sizeofW(StgUpdateFrame); weight += weight_pending; weight_pending = 0; } prev_was_update_frame = true; break; case UNDERFLOW_FRAME: case STOP_FRAME: goto end; // normal stack frames; do nothing except advance the pointer default: { uint32_t frame_size = stack_frame_sizeW(frame); weight_pending += frame_size; frame = (StgClosure *)((StgPtr)frame + frame_size); prev_was_update_frame = false; } } } end: // Should we squeeze or not? Arbitrary heuristic: we squeeze if // the number of words we have to shift down is less than the // number of stack words we squeeze away by doing so. // The threshold was bumped from 5 to 8 as a result of #2797 heuristic_says_squeeze = ((weight <= 8 && words_to_squeeze > 0) || weight < words_to_squeeze); debugTrace(DEBUG_squeeze, "words_to_squeeze: %d, weight: %d, squeeze: %s", words_to_squeeze, weight, heuristic_says_squeeze ? "YES" : "NO"); if (RtsFlags.GcFlags.squeezeUpdFrames == true && heuristic_says_squeeze) { stackSqueeze(cap, tso, (StgPtr)frame); tso->flags |= TSO_SQUEEZED; // This flag tells threadStackOverflow() that the stack was // squeezed, because it may not need to be expanded. } else { tso->flags &= ~TSO_SQUEEZED; } }
void iluk_seq(Euclid_dh ctx) { START_FUNC_DH HYPRE_Int *rp, *cval, *diag; HYPRE_Int *CVAL; HYPRE_Int i, j, len, count, col, idx = 0; HYPRE_Int *list, *marker, *fill, *tmpFill; HYPRE_Int temp, m, from = ctx->from, to = ctx->to; HYPRE_Int *n2o_row, *o2n_col, beg_row, beg_rowP; double *AVAL; REAL_DH *work, *aval; Factor_dh F = ctx->F; SubdomainGraph_dh sg = ctx->sg; bool debug = false; if (logFile != NULL && Parser_dhHasSwitch(parser_dh, "-debug_ilu")) debug = true; m = F->m; rp = F->rp; cval = F->cval; fill = F->fill; diag = F->diag; aval = F->aval; work = ctx->work; count = rp[from]; if (sg == NULL) { SET_V_ERROR("subdomain graph is NULL"); } n2o_row = ctx->sg->n2o_row; o2n_col = ctx->sg->o2n_col; beg_row = ctx->sg->beg_row[myid_dh]; beg_rowP = ctx->sg->beg_rowP[myid_dh]; /* allocate and initialize working space */ list = (HYPRE_Int*)MALLOC_DH((m+1)*sizeof(HYPRE_Int)); CHECK_V_ERROR; marker = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; tmpFill = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; for (i=0; i<m; ++i) marker[i] = -1; /* working space for values */ for (i=0; i<m; ++i) work[i] = 0.0; /* printf_dh("====================== starting iluk_seq; level= %i\n\n", ctx->level); */ /*---------- main loop ----------*/ for (i=from; i<to; ++i) { HYPRE_Int row = n2o_row[i]; /* local row number */ HYPRE_Int globalRow = row+beg_row; /* global row number */ /*hypre_fprintf(logFile, "--------------------------------- localRow= %i\n", 1+i); */ if (debug) { hypre_fprintf(logFile, "ILU_seq ================================= starting local row: %i, (global= %i) level= %i\n", i+1, i+1+sg->beg_rowP[myid_dh], ctx->level); } EuclidGetRow(ctx->A, globalRow, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* compute scaling value for row(i) */ if (ctx->isScaled) { compute_scaling_private(i, len, AVAL, ctx); CHECK_V_ERROR; } /* Compute symbolic factor for row(i); this also performs sparsification */ count = symbolic_row_private(i, list, marker, tmpFill, len, CVAL, AVAL, o2n_col, ctx, debug); CHECK_V_ERROR; /* Ensure adequate storage; reallocate, if necessary. */ if (idx + count > F->alloc) { Factor_dhReallocate(F, idx, count); CHECK_V_ERROR; SET_INFO("REALLOCATED from ilu_seq"); cval = F->cval; fill = F->fill; aval = F->aval; } /* Copy factored symbolic row to permanent storage */ col = list[m]; while (count--) { cval[idx] = col; fill[idx] = tmpFill[col]; ++idx; /*hypre_fprintf(logFile, " col= %i\n", 1+col); */ col = list[col]; } /* add row-pointer to start of next row. */ rp[i+1] = idx; /* Insert pointer to diagonal */ temp = rp[i]; while (cval[temp] != i) ++temp; diag[i] = temp; /*hypre_fprintf(logFile, " diag[i]= %i\n", diag); */ /* compute numeric factor for current row */ numeric_row_private(i, len, CVAL, AVAL, work, o2n_col, ctx, debug); CHECK_V_ERROR EuclidRestoreRow(ctx->A, globalRow, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* Copy factored numeric row to permanent storage, and re-zero work vector */ if (debug) { hypre_fprintf(logFile, "ILU_seq: "); for (j=rp[i]; j<rp[i+1]; ++j) { col = cval[j]; aval[j] = work[col]; work[col] = 0.0; hypre_fprintf(logFile, "%i,%i,%g ; ", 1+cval[j], fill[j], aval[j]); fflush(logFile); } hypre_fprintf(logFile, "\n"); } else { for (j=rp[i]; j<rp[i+1]; ++j) { col = cval[j]; aval[j] = work[col]; work[col] = 0.0; } } /* check for zero diagonal */ if (! aval[diag[i]]) { hypre_sprintf(msgBuf_dh, "zero diagonal in local row %i", i+1); SET_V_ERROR(msgBuf_dh); } } FREE_DH(list); CHECK_V_ERROR; FREE_DH(tmpFill); CHECK_V_ERROR; FREE_DH(marker); CHECK_V_ERROR; /* adjust column indices back to global */ if (beg_rowP) { HYPRE_Int start = rp[from]; HYPRE_Int stop = rp[to]; for (i=start; i<stop; ++i) cval[i] += beg_rowP; } /* for debugging: this is so the Print methods will work, even if F hasn't been fully factored */ for (i=to+1; i<m; ++i) rp[i] = 0; END_FUNC_DH }
void ilut_seq(Euclid_dh ctx) { START_FUNC_DH HYPRE_Int *rp, *cval, *diag, *CVAL; HYPRE_Int i, len, count, col, idx = 0; HYPRE_Int *list, *marker; HYPRE_Int temp, m, from, to; HYPRE_Int *n2o_row, *o2n_col, beg_row, beg_rowP; double *AVAL, droptol; REAL_DH *work, *aval, val; Factor_dh F = ctx->F; SubdomainGraph_dh sg = ctx->sg; bool debug = false; if (logFile != NULL && Parser_dhHasSwitch(parser_dh, "-debug_ilu")) debug = true; m = F->m; rp = F->rp; cval = F->cval; diag = F->diag; aval = F->aval; work = ctx->work; from = ctx->from; to = ctx->to; count = rp[from]; droptol = ctx->droptol; if (sg == NULL) { SET_V_ERROR("subdomain graph is NULL"); } n2o_row = ctx->sg->n2o_row; o2n_col = ctx->sg->o2n_col; beg_row = ctx->sg->beg_row[myid_dh]; beg_rowP = ctx->sg->beg_rowP[myid_dh]; /* allocate and initialize working space */ list = (HYPRE_Int*)MALLOC_DH((m+1)*sizeof(HYPRE_Int)); CHECK_V_ERROR; marker = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; for (i=0; i<m; ++i) marker[i] = -1; rp[0] = 0; /* working space for values */ for (i=0; i<m; ++i) work[i] = 0.0; /* ----- main loop start ----- */ for (i=from; i<to; ++i) { HYPRE_Int row = n2o_row[i]; /* local row number */ HYPRE_Int globalRow = row + beg_row; /* global row number */ EuclidGetRow(ctx->A, globalRow, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* compute scaling value for row(i) */ compute_scaling_private(i, len, AVAL, ctx); CHECK_V_ERROR; /* compute factor for row i */ count = ilut_row_private(i, list, o2n_col, marker, len, CVAL, AVAL, work, ctx, debug); CHECK_V_ERROR; EuclidRestoreRow(ctx->A, globalRow, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* Ensure adequate storage; reallocate, if necessary. */ if (idx + count > F->alloc) { Factor_dhReallocate(F, idx, count); CHECK_V_ERROR; SET_INFO("REALLOCATED from ilu_seq"); cval = F->cval; aval = F->aval; } /* Copy factored row to permanent storage, apply 2nd drop test, and re-zero work vector */ col = list[m]; while (count--) { val = work[col]; if (col == i || fabs(val) > droptol) { cval[idx] = col; aval[idx++] = val; work[col] = 0.0; } col = list[col]; } /* add row-pointer to start of next row. */ rp[i+1] = idx; /* Insert pointer to diagonal */ temp = rp[i]; while (cval[temp] != i) ++temp; diag[i] = temp; /* check for zero diagonal */ if (! aval[diag[i]]) { hypre_sprintf(msgBuf_dh, "zero diagonal in local row %i", i+1); SET_V_ERROR(msgBuf_dh); } } /* --------- main loop end --------- */ /* adjust column indices back to global */ if (beg_rowP) { HYPRE_Int start = rp[from]; HYPRE_Int stop = rp[to]; for (i=start; i<stop; ++i) cval[i] += beg_rowP; } FREE_DH(list); FREE_DH(marker); END_FUNC_DH }
void iluk_seq_block(Euclid_dh ctx) { START_FUNC_DH HYPRE_Int *rp, *cval, *diag; HYPRE_Int *CVAL; HYPRE_Int h, i, j, len, count, col, idx = 0; HYPRE_Int *list, *marker, *fill, *tmpFill; HYPRE_Int temp, m; HYPRE_Int *n2o_row, *o2n_col, *beg_rowP, *n2o_sub, blocks; HYPRE_Int *row_count, *dummy = NULL, dummy2[1]; double *AVAL; REAL_DH *work, *aval; Factor_dh F = ctx->F; SubdomainGraph_dh sg = ctx->sg; bool bj = false, constrained = false; HYPRE_Int discard = 0; HYPRE_Int gr = -1; /* globalRow */ bool debug = false; if (logFile != NULL && Parser_dhHasSwitch(parser_dh, "-debug_ilu")) debug = true; /*hypre_fprintf(stderr, "====================== starting iluk_seq_block; level= %i\n\n", ctx->level); */ if (!strcmp(ctx->algo_par, "bj")) bj = true; constrained = ! Parser_dhHasSwitch(parser_dh, "-unconstrained"); m = F->m; rp = F->rp; cval = F->cval; fill = F->fill; diag = F->diag; aval = F->aval; work = ctx->work; if (sg != NULL) { n2o_row = sg->n2o_row; o2n_col = sg->o2n_col; row_count = sg->row_count; /* beg_row = sg->beg_row ; */ beg_rowP = sg->beg_rowP; n2o_sub = sg->n2o_sub; blocks = sg->blocks; } else { dummy = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; for (i=0; i<m; ++i) dummy[i] = i; n2o_row = dummy; o2n_col = dummy; dummy2[0] = m; row_count = dummy2; /* beg_row = 0; */ beg_rowP = dummy; n2o_sub = dummy; blocks = 1; } /* allocate and initialize working space */ list = (HYPRE_Int*)MALLOC_DH((m+1)*sizeof(HYPRE_Int)); CHECK_V_ERROR; marker = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; tmpFill = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; for (i=0; i<m; ++i) marker[i] = -1; /* working space for values */ for (i=0; i<m; ++i) work[i] = 0.0; /*---------- main loop ----------*/ for (h=0; h<blocks; ++h) { /* 1st and last row in current block, with respect to A */ HYPRE_Int curBlock = n2o_sub[h]; HYPRE_Int first_row = beg_rowP[curBlock]; HYPRE_Int end_row = first_row + row_count[curBlock]; if (debug) { hypre_fprintf(logFile, "\n\nILU_seq BLOCK: %i @@@@@@@@@@@@@@@ \n", curBlock); } for (i=first_row; i<end_row; ++i) { HYPRE_Int row = n2o_row[i]; ++gr; if (debug) { hypre_fprintf(logFile, "ILU_seq global: %i local: %i =================================\n", 1+gr, 1+i-first_row); } /*prinft("first_row= %i end_row= %i\n", first_row, end_row); */ EuclidGetRow(ctx->A, row, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* compute scaling value for row(i) */ if (ctx->isScaled) { compute_scaling_private(i, len, AVAL, ctx); CHECK_V_ERROR; } /* Compute symbolic factor for row(i); this also performs sparsification */ count = symbolic_row_private(i, list, marker, tmpFill, len, CVAL, AVAL, o2n_col, ctx, debug); CHECK_V_ERROR; /* Ensure adequate storage; reallocate, if necessary. */ if (idx + count > F->alloc) { Factor_dhReallocate(F, idx, count); CHECK_V_ERROR; SET_INFO("REALLOCATED from ilu_seq"); cval = F->cval; fill = F->fill; aval = F->aval; } /* Copy factored symbolic row to permanent storage */ col = list[m]; while (count--) { /* constrained pilu */ if (constrained && !bj) { if (col >= first_row && col < end_row) { cval[idx] = col; fill[idx] = tmpFill[col]; ++idx; } else { if (check_constraint_private(ctx, curBlock, col)) { cval[idx] = col; fill[idx] = tmpFill[col]; ++idx; } else { ++discard; } } col = list[col]; } /* block jacobi case */ else if (bj) { if (col >= first_row && col < end_row) { cval[idx] = col; fill[idx] = tmpFill[col]; ++idx; } else { ++discard; } col = list[col]; } /* general case */ else { cval[idx] = col; fill[idx] = tmpFill[col]; ++idx; col = list[col]; } } /* add row-pointer to start of next row. */ rp[i+1] = idx; /* Insert pointer to diagonal */ temp = rp[i]; while (cval[temp] != i) ++temp; diag[i] = temp; /* compute numeric factor for current row */ numeric_row_private(i, len, CVAL, AVAL, work, o2n_col, ctx, debug); CHECK_V_ERROR EuclidRestoreRow(ctx->A, row, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* Copy factored numeric row to permanent storage, and re-zero work vector */ if (debug) { hypre_fprintf(logFile, "ILU_seq: "); for (j=rp[i]; j<rp[i+1]; ++j) { col = cval[j]; aval[j] = work[col]; work[col] = 0.0; hypre_fprintf(logFile, "%i,%i,%g ; ", 1+cval[j], fill[j], aval[j]); } hypre_fprintf(logFile, "\n"); } /* normal operation */ else { for (j=rp[i]; j<rp[i+1]; ++j) { col = cval[j]; aval[j] = work[col]; work[col] = 0.0; } } /* check for zero diagonal */ if (! aval[diag[i]]) { hypre_sprintf(msgBuf_dh, "zero diagonal in local row %i", i+1); SET_V_ERROR(msgBuf_dh); } } } /* hypre_printf("bj= %i constrained= %i discarded= %i\n", bj, constrained, discard); */ if (dummy != NULL) { FREE_DH(dummy); CHECK_V_ERROR; } FREE_DH(list); CHECK_V_ERROR; FREE_DH(tmpFill); CHECK_V_ERROR; FREE_DH(marker); CHECK_V_ERROR; END_FUNC_DH }
static void data_generic_set_type_info(TDSCOLUMN * col, struct _drecord *drec, SQLINTEGER odbc_ver) { TDS_SERVER_TYPE col_type = col->on_server.column_type; int col_size = col->on_server.column_size; switch (tds_get_conversion_type(col_type, col_size)) { case XSYBNCHAR: drec->sql_desc_concise_type = SQL_WCHAR; drec->sql_desc_display_size = col->on_server.column_size / 2; SET_INFO2("nchar", "'", "'", col->on_server.column_size / 2); case XSYBCHAR: case SYBCHAR: drec->sql_desc_concise_type = SQL_CHAR; drec->sql_desc_display_size = col->on_server.column_size; SET_INFO("char", "'", "'"); /* TODO really sure ?? SYBNVARCHAR sybase only ?? */ case SYBNVARCHAR: case XSYBNVARCHAR: drec->sql_desc_concise_type = SQL_WVARCHAR; drec->sql_desc_display_size = col->on_server.column_size / 2; drec->sql_desc_length = col->on_server.column_size / 2u; if (is_blob_col(col)) { drec->sql_desc_display_size = SQL_SS_LENGTH_UNLIMITED; drec->sql_desc_octet_length = drec->sql_desc_length = SQL_SS_LENGTH_UNLIMITED; } SET_INFO("nvarchar", "'", "'"); case XSYBVARCHAR: case SYBVARCHAR: drec->sql_desc_concise_type = SQL_VARCHAR; drec->sql_desc_display_size = col->on_server.column_size; if (is_blob_col(col)) { drec->sql_desc_display_size = SQL_SS_LENGTH_UNLIMITED; drec->sql_desc_octet_length = drec->sql_desc_length = SQL_SS_LENGTH_UNLIMITED; } SET_INFO("varchar", "'", "'"); case SYBNTEXT: drec->sql_desc_concise_type = SQL_WLONGVARCHAR; drec->sql_desc_display_size = col->on_server.column_size / 2; SET_INFO2("ntext", "'", "'", col->on_server.column_size / 2); case SYBTEXT: drec->sql_desc_concise_type = SQL_LONGVARCHAR; drec->sql_desc_display_size = col->on_server.column_size; SET_INFO("text", "'", "'"); case SYBBIT: case SYBBITN: drec->sql_desc_concise_type = SQL_BIT; drec->sql_desc_display_size = 1; drec->sql_desc_unsigned = SQL_TRUE; SET_INFO2("bit", "", "", 1); #if (ODBCVER >= 0x0300) case SYB5INT8: case SYBINT8: /* TODO return numeric for odbc2 and convert bigint to numeric */ drec->sql_desc_concise_type = SQL_BIGINT; drec->sql_desc_display_size = 20; SET_INFO2("bigint", "", "", 19); #endif case SYBINT4: drec->sql_desc_concise_type = SQL_INTEGER; drec->sql_desc_display_size = 11; /* -1000000000 */ SET_INFO2("int", "", "", 10); case SYBINT2: drec->sql_desc_concise_type = SQL_SMALLINT; drec->sql_desc_display_size = 6; /* -10000 */ SET_INFO2("smallint", "", "", 5); case SYBUINT1: case SYBINT1: drec->sql_desc_unsigned = SQL_TRUE; case SYBSINT1: /* TODO not another type_name ?? */ drec->sql_desc_concise_type = SQL_TINYINT; drec->sql_desc_display_size = 3; /* 255 */ SET_INFO2("tinyint", "", "", 3); #if (ODBCVER >= 0x0300) case SYBUINT8: drec->sql_desc_unsigned = SQL_TRUE; drec->sql_desc_concise_type = SQL_BIGINT; drec->sql_desc_display_size = 20; /* TODO return numeric for odbc2 and convert bigint to numeric */ SET_INFO2("unsigned bigint", "", "", 20); #endif case SYBUINT4: drec->sql_desc_unsigned = SQL_TRUE; drec->sql_desc_concise_type = SQL_INTEGER; drec->sql_desc_display_size = 10; SET_INFO2("unsigned int", "", "", 10); case SYBUINT2: drec->sql_desc_unsigned = SQL_TRUE; drec->sql_desc_concise_type = SQL_SMALLINT; drec->sql_desc_display_size = 5; /* 65535 */ SET_INFO2("unsigned smallint", "", "", 5); case SYBREAL: drec->sql_desc_concise_type = SQL_REAL; drec->sql_desc_display_size = 14; SET_INFO2("real", "", "", odbc_ver == SQL_OV_ODBC3 ? 24 : 7); case SYBFLT8: drec->sql_desc_concise_type = SQL_DOUBLE; drec->sql_desc_display_size = 24; /* FIXME -- what should the correct size be? */ SET_INFO2("float", "", "", odbc_ver == SQL_OV_ODBC3 ? 53 : 15); case SYBMONEY: /* TODO check money format returned by propretary ODBC, scale == 4 but we use 2 digits */ drec->sql_desc_concise_type = SQL_DECIMAL; drec->sql_desc_octet_length = 21; drec->sql_desc_display_size = 21; drec->sql_desc_precision = 19; drec->sql_desc_scale = 4; SET_INFO2("money", "$", "", 19); case SYBMONEY4: drec->sql_desc_concise_type = SQL_DECIMAL; drec->sql_desc_octet_length = 12; drec->sql_desc_display_size = 12; drec->sql_desc_precision = 10; drec->sql_desc_scale = 4; SET_INFO2("money", "$", "", 10); case SYBDATETIME: drec->sql_desc_concise_type = SQL_TYPE_TIMESTAMP; drec->sql_desc_display_size = 23; drec->sql_desc_octet_length = sizeof(TIMESTAMP_STRUCT); drec->sql_desc_precision = 3; drec->sql_desc_scale = 3; drec->sql_desc_datetime_interval_code = SQL_CODE_TIMESTAMP; SET_INFO2("datetime", "'", "'", 23); case SYBDATETIME4: drec->sql_desc_concise_type = SQL_TYPE_TIMESTAMP; /* TODO dependent on precision (decimal second digits) */ /* we always format using yyyy-mm-dd hh:mm:ss[.fff], see convert_tds2sql.c */ drec->sql_desc_display_size = 19; drec->sql_desc_octet_length = sizeof(TIMESTAMP_STRUCT); drec->sql_desc_datetime_interval_code = SQL_CODE_TIMESTAMP; SET_INFO2("datetime", "'", "'", 16); /* The following two types are just Sybase types but as mainly our ODBC * driver is much more compatible with Windows use attributes similar * to MS one. For instance Sybase ODBC returns TIME into a TIME_STRUCT * however this truncate the precision to 0 as TIME does not have * fraction of seconds. Also Sybase ODBC have different concepts for * PRECISION for many types and making these 2 types compatibles with * Sybase would break this driver compatibility. */ case SYBTIME: drec->sql_desc_concise_type = SQL_SS_TIME2; drec->sql_desc_octet_length = sizeof(SQL_SS_TIME2_STRUCT); /* we always format using hh:mm:ss[.fff], see convert_tds2sql.c */ drec->sql_desc_display_size = 12; drec->sql_desc_precision = 3; drec->sql_desc_scale = 3; SET_INFO2("time", "'", "'", 12); case SYBDATE: drec->sql_desc_octet_length = sizeof(DATE_STRUCT); drec->sql_desc_concise_type = SQL_TYPE_DATE; /* we always format using yyyy-mm-dd, see convert_tds2sql.c */ drec->sql_desc_display_size = 10; SET_INFO2("date", "'", "'", 10); case XSYBBINARY: case SYBBINARY: drec->sql_desc_concise_type = SQL_BINARY; drec->sql_desc_display_size = col->column_size * 2; /* handle TIMESTAMP using usertype */ if (col->column_usertype == 80) SET_INFO("timestamp", "0x", ""); SET_INFO("binary", "0x", ""); case SYBLONGBINARY: case SYBIMAGE: drec->sql_desc_concise_type = SQL_LONGVARBINARY; drec->sql_desc_display_size = col->column_size * 2; SET_INFO("image", "0x", ""); case XSYBVARBINARY: case SYBVARBINARY: drec->sql_desc_concise_type = SQL_VARBINARY; drec->sql_desc_display_size = col->column_size * 2; if (is_blob_col(col)) { drec->sql_desc_display_size = SQL_SS_LENGTH_UNLIMITED; drec->sql_desc_octet_length = drec->sql_desc_length = SQL_SS_LENGTH_UNLIMITED; } SET_INFO("varbinary", "0x", ""); case SYBINTN: case SYBDATETIMN: case SYBFLTN: case SYBMONEYN: case SYBUINTN: case SYBTIMEN: case SYBDATEN: assert(0); case SYBVOID: case SYBINTERVAL: case SYBUNITEXT: case SYBXML: case SYBMSUDT: break; #if (ODBCVER >= 0x0300) case SYBUNIQUE: #ifdef SQL_GUID drec->sql_desc_concise_type = SQL_GUID; #else drec->sql_desc_concise_type = SQL_CHAR; #endif drec->sql_desc_display_size = 36; /* FIXME for Sybase ?? */ SET_INFO2("uniqueidentifier", "'", "'", 36); #endif case SYBMSXML: drec->sql_desc_concise_type = SQL_SS_XML; drec->sql_desc_display_size = SQL_SS_LENGTH_UNLIMITED; drec->sql_desc_octet_length = drec->sql_desc_length = SQL_SS_LENGTH_UNLIMITED; SET_INFO("xml", "'", "'"); /* types already handled in other types, just to silent warnings */ case SYBNUMERIC: case SYBDECIMAL: case SYBVARIANT: case SYBMSDATE: case SYBMSTIME: case SYBMSDATETIME2: case SYBMSDATETIMEOFFSET: case SYB5BIGDATETIME: case SYB5BIGTIME: break; } SET_INFO("", "", ""); }
void bicgstab_euclid(Mat_dh A, Euclid_dh ctx, double *x, double *b, HYPRE_Int *itsOUT) { START_FUNC_DH HYPRE_Int its, m = ctx->m; bool monitor; HYPRE_Int maxIts = ctx->maxIts; double atol = ctx->atol, rtol = ctx->rtol; /* scalars */ double alpha, alpha_1, beta_1, widget, widget_1, rho_1, rho_2, s_norm, eps, exit_a, b_iprod, r_iprod; /* vectors */ double *t, *s, *s_hat, *v, *p, *p_hat, *r, *r_hat; monitor = Parser_dhHasSwitch(parser_dh, "-monitor"); /* allocate working space */ t = (double*)MALLOC_DH(m*sizeof(double)); s = (double*)MALLOC_DH(m*sizeof(double)); s_hat = (double*)MALLOC_DH(m*sizeof(double)); v = (double*)MALLOC_DH(m*sizeof(double)); p = (double*)MALLOC_DH(m*sizeof(double)); p_hat = (double*)MALLOC_DH(m*sizeof(double)); r = (double*)MALLOC_DH(m*sizeof(double)); r_hat = (double*)MALLOC_DH(m*sizeof(double)); /* r = b - Ax */ Mat_dhMatVec(A, x, s); /* s = Ax */ CopyVec(m, b, r); /* r = b */ Axpy(m, -1.0, s, r); /* r = b-Ax */ CopyVec(m, r, r_hat); /* r_hat = r */ /* compute stopping criteria */ b_iprod = InnerProd(m, b, b); CHECK_V_ERROR; exit_a = atol*atol*b_iprod; CHECK_V_ERROR; /* absolute stopping criteria */ eps = rtol*rtol*b_iprod; /* relative stoping criteria (residual reduction) */ its = 0; while(1) { ++its; rho_1 = InnerProd(m, r_hat, r); if (rho_1 == 0) { SET_V_ERROR("(r_hat . r) = 0; method fails"); } if (its == 1) { CopyVec(m, r, p); /* p = r_0 */ CHECK_V_ERROR; } else { beta_1 = (rho_1/rho_2)*(alpha_1/widget_1); /* p_i = r_(i-1) + beta_(i-1)*( p_(i-1) - w_(i-1)*v_(i-1) ) */ Axpy(m, -widget_1, v, p); CHECK_V_ERROR; ScaleVec(m, beta_1, p); CHECK_V_ERROR; Axpy(m, 1.0, r, p); CHECK_V_ERROR; } /* solve M*p_hat = p_i */ Euclid_dhApply(ctx, p, p_hat); CHECK_V_ERROR; /* v_i = A*p_hat */ Mat_dhMatVec(A, p_hat, v); CHECK_V_ERROR; /* alpha_i = rho_(i-1) / (r_hat^T . v_i ) */ { double tmp = InnerProd(m, r_hat, v); CHECK_V_ERROR; alpha = rho_1/tmp; } /* s = r_(i-1) - alpha_i*v_i */ CopyVec(m, r, s); CHECK_V_ERROR; Axpy(m, -alpha, v, s); CHECK_V_ERROR; /* check norm of s; if small enough: * set x_i = x_(i-1) + alpha_i*p_i and stop. * (Actually, we use the square of the norm) */ s_norm = InnerProd(m, s, s); if (s_norm < exit_a) { SET_INFO("reached absolute stopping criteria"); break; } /* solve M*s_hat = s */ Euclid_dhApply(ctx, s, s_hat); CHECK_V_ERROR; /* t = A*s_hat */ Mat_dhMatVec(A, s_hat, t); CHECK_V_ERROR; /* w_i = (t . s)/(t . t) */ { double tmp1, tmp2; tmp1 = InnerProd(m, t, s); CHECK_V_ERROR; tmp2 = InnerProd(m, t, t); CHECK_V_ERROR; widget = tmp1/tmp2; } /* x_i = x_(i-1) + alpha_i*p_hat + w_i*s_hat */ Axpy(m, alpha, p_hat, x); CHECK_V_ERROR; Axpy(m, widget, s_hat, x); CHECK_V_ERROR; /* r_i = s - w_i*t */ CopyVec(m, s, r); CHECK_V_ERROR; Axpy(m, -widget, t, r); CHECK_V_ERROR; /* check convergence; continue if necessary; * for continuation it is necessary thea w != 0. */ r_iprod = InnerProd(m, r, r); CHECK_V_ERROR; if (r_iprod < eps) { SET_INFO("stipulated residual reduction achieved"); break; } /* monitor convergence */ if (monitor && myid_dh == 0) { hypre_fprintf(stderr, "[it = %i] %e\n", its, sqrt(r_iprod/b_iprod)); } /* prepare for next iteration */ rho_2 = rho_1; widget_1 = widget; alpha_1 = alpha; if (its >= maxIts) { its = -its; break; } } *itsOUT = its; FREE_DH(t); FREE_DH(s); FREE_DH(s_hat); FREE_DH(v); FREE_DH(p); FREE_DH(p_hat); FREE_DH(r); FREE_DH(r_hat); END_FUNC_DH }
int main (int argc, char** argv) { char *kfile, *tfile, *cfile, *tafile, *lfile; size_t ksize, tsize, iter, depth; // FILE *kout, *tout; char opt; graph_t **gtrain; int* target; FILE* fileTrain, *fileResult, *fileTarget; core_t core; gtrain = NULL; lfile = tafile = cfile = kfile = NULL; depth = 3; CLEAR_FLAGS(); SET_RUNNABLE(); ksize = tsize = 0; core = TanimotoCore; while((opt = getopt(argc, argv, "mvihd:k:t:c:a:l:")) != -1) { switch(opt){ case 'v': SET_VERBOSE(); break; case 'i': SET_INFO(); break; case 'd': depth = strtol((const char*) optarg, 0, 10); break; case 'k': SET_K_FILE(); kfile = optarg; break; case 't': SET_T_FILE(); tfile = optarg; break; case 'c': SET_C_FILE(); cfile = optarg; break; case 'a': SET_TA_FILE(); tafile = optarg; break; case 'l': SET_L_FILE(); lfile = optarg; break; case 'h': SET_HELP(); break; case 'm': SET_PROVA_MINMAX(); break; default: SET_HELP(); } } if(HELP()) { usage(); CLEAR_FLAGS(); } if(INFO()) { info(); CLEAR_FLAGS(); } if(optind < argc) { if(!strcmp(argv[optind], "tanimoto")) { printf("tanimoto kernel\n"); } else if(!strcmp(argv[optind], "minmax")) { printf("minmax kernel\n"); core = MinMaxCore; } else printf("unavailable core ...\ndefault core: tanimoto\n"); } //else //printf("default core: tanimoto\n"); if(RUNNABLE()) { if(K_FILE() && T_FILE()) { if((fileTrain = fopen(kfile, "r")) && (fileTarget = fopen(tfile, "r"))) { gtrain = parse(fileTrain, &ksize); fclose(fileTrain); target=parseTarget(fileTarget, ksize); fclose(fileTarget); if(target==0) printf("\ncheck training file and target file"); else FoldCrossValidation(core, gtrain, target, ksize, depth); for(iter = 0; iter < ksize; iter++) free_graph(gtrain[iter]); XFREE(gtrain); XFREE(target); }else fatal("unable to open training file"); } if(TA_FILE() && C_FILE()) { fileResult = fopen(cfile, "r"); fileTarget = fopen(tafile, "r"); printf("\nAccuratezza: %f\n", getAccuracy(fileTarget, fileResult)); fclose(fileResult); fclose(fileTarget); } /* if(L_FILE()) { file = fopen(lfile, "r"); printf("\nLeaveOneOut: %f\n", getLeaveOneOut(file)); fclose(file); } */ if(K_FILE() && !T_FILE()) { if((fileTrain = fopen(kfile, "r"))) { gtrain = parse(fileTrain, &ksize); fclose(fileTrain); print_graph(gtrain[0]); print_graph(gtrain[1]); if(gtrain==0) printf("\ncheck training file"); else WriteKernelMatrix(core, gtrain, ksize, depth); for(iter = 0; iter < ksize; iter++) free_graph(gtrain[iter]); XFREE(gtrain); }else fatal("unable to open training file"); } if(PROVA_MINMAX()) { printf("\nPROVA MIN MAX\n"); fileTrain = fopen("./minmax.mol2", "r"); gtrain = parse(fileTrain, &ksize); //print_graph(gtrain[0]); //print_graph(gtrain[1]); print_graph(gtrain[0]); print_graph(gtrain[1]); printf("\nMinMax: %f", MinMaxCore(gtrain[0], gtrain[1], 3)); //printf("\nTanimoto: %f", TanimotoCore(gtrain[0], gtrain[1], 3)); fclose(fileTrain); } } //else usage(); return EXIT_SUCCESS; }
void generateBlocked(MatGenFD mg, HYPRE_Int *rp, HYPRE_Int *cval, double *aval, Mat_dh A, Vec_dh b) { START_FUNC_DH bool applyBdry = true; double *stencil = mg->stencil; HYPRE_Int id = mg->id; bool threeD = mg->threeD; HYPRE_Int px = mg->px, py = mg->py, pz = mg->pz; /* processor grid dimensions */ HYPRE_Int p, q, r; /* this proc's position in processor grid */ HYPRE_Int cc = mg->cc; /* local grid dimension (grid of unknowns) */ HYPRE_Int nx = cc, ny = cc, nz = cc; HYPRE_Int lowerx, upperx, lowery, uppery, lowerz, upperz; HYPRE_Int startRow; HYPRE_Int x, y, z; bool debug = false; HYPRE_Int idx = 0, localRow = 0; /* nabor; */ HYPRE_Int naborx1, naborx2, nabory1, nabory2, naborz1, naborz2; double *rhs; double hhalf = 0.5 * mg->hh; double bcx1 = mg->bcX1; double bcx2 = mg->bcX2; double bcy1 = mg->bcY1; double bcy2 = mg->bcY2; /* double bcz1 = mg->bcZ1; */ /* double bcz2 = mg->bcZ2; */ Vec_dhInit(b, A->m); CHECK_V_ERROR; rhs = b->vals; if (mg->debug && logFile != NULL) debug = true; if (! threeD) nz = 1; /* compute p,q,r from P,Q,R and myid */ p = id % px; q = (( id - p)/px) % py; r = ( id - p - px*q)/( px*py ); if (debug) { hypre_sprintf(msgBuf_dh, "this proc's position in subdomain grid: p= %i q= %i r= %i", p,q,r); SET_INFO(msgBuf_dh); } /* compute ilower and iupper from p,q,r and nx,ny,nz */ /* zero-based */ lowerx = nx*p; upperx = lowerx + nx; lowery = ny*q; uppery = lowery + ny; lowerz = nz*r; upperz = lowerz + nz; if (debug) { hypre_sprintf(msgBuf_dh, "local grid parameters: lowerx= %i upperx= %i", lowerx, upperx); SET_INFO(msgBuf_dh); hypre_sprintf(msgBuf_dh, "local grid parameters: lowery= %i uppery= %i", lowery, uppery); SET_INFO(msgBuf_dh); hypre_sprintf(msgBuf_dh, "local grid parameters: lowerz= %i upperz= %i", lowerz, upperz); SET_INFO(msgBuf_dh); } startRow = mg->first; rp[0] = 0; for (z=lowerz; z<upperz; z++) { for (y=lowery; y<uppery; y++) { for (x=lowerx; x<upperx; x++) { if (debug) { hypre_fprintf(logFile, "row= %i x= %i y= %i z= %i\n", localRow+startRow+1, x, y, z); } /* compute row values and rhs, at the current node */ getstencil(mg,x,y,z); /* down plane */ if (threeD) { if (z > 0) { naborz1 = rownum(threeD, x,y,z-1,nx,ny,nz,px,py); cval[idx] = naborz1; aval[idx++] = FRONT(stencil); } } /* south */ if (y > 0) { nabory1 = rownum(threeD, x,y-1,z,nx,ny,nz,px,py); cval[idx] = nabory1; aval[idx++] = SOUTH(stencil); } /* west */ if (x > 0) { naborx1 = rownum(threeD, x-1,y,z,nx,ny,nz,px,py); cval[idx] = naborx1; aval[idx++] = WEST(stencil); /*hypre_fprintf(logFile, "--- row: %i; naborx1= %i\n", localRow+startRow+1, 1+naborx1); */ } /* else { hypre_fprintf(logFile, "--- row: %i; x >= nx*px-1; naborx1 has old value: %i\n", localRow+startRow+1,1+naborx1); } */ /* center node */ cval[idx] = localRow+startRow; aval[idx++] = CENTER(stencil); /* east */ if (x < nx*px-1) { naborx2 = rownum(threeD,x+1,y,z,nx,ny,nz,px,py); cval[idx] = naborx2; aval[idx++] = EAST(stencil); } /* else { hypre_fprintf(logFile, "--- row: %i; x >= nx*px-1; nobors2 has old value: %i\n", localRow+startRow,1+naborx2); } */ /* north */ if (y < ny*py-1) { nabory2 = rownum(threeD,x,y+1,z,nx,ny,nz,px,py); cval[idx] = nabory2; aval[idx++] = NORTH(stencil); } /* up plane */ if (threeD) { if (z < nz*pz-1) { naborz2 = rownum(threeD,x,y,z+1,nx,ny,nz,px,py); cval[idx] = naborz2; aval[idx++] = BACK(stencil); } } /* rhs[rhsIdx++] = RHS(stencil); */ rhs[localRow] = 0.0; ++localRow; rp[localRow] = idx; /* apply boundary conditions; only for 2D! */ if (!threeD && applyBdry) { HYPRE_Int globalRow = localRow+startRow-1; HYPRE_Int offset = rp[localRow-1]; HYPRE_Int len = rp[localRow] - rp[localRow-1]; double ctr, coeff; /* hypre_fprintf(logFile, "globalRow = %i; naborx2 = %i\n", globalRow+1, naborx2+1); */ if (x == 0) { /* if x1 */ coeff = mg->A(mg->a, x+hhalf,y,z); ctr = mg->A(mg->a, x-hhalf,y,z); setBoundary_private(globalRow, cval+offset, aval+offset, len, &(rhs[localRow-1]), bcx1, coeff, ctr, naborx2); } else if (x == nx*px-1) { /* if x2 */ coeff = mg->A(mg->a, x-hhalf,y,z); ctr = mg->A(mg->a, x+hhalf,y,z); setBoundary_private(globalRow, cval+offset, aval+offset, len, &(rhs[localRow-1]), bcx2, coeff, ctr, naborx1); } else if (y == 0) { /* if y1 */ coeff = mg->B(mg->b, x, y+hhalf,z); ctr = mg->B(mg->b, x, y-hhalf,z); setBoundary_private(globalRow, cval+offset, aval+offset, len, &(rhs[localRow-1]), bcy1, coeff, ctr, nabory2); } else if (y == ny*py-1) { /* if y2 */ coeff = mg->B(mg->b, x, y-hhalf,z); ctr = mg->B(mg->b, x, y+hhalf,z); setBoundary_private(globalRow, cval+offset, aval+offset, len, &(rhs[localRow-1]), bcy2, coeff, ctr, nabory1); } else if (threeD) { if (z == 0) { coeff = mg->B(mg->b, x, y, z+hhalf); ctr = mg->B(mg->b, x, y, z-hhalf); setBoundary_private(globalRow, cval+offset, aval+offset, len, &(rhs[localRow-1]), bcy1, coeff, ctr, naborz2); } else if (z == nz*nx-1) { coeff = mg->B(mg->b, x, y, z-hhalf); ctr = mg->B(mg->b, x, y, z+hhalf); setBoundary_private(globalRow, cval+offset, aval+offset, len, &(rhs[localRow-1]), bcy1, coeff, ctr, naborz1); } } } } } } END_FUNC_DH }
void MatGenFD_Run(MatGenFD mg, HYPRE_Int id, HYPRE_Int np, Mat_dh *AOut, Vec_dh *rhsOut) { /* What this function does: * 0. creates return objects (A and rhs) * 1. computes "nice to have" values; * 2. allocates storage, if required; * 3. calls generateBlocked() or generateStriped(). * 4. initializes variable in A and rhs. */ START_FUNC_DH Mat_dh A; Vec_dh rhs; bool threeD = mg->threeD; HYPRE_Int nnz; HYPRE_Int m = mg->m; /* local unknowns */ bool debug = false, striped; if (mg->debug && logFile != NULL) debug = true; striped = Parser_dhHasSwitch(parser_dh,"-striped"); /* 0. create objects */ Mat_dhCreate(AOut); CHECK_V_ERROR; Vec_dhCreate(rhsOut); CHECK_V_ERROR; A = *AOut; rhs = *rhsOut; /* ensure that processor grid contains the same number of nodes as there are processors. */ if (! Parser_dhHasSwitch(parser_dh, "-noChecks")) { if (!striped) { HYPRE_Int npTest = mg->px*mg->py; if (threeD) npTest *= mg->pz; if (npTest != np) { hypre_sprintf(msgBuf_dh, "numbers don't match: np_dh = %i, px*py*pz = %i", np, npTest); SET_V_ERROR(msgBuf_dh); } } } /* 1. compute "nice to have" values */ /* each proc's subgrid dimension */ mg->cc = m; if (threeD) { m = mg->m = m*m*m; } else { m = mg->m = m*m; } mg->first = id*m; mg->hh = 1.0/(mg->px*mg->cc - 1); if (debug) { hypre_sprintf(msgBuf_dh, "cc (local grid dimension) = %i", mg->cc); SET_INFO(msgBuf_dh); if (threeD) { hypre_sprintf(msgBuf_dh, "threeD = true"); } else { hypre_sprintf(msgBuf_dh, "threeD = false"); } SET_INFO(msgBuf_dh); hypre_sprintf(msgBuf_dh, "np= %i id= %i", np, id); SET_INFO(msgBuf_dh); } mg->id = id; mg->np = np; nnz = threeD ? m*7 : m*5; /* 2. allocate storage */ if (mg->allocateMem) { A->rp = (HYPRE_Int*)MALLOC_DH((m+1)*sizeof(HYPRE_Int)); CHECK_V_ERROR; A->rp[0] = 0; A->cval = (HYPRE_Int*)MALLOC_DH(nnz*sizeof(HYPRE_Int)); CHECK_V_ERROR A->aval = (double*)MALLOC_DH(nnz*sizeof(double)); CHECK_V_ERROR; /* rhs->vals = (double*)MALLOC_DH(m*sizeof(double)); CHECK_V_ERROR; */ } /* 4. initialize variables in A and rhs */ rhs->n = m; A->m = m; A->n = m*mg->np; A->beg_row = mg->first; /* 3. generate matrix */ isThreeD = threeD; /* yuck! used in box_XX() */ if (Parser_dhHasSwitch(parser_dh,"-striped")) { generateStriped(mg, A->rp, A->cval, A->aval, A, rhs); CHECK_V_ERROR; } else { generateBlocked(mg, A->rp, A->cval, A->aval, A, rhs); CHECK_V_ERROR; } /* add in bdry conditions */ /* only implemented for 2D mats! */ if (! threeD) { /* fdaddbc(nx, ny, nz, rp, cval, diag, aval, rhs, h, mg); */ } END_FUNC_DH }
static void data_set_type_info(TDSCOLUMN * col, struct _drecord *drec, SQLINTEGER odbc_ver) { const char *type; #define SET_INFO(type, prefix, suffix) do { \ drec->sql_desc_literal_prefix = prefix; \ drec->sql_desc_literal_suffix = suffix; \ drec->sql_desc_type_name = type; \ return; \ } while(0) #define SET_INFO2(type, prefix, suffix, len) do { \ drec->sql_desc_length = (len); \ SET_INFO(type, prefix, suffix); \ } while(0) drec->sql_desc_unsigned = SQL_FALSE; drec->sql_desc_octet_length = drec->sql_desc_length = col->on_server.column_size; switch (tds_get_conversion_type(col->column_type, col->column_size)) { case XSYBCHAR: case SYBCHAR: if (col->on_server.column_type == XSYBNCHAR) SET_INFO2("nchar", "'", "'", col->on_server.column_size / 2); SET_INFO("char", "'", "'"); case XSYBVARCHAR: case SYBVARCHAR: type = "varchar"; if (col->on_server.column_type == SYBNVARCHAR || col->on_server.column_type == XSYBNVARCHAR) { drec->sql_desc_length = col->on_server.column_size / 2u; type = "nvarchar"; } if (is_blob_col(col)) drec->sql_desc_octet_length = drec->sql_desc_length = SQL_SS_LENGTH_UNLIMITED; SET_INFO(type, "'", "'"); case SYBTEXT: if (col->on_server.column_type == SYBNTEXT) SET_INFO2("ntext", "'", "'", col->on_server.column_size / 2); SET_INFO("text", "'", "'"); case SYBBIT: case SYBBITN: drec->sql_desc_unsigned = SQL_TRUE; SET_INFO2("bit", "", "", 1); #if (ODBCVER >= 0x0300) case SYBINT8: /* TODO return numeric for odbc2 and convert bigint to numeric */ SET_INFO2("bigint", "", "", 19); #endif case SYBINT4: SET_INFO2("int", "", "", 10); case SYBINT2: SET_INFO2("smallint", "", "", 5); case SYBUINT1: case SYBINT1: drec->sql_desc_unsigned = SQL_TRUE; SET_INFO2("tinyint", "", "", 3); #if (ODBCVER >= 0x0300) case SYBUINT8: drec->sql_desc_unsigned = SQL_TRUE; /* TODO return numeric for odbc2 and convert bigint to numeric */ SET_INFO2("unsigned bigint", "", "", 19); #endif case SYBUINT4: drec->sql_desc_unsigned = SQL_TRUE; SET_INFO2("unsigned int", "", "", 10); case SYBUINT2: drec->sql_desc_unsigned = SQL_TRUE; SET_INFO2("unsigned smallint", "", "", 5); case SYBREAL: SET_INFO2("real", "", "", odbc_ver == SQL_OV_ODBC3 ? 24 : 7); case SYBFLT8: SET_INFO2("float", "", "", odbc_ver == SQL_OV_ODBC3 ? 53 : 15); case SYBMONEY: drec->sql_desc_octet_length = 21; SET_INFO2("money", "$", "", 19); case SYBMONEY4: drec->sql_desc_octet_length = 12; SET_INFO2("money", "$", "", 10); case SYBDATETIME: drec->sql_desc_octet_length = sizeof(TIMESTAMP_STRUCT); SET_INFO2("datetime", "'", "'", 23); case SYBDATETIME4: drec->sql_desc_octet_length = sizeof(TIMESTAMP_STRUCT); SET_INFO2("datetime", "'", "'", 16); case SYBBINARY: /* handle TIMESTAMP using usertype */ if (col->column_usertype == 80) SET_INFO("timestamp", "0x", ""); SET_INFO("binary", "0x", ""); case SYBIMAGE: SET_INFO("image", "0x", ""); case SYBVARBINARY: if (is_blob_col(col)) drec->sql_desc_octet_length = drec->sql_desc_length = SQL_SS_LENGTH_UNLIMITED; SET_INFO("varbinary", "0x", ""); case SYBNUMERIC: drec->sql_desc_octet_length = col->column_prec + 2; SET_INFO2("numeric", "", "", col->column_prec); case SYBDECIMAL: drec->sql_desc_octet_length = col->column_prec + 2; SET_INFO2("decimal", "", "", col->column_prec); case SYBINTN: case SYBDATETIMN: case SYBFLTN: case SYBMONEYN: assert(0); case SYBVOID: case SYBNTEXT: case SYBNVARCHAR: case XSYBNVARCHAR: case XSYBNCHAR: break; #if (ODBCVER >= 0x0300) case SYBUNIQUE: /* FIXME for Sybase ?? */ SET_INFO2("uniqueidentifier", "'", "'", 36); case SYBVARIANT: SET_INFO("sql_variant", "", ""); break; #endif case SYBMSDATETIMEOFFSET: SET_INFO2("datetimeoffset", "'", "'", col->column_prec + 27); case SYBMSDATETIME2: SET_INFO2("datetime2", "'", "'", col->column_prec + 20); case SYBMSTIME: SET_INFO2("time", "'", "'", col->column_prec + 9); case SYBMSDATE: SET_INFO2("date", "'", "'", 10); case SYBMSXML: SET_INFO("xml", "'", "'"); } SET_INFO("", "", ""); #undef SET_INFO #undef SET_INFO2 }
static void eval_thunk_selector (StgClosure **q, StgSelector * p, rtsBool evac) // NB. for legacy reasons, p & q are swapped around :( { nat field; StgInfoTable *info; StgWord info_ptr; StgClosure *selectee; StgSelector *prev_thunk_selector; bdescr *bd; StgClosure *val; prev_thunk_selector = NULL; // this is a chain of THUNK_SELECTORs that we are going to update // to point to the value of the current THUNK_SELECTOR. Each // closure on the chain is a WHITEHOLE, and points to the next in the // chain with payload[0]. selector_chain: bd = Bdescr((StgPtr)p); if (HEAP_ALLOCED_GC(p)) { // If the THUNK_SELECTOR is in to-space or in a generation that we // are not collecting, then bale out early. We won't be able to // save any space in any case, and updating with an indirection is // trickier in a non-collected gen: we would have to update the // mutable list. if (bd->flags & BF_EVACUATED) { unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); *q = (StgClosure *)p; // shortcut, behave as for: if (evac) evacuate(q); if (evac && bd->gen_no < gct->evac_gen_no) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } return; } // we don't update THUNK_SELECTORS in the compacted // generation, because compaction does not remove the INDs // that result, this causes confusion later // (scavenge_mark_stack doesn't deal with IND). BEWARE! This // bit is very tricky to get right. If you make changes // around here, test by compiling stage 3 with +RTS -c -RTS. if (bd->flags & BF_MARKED) { // must call evacuate() to mark this closure if evac==rtsTrue *q = (StgClosure *)p; if (evac) evacuate(q); unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); return; } } // WHITEHOLE the selector thunk, since it is now under evaluation. // This is important to stop us going into an infinite loop if // this selector thunk eventually refers to itself. #if defined(THREADED_RTS) // In threaded mode, we'll use WHITEHOLE to lock the selector // thunk while we evaluate it. { do { info_ptr = xchg((StgPtr)&p->header.info, (W_)&stg_WHITEHOLE_info); } while (info_ptr == (W_)&stg_WHITEHOLE_info); // make sure someone else didn't get here first... if (IS_FORWARDING_PTR(info_ptr) || INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr)->type != THUNK_SELECTOR) { // v. tricky now. The THUNK_SELECTOR has been evacuated // by another thread, and is now either a forwarding ptr or IND. // We need to extract ourselves from the current situation // as cleanly as possible. // - unlock the closure // - update *q, we may have done *some* evaluation // - if evac, we need to call evacuate(), because we // need the write-barrier stuff. // - undo the chain we've built to point to p. SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr); *q = (StgClosure *)p; if (evac) evacuate(q); unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); return; } } #else // Save the real info pointer (NOTE: not the same as get_itbl()). info_ptr = (StgWord)p->header.info; SET_INFO((StgClosure *)p,&stg_WHITEHOLE_info); #endif field = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr)->layout.selector_offset; // The selectee might be a constructor closure, // so we untag the pointer. selectee = UNTAG_CLOSURE(p->selectee); selector_loop: // selectee now points to the closure that we're trying to select // a field from. It may or may not be in to-space: we try not to // end up in to-space, but it's impractical to avoid it in // general. The compacting GC scatters to-space pointers in // from-space during marking, for example. We rely on the property // that evacuate() doesn't mind if it gets passed a to-space pointer. info = (StgInfoTable*)selectee->header.info; if (IS_FORWARDING_PTR(info)) { // We don't follow pointers into to-space; the constructor // has already been evacuated, so we won't save any space // leaks by evaluating this selector thunk anyhow. goto bale_out; } info = INFO_PTR_TO_STRUCT(info); switch (info->type) { case WHITEHOLE: goto bale_out; // about to be evacuated by another thread (or a loop). case CONSTR: case CONSTR_1_0: case CONSTR_0_1: case CONSTR_2_0: case CONSTR_1_1: case CONSTR_0_2: case CONSTR_STATIC: case CONSTR_NOCAF_STATIC: { // check that the size is in range ASSERT(field < (StgWord32)(info->layout.payload.ptrs + info->layout.payload.nptrs)); // Select the right field from the constructor val = selectee->payload[field]; #ifdef PROFILING // For the purposes of LDV profiling, we have destroyed // the original selector thunk, p. if (era > 0) { // Only modify the info pointer when LDV profiling is // enabled. Note that this is incompatible with parallel GC, // because it would allow other threads to start evaluating // the same selector thunk. SET_INFO((StgClosure*)p, (StgInfoTable *)info_ptr); OVERWRITING_CLOSURE((StgClosure*)p); SET_INFO((StgClosure*)p, &stg_WHITEHOLE_info); } #endif // the closure in val is now the "value" of the // THUNK_SELECTOR in p. However, val may itself be a // THUNK_SELECTOR, in which case we want to continue // evaluating until we find the real value, and then // update the whole chain to point to the value. val_loop: info_ptr = (StgWord)UNTAG_CLOSURE(val)->header.info; if (!IS_FORWARDING_PTR(info_ptr)) { info = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr); switch (info->type) { case IND: case IND_STATIC: val = ((StgInd *)val)->indirectee; goto val_loop; case THUNK_SELECTOR: ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; prev_thunk_selector = p; p = (StgSelector*)val; goto selector_chain; default: break; } } ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; prev_thunk_selector = p; *q = val; // update the other selectors in the chain *before* // evacuating the value. This is necessary in the case // where the value turns out to be one of the selectors // in the chain (i.e. we have a loop), and evacuating it // would corrupt the chain. unchain_thunk_selectors(prev_thunk_selector, val); // evacuate() cannot recurse through // eval_thunk_selector(), because we know val is not // a THUNK_SELECTOR. if (evac) evacuate(q); return; } case IND: case IND_STATIC: // Again, we might need to untag a constructor. selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); goto selector_loop; case BLACKHOLE: { StgClosure *r; const StgInfoTable *i; r = ((StgInd*)selectee)->indirectee; // establish whether this BH has been updated, and is now an // indirection, as in evacuate(). if (GET_CLOSURE_TAG(r) == 0) { i = r->header.info; if (IS_FORWARDING_PTR(i)) { r = (StgClosure *)UN_FORWARDING_PTR(i); i = r->header.info; } if (i == &stg_TSO_info || i == &stg_WHITEHOLE_info || i == &stg_BLOCKING_QUEUE_CLEAN_info || i == &stg_BLOCKING_QUEUE_DIRTY_info) { goto bale_out; } ASSERT(i != &stg_IND_info); } selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); goto selector_loop; } case THUNK_SELECTOR: { StgClosure *val; // recursively evaluate this selector. We don't want to // recurse indefinitely, so we impose a depth bound. if (gct->thunk_selector_depth >= MAX_THUNK_SELECTOR_DEPTH) { goto bale_out; } gct->thunk_selector_depth++; // rtsFalse says "don't evacuate the result". It will, // however, update any THUNK_SELECTORs that are evaluated // along the way. eval_thunk_selector(&val, (StgSelector*)selectee, rtsFalse); gct->thunk_selector_depth--; // did we actually manage to evaluate it? if (val == selectee) goto bale_out; // Of course this pointer might be tagged... selectee = UNTAG_CLOSURE(val); goto selector_loop; } case AP: case AP_STACK: case THUNK: case THUNK_1_0: case THUNK_0_1: case THUNK_2_0: case THUNK_1_1: case THUNK_0_2: case THUNK_STATIC: // not evaluated yet goto bale_out; default: barf("eval_thunk_selector: strange selectee %d", (int)(info->type)); } bale_out: // We didn't manage to evaluate this thunk; restore the old info // pointer. But don't forget: we still need to evacuate the thunk itself. SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr); // THREADED_RTS: we just unlocked the thunk, so another thread // might get in and update it. copy() will lock it again and // check whether it was updated in the meantime. *q = (StgClosure *)p; if (evac) { copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest_no); } unchain_thunk_selectors(prev_thunk_selector, *q); return; }
void iluk_mpi_bj(Euclid_dh ctx) { START_FUNC_DH HYPRE_Int *rp, *cval, *diag; HYPRE_Int *CVAL; HYPRE_Int i, j, len, count, col, idx = 0; HYPRE_Int *list, *marker, *fill, *tmpFill; HYPRE_Int temp, m, from = ctx->from, to = ctx->to; HYPRE_Int *n2o_row, *o2n_col; HYPRE_Int first_row, last_row; double *AVAL; REAL_DH *work, *aval; Factor_dh F = ctx->F; SubdomainGraph_dh sg = ctx->sg; if (ctx->F == NULL) { SET_V_ERROR("ctx->F is NULL"); } if (ctx->F->rp == NULL) { SET_V_ERROR("ctx->F->rp is NULL"); } /* printf_dh("====================== starting iluk_mpi_bj; level= %i\n\n", ctx->level); */ m = F->m; rp = F->rp; cval = F->cval; fill = F->fill; diag = F->diag; aval = F->aval; work = ctx->work; n2o_row = sg->n2o_row; o2n_col = sg->o2n_col; /* allocate and initialize working space */ list = (HYPRE_Int*)MALLOC_DH((m+1)*sizeof(HYPRE_Int)); CHECK_V_ERROR; marker = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; tmpFill = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; for (i=0; i<m; ++i) { marker[i] = -1; work[i] = 0.0; } /*---------- main loop ----------*/ /* global numbers of first and last locally owned rows, with respect to A */ first_row = sg->beg_row[myid_dh]; last_row = first_row + sg->row_count[myid_dh]; for (i=from; i<to; ++i) { HYPRE_Int row = n2o_row[i]; /* local row number */ HYPRE_Int globalRow = row + first_row; /* global row number */ EuclidGetRow(ctx->A, globalRow, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* compute scaling value for row(i) */ if (ctx->isScaled) { compute_scaling_private(i, len, AVAL, ctx); CHECK_V_ERROR; } /* Compute symbolic factor for row(i); this also performs sparsification */ count = symbolic_row_private(i, first_row, last_row, list, marker, tmpFill, len, CVAL, AVAL, o2n_col, ctx); CHECK_V_ERROR; /* Ensure adequate storage; reallocate, if necessary. */ if (idx + count > F->alloc) { Factor_dhReallocate(F, idx, count); CHECK_V_ERROR; SET_INFO("REALLOCATED from lu_mpi_bj"); cval = F->cval; fill = F->fill; aval = F->aval; } /* Copy factored symbolic row to permanent storage */ col = list[m]; while (count--) { cval[idx] = col; fill[idx] = tmpFill[col]; ++idx; col = list[col]; } /* add row-pointer to start of next row. */ rp[i+1] = idx; /* Insert pointer to diagonal */ temp = rp[i]; while (cval[temp] != i) ++temp; diag[i] = temp; /* compute numeric factor for current row */ numeric_row_private(i, first_row, last_row, len, CVAL, AVAL, work, o2n_col, ctx); CHECK_V_ERROR EuclidRestoreRow(ctx->A, globalRow, &len, &CVAL, &AVAL); CHECK_V_ERROR; /* Copy factored numeric row to permanent storage, and re-zero work vector */ for (j=rp[i]; j<rp[i+1]; ++j) { col = cval[j]; aval[j] = work[col]; work[col] = 0.0; } /* check for zero diagonal */ if (! aval[diag[i]]) { hypre_sprintf(msgBuf_dh, "zero diagonal in local row %i", i+1); SET_V_ERROR(msgBuf_dh); } } FREE_DH(list); CHECK_V_ERROR; FREE_DH(tmpFill); CHECK_V_ERROR; FREE_DH(marker); CHECK_V_ERROR; END_FUNC_DH }