/*
 * hlpwait - run the event loop for the help page currently displayed.
 *
 * Selects the current hot-spot field (falling back to the first field in
 * helpTab when field_find() finds none), then reads and filters events
 * until one of them ends the page: a topic change, a scroll/page request,
 * a "previous topic" request, a successful search, EV_ESCAPE or EV_KILL_UI.
 *
 * tab      tab-filter state; its curr/other/home members are set here from
 *          the selected field and updated by uitabfilter().
 *
 * Returns the event that terminated the loop (also left in curEvent).
 */
static EVENT hlpwait( VTAB *tab )
{
    /* cursor move to replay on the next call, set by EV_TOP / EV_BOTTOM */
    static EVENT    pendingBump = EV_NO_EVENT;
    bool            finished;
    char            *found_topic;
    unsigned        word_len;
    unsigned        fname_len;

    helpCur = field_find( helpTab, helpStack->cur );
    if( helpCur == NULL && helpTab != NULL ) {
        helpCur = helpTab;
    }
    tab->curr = helpCur;
    tab->other = helpCur;
    if( helpCur != NULL ) {
        tab->home = helpCur->area.col;
    }
    uipushlist( helpEventList );
    if( pendingBump != EV_NO_EVENT ) {
        uitabfilter( pendingBump, tab );
        pendingBump = EV_NO_EVENT;
        helpCur = tab->curr;
    }

    for( finished = false; !finished; ) {
        /* highlight the current field while waiting for input */
        if( helpTab != NULL ) {
            uivattribute( &helpScreen, helpCur->area, AT( ATTR_CURR_EDIT ) );
        }
        do {
            uipushlist( keyShift );
            curEvent = uivget( &helpScreen );
            if( curEvent == EV_MOUSE_PRESS ) {
                ignoreMouseRelease = false;
            }
            uipoplist();
            curEvent = uigadgetfilter( curEvent, &vGadget );
            curEvent = uitabfilter( curEvent, tab );
        } while( curEvent == EV_NO_EVENT );
        if( eventMapFn != NULL ) {
            curEvent = eventMapFn( curEvent );
        }
        curEvent = uihotspotfilter( &helpScreen, hotSpotFields, curEvent );
        /* restore the normal attribute before acting on the event */
        if( helpTab != NULL ) {
            uivattribute( &helpScreen, helpCur->area, AT( ATTR_EDIT ) );
        }

        switch( curEvent ) {
        case EV_HELP:
            nexttopic( helpWord );
            finished = true;
            break;

        /* scrolling and paging: remember the field and let the caller redraw */
        case EV_BOTTOM:
        case E_UP:
        case EV_PAGE_UP:
        case EV_PAGE_DOWN:
        case EV_CURSOR_UP:
        case EV_CURSOR_DOWN:
        case EV_TOP:
        case E_DOWN:
        case EV_SCROLL_VERTICAL:
            if( curEvent == EV_BOTTOM ) {
                pendingBump = EV_CURSOR_DOWN;
            } else if( curEvent == EV_TOP ) {
                pendingBump = EV_CURSOR_UP;
            }
            helpStack->cur = field_count( helpTab, helpCur );
            finished = true;
            break;

        /* back to the previous topic */
        case '-':
        case EV_MOUSE_RELEASE_R:
        case EV_ALT_B:
        case 'b':
        case 'B':
        case EV_F8:
        case EV_F4:
            prevtopic();
            if( strcmp( helpStack->helpfname, curFile ) != 0 ) {
                /*
                 * The previous topic is in a different help file: build a
                 * single field whose keyword is <word><file name> and make
                 * it the whole field list.
                 */
                word_len = strlen( helpStack->word );
                fname_len = strlen( helpStack->helpfname );
                helpCur = HelpMemAlloc( sizeof( a_field ) + word_len + fname_len );
                memcpy( helpCur->keyword, helpStack->word, word_len );
                memcpy( helpCur->keyword + word_len, helpStack->helpfname, fname_len );
                helpCur->keyword[word_len + fname_len] = '\0';
                helpCur->key1_len = word_len;
                helpCur->key2_len = fname_len;
                helpCur->next = NULL;
                helpTab = helpCur;
            }
            finished = true;
            break;

        /* search: run it with the help event list popped */
        case EV_ALT_S:
        case 'S':
        case 's':
            if( helpSearchHdl != NULL ) {
                uipoplist();
                found_topic = HelpSearch( helpSearchHdl );
                if( found_topic != NULL ) {
                    nexttopic( found_topic );
                    HelpMemFree( found_topic );
                    finished = true;
                }
                uipushlist( helpEventList );
            }
            break;

        case EV_FIELD_CHANGE:
            helpCur = tab->curr;
            break;

        case EV_MOUSE_RELEASE:
            if( tab->other == NULL ) {
                break;
            }
            if( ignoreMouseRelease ) {
                /* this mouse release belongs to a mouse press that occurred
                 * before this help topic was opened */
                ignoreMouseRelease = false;
                break;
            }
            tab->other = tab->curr;
            /* fall through */
        case EV_ENTER:  /* same as page-down if there are other topics */
        case EV_F7:
        case '+':
            /* reload from the tab state: needed for the mouse release path */
            helpCur = tab->curr;
            if( helpTab != NULL ) {
                if( helpCur->key2_len == 0 ) {
                    nexttopic( helpCur->keyword );
                }
                finished = true;
            }
            break;

        case EV_KILL_UI:
            uiforceevadd( EV_KILL_UI );
            /* fall through */
        case EV_ESCAPE:
            finished = true;
            break;
        }
    }
    uipoplist();
    return( curEvent );
}
/** Purpose ------- ZGESSM applies the factors L computed by ZGETRF_INCPIV to a complex M-by-N tile A. Arguments --------- @param[in] m INTEGER The number of rows of the matrix A. M >= 0. @param[in] n INTEGER The number of columns of the matrix A. N >= 0. @param[in] k INTEGER The number of columns of the matrix L. K >= 0. @param[in] ib INTEGER The inner-blocking size. IB >= 0. @param[in] ipiv INTEGER array on the cpu. The pivot indices array of size K as returned by ZGETRF_INCPIV. @param[in] dL1 DOUBLE COMPLEX array, dimension(LDDL1, N) The IB-by-K matrix in which is stored L^(-1) as returned by GETRF_INCPIV @param[in] lddl1 INTEGER The leading dimension of the array L1. LDDL1 >= max(1,2*IB). @param[in] dL DOUBLE COMPLEX array, dimension(LDDL, N) The M-by-K lower triangular tile on the gpu. @param[in] lddl INTEGER The leading dimension of the array L. LDDL >= max(1,M). @param[in,out] dA DOUBLE COMPLEX array, dimension (LDDA, N) On entry, the M-by-N tile A on the gpu. On exit, updated by the application of L on the gpu. @param[in] ldda INTEGER The leading dimension of the array A. LDDA >= max(1,M). 
@ingroup magma_zgesv_tile ********************************************************************/ extern "C" magma_int_t magma_zgessm_gpu( magma_order_t order, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL, magma_int_t lddl, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info) { #define AT(i,j) (dAT + (i)*ldda + (j) ) #define L(i,j) (dL + (i) + (j)*lddl ) #define dL1(j) (dL1 + (j)*lddl1) magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; int i, s, sb; magmaDoubleComplex *dAT; /* Check arguments */ *info = 0; if (m < 0) *info = -1; else if (n < 0) *info = -2; else if (ldda < max(1,m)) *info = -4; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (m == 0 || n == 0) return *info; if ( order == MagmaColMajor ) { magmablas_zgetmo_in( dA, dAT, ldda, m, n ); } else { dAT = dA; } s = k / ib; for (i = 0; i < k; i += ib) { sb = min(ib, k-i); magmablas_zlaswp( n, dAT, ldda, i+1, i+sb, ipiv, 1 ); #ifndef WITHOUTTRTRI magma_ztrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, dL1(i), lddl1, AT(i, 0), ldda); #else magma_ztrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, L( i, i), lddl, AT(i, 0), ldda); #endif if ( (i+sb) < m) { magma_zgemm( MagmaNoTrans, MagmaTrans, n, m-(i+sb), sb, c_neg_one, AT(i, 0), ldda, L( i+sb, i), lddl, c_one, AT(i+sb, 0), ldda ); } } if ( order == MagmaColMajor ) { magmablas_zgetmo_in( dA, dAT, ldda, m, n ); } return *info; } /* magma_zgessm_gpu */
inline void HerkLC( T alpha, const DistMatrix<T>& A, T beta, DistMatrix<T>& C ) { #ifndef RELEASE PushCallStack("internal::HerkLC"); if( A.Grid() != C.Grid() ) throw std::logic_error ("A and C must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() ) { std::ostringstream msg; msg << "Nonconformal HerkLC:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; LocalTrrk ( LOWER, ADJOINT, TRANSPOSE, alpha, A1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
/*
 * run_vm - execute one timeslice of bytecode for the given VM state.
 *
 * Instructions are dispatched until the VM enters VM_STATE_DYING, the
 * timeslice of VM_TIMESLICE_TICKS instructions is used up, or the
 * accumulator holds yield_thread.  While vm_state is positive it is used as
 * a CPU quota: it is decremented once per instruction and, on reaching
 * zero, reset to VM_DEFAULT_CPU_QUOTA after raising "quota-expired".
 *
 * Returns 1 immediately when OP_RET finds nothing left to return to (the
 * state is set to VM_STATE_DYING first); otherwise returns nonzero iff the
 * VM is in VM_STATE_DYING when the loop exits.
 *
 * An unknown opcode is reported on stderr and terminates the process.
 */
PUBLIC int run_vm(VMSTATE vms) {
  OBJ vm_hold;	/* Holding register. NOT SEEN BY GC */
  int ticks_left = VM_TIMESLICE_TICKS;

  while (vms->c.vm_state != VM_STATE_DYING && ticks_left-- && vms->r->vm_acc != yield_thread) {
    if (vms->c.vm_state > 0) {
      vms->c.vm_state--;
      if (vms->c.vm_state == 0) {
	/* Quota expired. Warn. */
	vms->c.vm_state = VM_DEFAULT_CPU_QUOTA;
	vm_raise(vms, (OBJ) newsym("quota-expired"), NULL);
	/* Make sure we don't recurse :-) */
	vms->r->vm_trap_closure = NULL;
      }
    }

    gc_reach_safepoint();

#ifdef DEBUG
    debug_dump_instr( vms->r->vm_code->vec , vms->c.vm_ip );
#endif

    switch (CODEAT(vms->c.vm_ip)) {
      case OP_AT: {
	int index = CODEAT(vms->c.vm_ip + 1);

	/* Type check first: ->length must not be read from a non-vector. */
	if (!VECTORP(vms->r->vm_acc)) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}

	if (index < 0 || index >= vms->r->vm_acc->length) {
	  vm_raise(vms, (OBJ) newsym("range-check-error"), vms->r->vm_acc);
	  break;
	}

	vms->r->vm_acc = AT((VECTOR) vms->r->vm_acc, index);
	vms->c.vm_ip += 2;
	break;
      }

      case OP_ATPUT: {
	int index = CODEAT(vms->c.vm_ip + 1);

	vm_hold = PEEK();

	/* Type check first: ->length must not be read from a non-vector. */
	if (!VECTORP(vm_hold)) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vm_hold);
	  break;
	}

	if (index < 0 || index >= vm_hold->length) {
	  vm_raise(vms, (OBJ) newsym("range-check-error"), vm_hold);
	  break;
	}

	ATPUT((VECTOR) vm_hold, index, vms->r->vm_acc);
	vms->c.vm_ip += 2;
	break;
      }

      case OP_MOV_A_LOCL: {
	int i = CODEAT(vms->c.vm_ip + 1);

	/* Walk i links up the environment chain (slot 0 holds the parent). */
	vm_hold = (OBJ) vms->r->vm_env;
	while (i-- > 0) vm_hold = AT((VECTOR) vm_hold, 0);
	vms->r->vm_acc = AT((VECTOR) vm_hold, CODEAT(vms->c.vm_ip + 2) + 1);
	vms->c.vm_ip += 3;
	break;
      }

      case OP_MOV_A_GLOB:
	vm_hold = AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));
	vms->r->vm_acc = AT((OVECTOR) vm_hold, SY_VALUE);
	vms->c.vm_ip += 2;
	break;

      case OP_MOV_A_SLOT: {
	OVECTOR slot, slotname;

	if (!OBJECTP(vms->r->vm_acc)) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}

	slotname = (OVECTOR) AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));

	if (!O_CAN_X((OBJECT) vms->r->vm_acc, vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) slotname);
	}

	slot = findslot((OBJECT) vms->r->vm_acc, slotname, NULL);

	if (slot == NULL) {
	  vm_raise(vms, (OBJ) newsym("slot-not-found"), (OBJ) slotname);
	  break;
	}

	if (!MS_CAN_R(slot, vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) slotname);
	}

	vms->r->vm_acc = AT(slot, SL_VALUE);
	vms->c.vm_ip += 2;
	break;
      }

      case OP_MOV_A_LITL:
	vms->r->vm_acc = AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));
	vms->c.vm_ip += 2;
	break;

      case OP_MOV_A_SELF: vms->r->vm_acc = (OBJ) vms->r->vm_self; vms->c.vm_ip++; break;
      case OP_MOV_A_FRAM: vms->r->vm_acc = (OBJ) vms->r->vm_frame; vms->c.vm_ip++; break;

      case OP_MOV_LOCL_A: {
	int i = CODEAT(vms->c.vm_ip + 1);

	/* Walk i links up the environment chain (slot 0 holds the parent). */
	vm_hold = (OBJ) vms->r->vm_env;
	while (i-- > 0) vm_hold = AT((VECTOR) vm_hold, 0);
	ATPUT((VECTOR) vm_hold, CODEAT(vms->c.vm_ip + 2) + 1, vms->r->vm_acc);
	vms->c.vm_ip += 3;
	break;
      }

      case OP_MOV_GLOB_A:
	if (!PRIVILEGEDP(vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) newsym("setting-global-value"));
	}
	vm_hold = AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));
	ATPUT((OVECTOR) vm_hold, SY_VALUE, vms->r->vm_acc);
	vms->c.vm_ip += 2;
	break;

      case OP_MOV_SLOT_A: {
	OVECTOR slot, slotname;
	OBJECT target = (OBJECT) POP();
	OBJECT foundin;

	if (!OBJECTP(target)) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), (OBJ) target);
	  break;
	}

	slotname = (OVECTOR) AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));

	if (!O_CAN_X(target, vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) slotname);
	}

	slot = findslot(target, slotname, &foundin);

	if (slot == NULL) {
	  vm_raise(vms, (OBJ) newsym("slot-not-found"), (OBJ) slotname);
	  break;
	}

	if (!MS_CAN_W(slot, vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) slotname);
	}

	if (foundin == target) {
	  ATPUT(slot, SL_VALUE, vms->r->vm_acc);
	} else {
	  /* Slot was found on another object: give the target its own slot
	     with the same owner and flags, holding the new value. */
	  OVECTOR newslot = addslot(target, slotname, (OBJECT) AT(slot, SL_OWNER));
	  ATPUT(newslot, SL_FLAGS, AT(slot, SL_FLAGS));
	  ATPUT(newslot, SL_VALUE, vms->r->vm_acc);
	}

	vms->c.vm_ip += 2;
	break;
      }

      case OP_MOV_FRAM_A:
	if (!PRIVILEGEDP(vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) newsym("restoring-vm-frame-pointer"));
	}

	if (!OVECTORP(vms->r->vm_acc) || ((OVECTOR) vms->r->vm_acc)->type != T_FRAME) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}

	vms->r->vm_frame = (OVECTOR) vms->r->vm_acc;
	vms->c.vm_ip++;
	break;

      case OP_PUSH: PUSH(vms->r->vm_acc); vms->c.vm_ip++; break;
      case OP_POP: vms->r->vm_acc = POP(); vms->c.vm_ip++; break;
      case OP_SWAP:
	vm_hold = POP();
	PUSH(vms->r->vm_acc);
	vms->r->vm_acc = vm_hold;
	vms->c.vm_ip++;
	break;

      case OP_VECTOR:
	vms->r->vm_acc = (OBJ) newvector(CODEAT(vms->c.vm_ip+1));
	vms->c.vm_ip += 2;
	break;

      case OP_ENTER_SCOPE:
	/* New environment frame; slot 0 links to the enclosing one. */
	vm_hold = (OBJ) newvector(CODEAT(vms->c.vm_ip+1) + 1);
	ATPUT((VECTOR) vm_hold, 0, (OBJ) vms->r->vm_env);
	vms->r->vm_env = (VECTOR) vm_hold;
	vms->c.vm_ip += 2;
	break;

      case OP_LEAVE_SCOPE:
	vms->r->vm_env = (VECTOR) AT(vms->r->vm_env, 0);
	vms->c.vm_ip++;
	break;

      case OP_MAKE_VECTOR: {
	int i = 0;
	int len = CODEAT(vms->c.vm_ip+1);
	VECTOR vec = newvector_noinit(len);

	/* Elements are popped in reverse so the vector keeps push order. */
	for (i = len - 1; i >= 0; i--)
	  ATPUT(vec, i, POP());

	vms->r->vm_acc = (OBJ) vec;
	vms->c.vm_ip += 2;
	break;
      }

      case OP_CLOSURE:
	vms->r->vm_acc = make_closure_from((OVECTOR) vms->r->vm_acc,
					   vms->r->vm_self,
					   vms->r->vm_env,
					   vms->r->vm_effuid);
	vms->c.vm_ip++;
	break;

      case OP_METHOD_CLOSURE: {
	OVECTOR methname = (OVECTOR) AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));
	OVECTOR method;

	if (!OBJECTP(vms->r->vm_acc)) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}

	method = findmethod((OBJECT) vms->r->vm_acc, methname);

	if (method == NULL) {
	  vm_raise(vms, (OBJ) newsym("method-not-found"), (OBJ) methname);
	  break;
	}

	if (!MS_CAN_R(method, vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) methname);
	}

	vm_hold = (OBJ) newovector(CL_MAXSLOTINDEX, T_CLOSURE);
	ATPUT((OVECTOR) vm_hold, CL_METHOD, (OBJ) method);
	ATPUT((OVECTOR) vm_hold, CL_SELF, vms->r->vm_acc);
	vms->r->vm_acc = vm_hold;

	vms->c.vm_ip += 2;
	break;
      }

      case OP_RET:
	if (vms->r->vm_frame != NULL) {
	  restoreframe(vms, vms->r->vm_frame);
	  if (vms->r->vm_code != NULL)
	    break;
	}

	vms->c.vm_state = VM_STATE_DYING;
	return 1;	/* finished, nothing more to run! */

      case OP_CALL: {
	OVECTOR methname = (OVECTOR) AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));
	OVECTOR method;

	if (vms->r->vm_acc == NULL || TAGGEDP(vms->r->vm_acc)) {
	  vm_raise(vms,
		   (OBJ) newsym("null-call-error"),
		   AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip+1)));
	  break;
	}

	if (!OBJECTP(vms->r->vm_acc)) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}

	method = findmethod((OBJECT) vms->r->vm_acc, methname);

	if (method == NULL) {
	  vm_raise(vms, (OBJ) newsym("method-not-found"), (OBJ) methname);
	  break;
	}

	if (!MS_CAN_X(method, vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) methname);
	}

	/* The popped argument vector becomes the callee's environment;
	   its slot 0 is overwritten with the method's environment below. */
	vm_hold = POP();
	if (vm_hold->length-1 != NUM(AT(method, ME_ARGC))) {
	  vm_raise(vms, (OBJ) newsym("wrong-argc"), (OBJ) methname);
	  break;
	}

	vms->c.vm_ip += 2;
	push_frame(vms);

	vms->r->vm_env = (VECTOR) vm_hold;
	ATPUT(vms->r->vm_env, 0, AT(method, ME_ENV));

	vms->r->vm_code = (BVECTOR) AT(method, ME_CODE);
	vms->r->vm_lits = (VECTOR) AT(method, ME_LITS);
	vms->r->vm_self = (OBJECT) vms->r->vm_acc;
	if (NUM(AT(method, ME_FLAGS)) & O_SETUID)
	  vms->r->vm_effuid = (OBJECT) AT(method, ME_OWNER);
	vms->r->vm_method = method;
	vms->c.vm_ip = 0;
	break;
      }

      case OP_CALL_AS: {
	OVECTOR methname = (OVECTOR) AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip + 1));
	OVECTOR method;

	if (vms->r->vm_self == NULL ||
	    vms->r->vm_acc == NULL || TAGGEDP(vms->r->vm_acc)) {
	  vm_raise(vms,
		   (OBJ) newsym("null-call-error"),
		   AT(vms->r->vm_lits, CODEAT(vms->c.vm_ip+1)));
	  break;
	}

	if (!OBJECTP(vms->r->vm_acc)) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}

	method = findmethod((OBJECT) vms->r->vm_acc, methname);

	if (method == NULL) {
	  vm_raise(vms, (OBJ) newsym("method-not-found"), (OBJ) methname);
	  break;
	}

	if (!MS_CAN_X(method, vms->r->vm_effuid)) {
	  NOPERMISSION((OBJ) methname);
	}

	vm_hold = POP();
	if (vm_hold->length-1 != NUM(AT(method, ME_ARGC))) {
	  vm_raise(vms, (OBJ) newsym("wrong-argc"), (OBJ) methname);
	  break;
	}

	vms->c.vm_ip += 2;
	push_frame(vms);

	vms->r->vm_env = (VECTOR) vm_hold;
	ATPUT(vms->r->vm_env, 0, AT(method, ME_ENV));

	vms->r->vm_code = (BVECTOR) AT(method, ME_CODE);
	vms->r->vm_lits = (VECTOR) AT(method, ME_LITS);

	/* don't set vm_self, this is OP_CALL_AS. */
	/* vms->r->vm_self = vms->r->vm_acc; */

	if (NUM(AT(method, ME_FLAGS)) & O_SETUID)
	  vms->r->vm_effuid = (OBJECT) AT(method, ME_OWNER);
	vms->r->vm_method = method;
	vms->c.vm_ip = 0;
	break;
      }

      case OP_APPLY:
	vms->c.vm_ip++;
	apply_closure(vms, (OVECTOR) vms->r->vm_acc, (VECTOR) POP());
	break;

      /* Jump offsets are signed 16-bit, relative to the next instruction. */
      case OP_JUMP: vms->c.vm_ip += 3 + ((int16_t) CODE16AT(vms->c.vm_ip+1)); break;

      case OP_JUMP_TRUE:
	vms->c.vm_ip += (vms->r->vm_acc == false) ? 3 :
						    3 + ((int16_t) CODE16AT(vms->c.vm_ip+1));
	break;

      case OP_JUMP_FALSE:
	vms->c.vm_ip += (vms->r->vm_acc != false) ? 3 :
						    3 + ((int16_t) CODE16AT(vms->c.vm_ip+1));
	break;

      case OP_NOT:
	vms->r->vm_acc = (vms->r->vm_acc == false) ? true : false;
	vms->c.vm_ip++;
	break;

      case OP_EQ:
	vms->r->vm_acc = (vms->r->vm_acc == POP()) ? true : false;
	vms->c.vm_ip++;
	break;

      case OP_NE:
	vms->r->vm_acc = (vms->r->vm_acc != POP()) ? true : false;
	vms->c.vm_ip++;
	break;

      /* For the binary operators the popped value is the left operand and
	 the accumulator the right one (see OP_MINUS), hence the mirrored
	 comparison operators. */
      NUMOP(OP_GT, vms->r->vm_acc = (NUM(vms->r->vm_acc) < NUM(POP())) ? true : false);
      NUMOP(OP_LT, vms->r->vm_acc = (NUM(vms->r->vm_acc) > NUM(POP())) ? true : false);
      NUMOP(OP_GE, vms->r->vm_acc = (NUM(vms->r->vm_acc) <= NUM(POP())) ? true : false);
      NUMOP(OP_LE, vms->r->vm_acc = (NUM(vms->r->vm_acc) >= NUM(POP())) ? true : false);

      NUMOP(OP_NEG, vms->r->vm_acc = MKNUM(-NUM(vms->r->vm_acc)));
      NUMOP(OP_BNOT, vms->r->vm_acc = MKNUM(~NUM(vms->r->vm_acc)));
      NUMOP(OP_BOR, vms->r->vm_acc = MKNUM(NUM(vms->r->vm_acc)|NUM(POP())));
      NUMOP(OP_BAND, vms->r->vm_acc = MKNUM(NUM(vms->r->vm_acc)&NUM(POP())));

      case OP_PLUS:
	/* Numeric addition, or concatenation of two byte vectors / vectors. */
	if (vms->r->vm_acc == NULL || PEEK() == NULL) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}
	if (NUMP(vms->r->vm_acc) && NUMP(PEEK()))
	  vms->r->vm_acc = MKNUM(NUM(vms->r->vm_acc)+NUM(POP()));
	else if (TAGGEDP(vms->r->vm_acc) || TAGGEDP(PEEK())) {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	} else if (BVECTORP(vms->r->vm_acc) && BVECTORP(PEEK()))
	  vms->r->vm_acc = (OBJ) bvector_concat((BVECTOR) POP(), (BVECTOR) vms->r->vm_acc);
	else if (VECTORP(vms->r->vm_acc) && VECTORP(PEEK()))
	  vms->r->vm_acc = (OBJ) vector_concat((VECTOR) POP(), (VECTOR) vms->r->vm_acc);
	else {
	  vm_raise(vms, (OBJ) newsym("vm-runtime-type-error"), vms->r->vm_acc);
	  break;
	}
	vms->c.vm_ip++;
	break;

      NUMOP(OP_MINUS, vms->r->vm_acc = MKNUM(NUM(POP())-NUM(vms->r->vm_acc)));
      NUMOP(OP_STAR, vms->r->vm_acc = MKNUM(NUM(POP())*NUM(vms->r->vm_acc)));
      NUMOP(OP_SLASH,
	    if (vms->r->vm_acc == MKNUM(0))
	      vm_raise(vms, (OBJ) newsym("divide-by-zero"), NULL);
	    else
	      vms->r->vm_acc = MKNUM(NUM(POP())/NUM(vms->r->vm_acc)));
      NUMOP(OP_PERCENT,
	    if (vms->r->vm_acc == MKNUM(0))
	      vm_raise(vms, (OBJ) newsym("divide-by-zero"), NULL);
	    else
	      vms->r->vm_acc = MKNUM(NUM(POP())%NUM(vms->r->vm_acc)));

      default:
	fprintf(stderr, "Unknown bytecode reached (%d == 0x%x).\n",
		CODEAT(vms->c.vm_ip),
		CODEAT(vms->c.vm_ip));
	exit(MOVE_EXIT_PROGRAMMER_FUCKUP);
    }
  }

  return vms->c.vm_state == VM_STATE_DYING;
}
/* Builds, from coefficient list w, the composition                        */
/*   atop( amp(ds(CLBRACE), over(coef,zero)), amp(tally(w), ds(CMIN)) )    */
/* where coef is w itself when w is complex and xco1(w) otherwise, then    */
/* marks the resulting verb with VTAYFINITE.  Returns 0 on failure.        */
static F1(jttpoly){A clamp,coef,fetch,z;
 RZ(w);
 coef=AT(w)&CMPX?w:xco1(w);               /* coefficients to index into    */
 fetch=amp(ds(CLBRACE),over(coef,zero));  /* left operand of the atop      */
 clamp=amp(tally(w),ds(CMIN));            /* right operand of the atop     */
 RZ(z=atop(fetch,clamp));
 VAV(z)->flag=VTAYFINITE;
 R z;
}
/*
 - dissect - figure out what matched what, no back references
 *
 * Walks the strip operators in [startst, stopst) against the text
 * [start, stop), which is taken to have matched already, and records the
 * offsets of every parenthesized subexpression in m->pmatch (relative to
 * m->offp).  Simple operators advance the text pointer directly; for
 * OQUEST_, OPLUS_ and OCH_ the length of the piece is found by repeatedly
 * calling slow() with a shrinking upper bound until the remainder of the
 * pattern matches the remainder of the text, and the piece is then
 * dissected recursively.
 *
 * Returns stop on success.  Returns NULL when the inside of an OQUEST_
 * construct cannot be lined up with the text it was assigned; all other
 * inconsistencies are only checked with assert().
 */
static char *			/* == stop on success, NULL on OQUEST_ mismatch */
dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
{
	int i;
	sopno ss;	/* start sop of current subRE */
	sopno es;	/* end sop of current subRE */
	char *sp;	/* start of string matched by it */
	char *stp;	/* string matched by it cannot pass here */
	char *rest;	/* start of rest of string */
	char *tail;	/* string unmatched by rest of RE */
	sopno ssub;	/* start sop of subsubRE */
	sopno esub;	/* end sop of subsubRE */
	char *ssp;	/* start of string matched by subsubRE */
	char *sep;	/* end of string matched by subsubRE */
	char *oldssp;	/* previous ssp */
	char *dp;	/* result of a recursive dissect() */

	AT("diss", start, stop, startst, stopst);
	sp = start;
	for (ss = startst; ss < stopst; ss = es) {
		/* identify end of subRE */
		es = ss;
		switch (OP(m->g->strip[es])) {
		case OPLUS_:
		case OQUEST_:
			/* operand is the distance to the closing operator */
			es += OPND(m->g->strip[es]);
			break;
		case OCH_:
			/* hop from branch to branch until the closing O_CH */
			while (OP(m->g->strip[es]) != O_CH)
				es += OPND(m->g->strip[es]);
			break;
		}
		es++;

		/* figure out what it matched */
		switch (OP(m->g->strip[ss])) {
		case OEND:
			assert(nope);
			break;
		case OCHAR:
			sp++;
			break;
		case OBOL:
		case OEOL:
		case OBOW:
		case OEOW:
			/* anchors consume no text */
			break;
		case OANY:
		case OANYOF:
			sp++;
			break;
		case OBACK_:
		case O_BACK:
			assert(nope);
			break;
		/* cases where length of match is hard to find */
		case OQUEST_:
			stp = stop;
			for (;;) {
				/* how long could this one be? */
				rest = slow(m, sp, stp, ss, es);
				assert(rest != NULL);	/* it did match */
				/* could the rest match the rest? */
				tail = slow(m, rest, stop, es, stopst);
				if (tail == stop)
					break;		/* yes! */
				/* no -- try a shorter match for this one */
				stp = rest - 1;
				assert(stp >= sp);	/* it did work */
			}
			ssub = ss + 1;
			esub = es - 1;
			/* did innards match? */
			if (slow(m, sp, rest, ssub, esub) != NULL) {
				dp = dissect(m, sp, rest, ssub, esub);
				/* reported as failure instead of asserted */
				if (dp != rest) return NULL;
			} else if (sp != rest) return NULL;	/* no match is only valid for empty text */
			sp = rest;
			break;
		case OPLUS_:
			stp = stop;
			for (;;) {
				/* how long could this one be? */
				rest = slow(m, sp, stp, ss, es);
				assert(rest != NULL);	/* it did match */
				/* could the rest match the rest? */
				tail = slow(m, rest, stop, es, stopst);
				if (tail == stop)
					break;		/* yes! */
				/* no -- try a shorter match for this one */
				stp = rest - 1;
				assert(stp >= sp);	/* it did work */
			}
			ssub = ss + 1;
			esub = es - 1;
			ssp = sp;
			oldssp = ssp;
			for (;;) {	/* find last match of innards */
				sep = slow(m, ssp, rest, ssub, esub);
				if (sep == NULL || sep == ssp)
					break;	/* failed or matched null */
				oldssp = ssp;	/* on to next try */
				ssp = sep;
			}
			if (sep == NULL) {
				/* last successful match */
				sep = ssp;
				ssp = oldssp;
			}
			assert(sep == rest);	/* must exhaust substring */
			/* NOTE: this slow() call vanishes when asserts are compiled out */
			assert(slow(m, ssp, sep, ssub, esub) == rest);
			dp = dissect(m, ssp, sep, ssub, esub);
			assert(dp == sep);
			sp = rest;
			break;
		case OCH_:
			stp = stop;
			for (;;) {
				/* how long could this one be? */
				rest = slow(m, sp, stp, ss, es);
				assert(rest != NULL);	/* it did match */
				/* could the rest match the rest? */
				tail = slow(m, rest, stop, es, stopst);
				if (tail == stop)
					break;		/* yes! */
				/* no -- try a shorter match for this one */
				stp = rest - 1;
				assert(stp >= sp);	/* it did work */
			}
			ssub = ss + 1;
			esub = ss + OPND(m->g->strip[ss]) - 1;
			assert(OP(m->g->strip[esub]) == OOR1);
			for (;;) {	/* find first matching branch */
				if (slow(m, sp, rest, ssub, esub) == rest)
					break;	/* it matched all of it */
				/* that one missed, try next one */
				assert(OP(m->g->strip[esub]) == OOR1);
				esub++;
				assert(OP(m->g->strip[esub]) == OOR2);
				ssub = esub + 1;
				esub += OPND(m->g->strip[esub]);
				if (OP(m->g->strip[esub]) == OOR2)
					esub--;
				else
					assert(OP(m->g->strip[esub]) == O_CH);
			}
			dp = dissect(m, sp, rest, ssub, esub);
			assert(dp == rest);
			sp = rest;
			break;
		case O_PLUS:
		case O_QUEST:
		case OOR1:
		case OOR2:
		case O_CH:
			/* closing operators are skipped by the es computation above */
			assert(nope);
			break;
		case OLPAREN:
			/* record where subexpression i starts */
			i = OPND(m->g->strip[ss]);
			assert(0 < i && i <= m->g->nsub);
			m->pmatch[i].rm_so = sp - m->offp;
			break;
		case ORPAREN:
			/* record where subexpression i ends */
			i = OPND(m->g->strip[ss]);
			assert(0 < i && i <= m->g->nsub);
			m->pmatch[i].rm_eo = sp - m->offp;
			break;
		default:		/* uh oh */
			assert(nope);
			break;
		}
	}

	assert(sp == stop);
	return(sp);
}
/*
 - slow - step through the string more deliberately
 *
 * Runs the state set for strip operators [startst, stopst) over the text
 * beginning at start and never looking past stop.  Every position at
 * which the stop state is set is remembered, so the value returned is the
 * last (longest) position at which a match ended, or NULL when the stop
 * state was never reached.
 */
static char *			/* where it ended */
slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
{
	states st = m->st;
	states empty = m->empty;
	states tmp = m->tmp;
	char *p = start;
	/* character before the start, or OUT at the beginning of the input */
	int c = (start == m->beginp) ? OUT : *(start-1);
	int lastc;	/* previous c */
	int flagch;	/* pseudo-character (BOL/EOL/BOLEOL/BOW/EOW) to step with */
	int i;		/* how many times to step with flagch */
	char *matchp;	/* last p at which a match ended */

	AT("slow", start, stop, startst, stopst);
	CLEAR(st);
	SET1(st, startst);
	SP("sstart", st, *p);
	st = step(m->g, startst, stopst, st, NOTHING, st);
	matchp = NULL;
	for (;;) {
		/* next character */
		lastc = c;
		c = (p == m->endp) ? OUT : *p;

		/* is there an EOL and/or BOL between lastc and c? */
		flagch = '\0';
		i = 0;
		if ( (lastc == '\n' && m->g->cflags&R_REGEX_NEWLINE) ||
				(lastc == OUT && !(m->eflags&R_REGEX_NOTBOL)) ) {
			flagch = BOL;
			i = m->g->nbol;
		}
		if ( (c == '\n' && m->g->cflags&R_REGEX_NEWLINE) ||
				(c == OUT && !(m->eflags&R_REGEX_NOTEOL)) ) {
			flagch = (flagch == BOL) ? BOLEOL : EOL;
			i += m->g->neol;
		}
		if (i != 0) {
			/* one step per anchor counted in nbol/neol */
			for (; i > 0; i--)
				st = step(m->g, startst, stopst, st, flagch, st);
			SP("sboleol", st, c);
		}

		/* how about a word boundary? */
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
					(c != OUT && ISWORD(c)) ) {
			flagch = BOW;
		}
		if ( (lastc != OUT && ISWORD(lastc)) &&
				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
			flagch = EOW;
		}
		if (flagch == BOW || flagch == EOW) {
			st = step(m->g, startst, stopst, st, flagch, st);
			SP("sboweow", st, c);
		}

		/* are we done? */
		if (ISSET(st, stopst))
			matchp = p;	/* keep going: a longer match may follow */
		if (EQ(st, empty) || p == stop)
			break;		/* NOTE BREAK OUT */

		/* no, we must deal with this character */
		ASSIGN(tmp, st);
		ASSIGN(st, empty);
		assert(c != OUT);
		st = step(m->g, startst, stopst, tmp, c, st);
		SP("saft", st, c);
		/* stepping with NOTHING again must not change the state set */
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
		p++;
	}

	return(matchp);
}
CMaterial& CMaterialManager::LoadMaterial(const VfsPath& pathname) { if(pathname.empty()) return NullMaterial; std::map<VfsPath, CMaterial*>::iterator iter = m_Materials.find(pathname); if(iter != m_Materials.end()) { if((*iter).second) return *(*iter).second; } CXeromyces xeroFile; if(xeroFile.Load(g_VFS, pathname) != PSRETURN_OK) return NullMaterial; #define EL(x) int el_##x = xeroFile.GetElementID(#x) #define AT(x) int at_##x = xeroFile.GetAttributeID(#x) EL(texture); EL(alpha); AT(usage); #undef AT #undef EL CMaterial *material = NULL; try { XMBElement root = xeroFile.GetRoot(); XMBElementList childNodes = root.GetChildNodes(); material = new CMaterial(); for(int i = 0; i < childNodes.Count; i++) { XMBElement node = childNodes.Item(i); int token = node.GetNodeName(); XMBAttributeList attrs = node.GetAttributes(); CStr temp; if(token == el_texture) { CStr value(node.GetText()); material->SetTexture(value); } else if(token == el_alpha) { temp = CStr(attrs.GetNamedItem(at_usage)); // Determine whether the alpha is used for basic transparency or player color if (temp == "playercolor") material->SetUsePlayerColor(true); else if (temp == "objectcolor") material->SetUseTextureColor(true); else material->SetUsesAlpha(ParseUsage(temp)); } } m_Materials[pathname] = material; } catch(...) { SAFE_DELETE(material); throw; } return *material; }
inline void Syr2kLT ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C, bool conjugate=false ) { #ifndef RELEASE CallStackEntry entry("internal::Syr2kLT"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( A.Width() != C.Height() || A.Width() != C.Width() || B.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal Syr2kLT:\n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE ); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,MR, STAR> A1Trans_MR_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,STAR,VR > A1_STAR_VR(g); DistMatrix<T,STAR,VR > B1_STAR_VR(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,STAR,MC > B1_STAR_MC(g); A1Trans_MR_STAR.AlignWith( C ); B1Trans_MR_STAR.AlignWith( C ); A1_STAR_MC.AlignWith( C ); B1_STAR_MC.AlignWith( C ); // Start the algorithm ScaleTrapezoid( beta, LEFT, LOWER, 0, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); //--------------------------------------------------------------------// A1Trans_MR_STAR.TransposeFrom( A1 ); A1_STAR_VR.TransposeFrom( A1Trans_MR_STAR ); A1_STAR_MC = A1_STAR_VR; B1Trans_MR_STAR.TransposeFrom( B1 ); B1_STAR_VR.TransposeFrom( B1Trans_MR_STAR ); B1_STAR_MC = B1_STAR_VR; LocalTrr2k ( LOWER, orientation, TRANSPOSE, orientation, TRANSPOSE, alpha, A1_STAR_MC, B1Trans_MR_STAR, 
B1_STAR_MC, A1Trans_MR_STAR, T(1), C ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } }
inline void SUMMA_NNDot ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("gemm::SUMMA_NNDot"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must have the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal matrices: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); if( A.Height() > B.Width() ) { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g), CB(g), C1(g), C10(g), C11(g), C12(g), C2(g); // Temporary distributions DistMatrix<T,STAR,VC> A1_STAR_VC(g); DistMatrix<T,VC,STAR> B1_VC_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); A1_STAR_VC = A1; B1_VC_STAR.AlignWith( A1_STAR_VC ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C1, C1L, C1R, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( C1L, /**/ C1R, C10, /**/ C11, C12 ); //------------------------------------------------------------// B1_VC_STAR = B1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VC, B1_VC_STAR, C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( C1L, /**/ C1R, C10, C11, /**/ C12 ); } SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, 
C0, C1, /**/ /**/ CB, C2 ); } } else { // Matrix views DistMatrix<T> AT(g), AB(g), A0(g), A1(g), A2(g); DistMatrix<T> BL(g), B0(g), BR(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C1T(g), C01(g), C0(g), C1(g), C2(g), C1B(g), C11(g), C21(g); // Temporary distributions DistMatrix<T,STAR,VR> A1_STAR_VR(g); DistMatrix<T,VR,STAR> B1_VR_STAR(g); DistMatrix<T,STAR,STAR> C11_STAR_STAR(g); // Star the algorithm Scale( beta, C ); LockedPartitionRight( B, BL, BR, 0 ); PartitionRight( C, CL, CR, 0 ); while( BR.Width() > 0 ) { LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); RepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); B1_VR_STAR = B1; A1_STAR_VR.AlignWith( B1_VR_STAR ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C1, C1T, C1B, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( C1T, C01, /***/ /***/ C11, C1B, C21 ); //------------------------------------------------------------// A1_STAR_VR = A1; LocalGemm ( NORMAL, NORMAL, alpha, A1_STAR_VR, B1_VR_STAR, C11_STAR_STAR ); C11.SumScatterUpdate( T(1), C11_STAR_STAR ); //------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( C1T, C01, C11, /***/ /***/ C1B, C21 ); } SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlidePartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); } } }
extern "C" magma_int_t magma_ctstrf_gpu( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, magma_int_t nb, magmaFloatComplex *hU, magma_int_t ldhu, magmaFloatComplex *dU, magma_int_t lddu, magmaFloatComplex *hA, magma_int_t ldha, magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *hL, magma_int_t ldhl, magmaFloatComplex *dL, magma_int_t lddl, magma_int_t *ipiv, magmaFloatComplex *hwork, magma_int_t ldhwork, magmaFloatComplex *dwork, magma_int_t lddwork, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= CSSSSM applies the LU factorization update from a complex matrix formed by a lower triangular IB-by-K tile L1 on top of a M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1 tile A1 on top of a M2-by-N2 tile A2 (N1 == N2). This is the right-looking Level 2.5 BLAS version of the algorithm. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. IB (input) INTEGER The inner-blocking size. IB >= 0. NB (input) INTEGER The blocking size. NB >= 0. hU (input,output) COMPLEX array, dimension(LDHU, N), on cpu. On entry, the NB-by-N upper triangular tile hU. On exit, the content is incomplete. Shouldn't be used. LDHU (input) INTEGER The leading dimension of the array hU. LDHU >= max(1,NB). dU (input,output) COMPLEX array, dimension(LDDU, N), on gpu. On entry, the NB-by-N upper triangular tile dU identical to hU. On exit, the new factor U from the factorization. LDDU (input) INTEGER The leading dimension of the array dU. LDDU >= max(1,NB). hA (input,output) COMPLEX array, dimension(LDHA, N), on cpu. On entry, only the M-by-IB first panel needs to be identical to dA(1..M, 1..IB). On exit, the content is incomplete. Shouldn't be used. LDHA (input) INTEGER The leading dimension of the array hA. LDHA >= max(1,M). 
dA (input,output) COMPLEX array, dimension(LDDA, N) , on gpu. On entry, the M-by-N tile to be factored. On exit, the factor L from the factorization LDDA (input) INTEGER The leading dimension of the array dA. LDDA >= max(1,M). hL (output) COMPLEX array, dimension(LDHL, K), on vpu. On exit, contains in the upper part the IB-by-K lower triangular tile, and in the lower part IB-by-K the inverse of the top part. LDHL (input) INTEGER The leading dimension of the array hL. LDHL >= max(1,2*IB). dL (output) COMPLEX array, dimension(LDDL, K), on gpu. On exit, contains in the upper part the IB-by-K lower triangular tile, and in the lower part IB-by-K the inverse of the top part. LDDL (input) INTEGER The leading dimension of the array dL. LDDL >= max(1,2*IB). hWORK (output) COMPLEX array, dimension(LDHWORK, 2*IB), on cpu. Workspace. LDHWORK (input) INTEGER The leading dimension of the array hWORK. LDHWORK >= max(NB, 1). dWORK (output) COMPLEX array, dimension(LDDWORK, 2*IB), on gpu. Workspace. LDDWORK (input) INTEGER The leading dimension of the array dWORK. LDDWORK >= max(NB, 1). IPIV (output) INTEGER array on the cpu. The pivot indices array of size K as returned by CTSTRF INFO (output) INTEGER - PLASMA_SUCCESS successful exit - < 0 if INFO = -k, the k-th argument had an illegal value - > 0 if INFO = k, U(k,k) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. 
===================================================================== */ #define UT(i,j) (dUT + (i)*ib*lddu + (j)*ib ) #define AT(i,j) (dAT + (i)*ib*ldda + (j)*ib ) #define L(i) (dL + (i)*ib*lddl ) #define L2(i) (dL2 + (i)*ib*lddl ) #define hU(i,j) (hU + (j)*ib*ldhu + (i)*ib ) #define hA(i,j) (hA + (j)*ib*ldha + (i)*ib ) #define hL(i) (hL + (i)*ib*ldhl ) #define hL2(i) (hL2 + (i)*ib*ldhl ) magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; int iinfo = 0; int maxm, mindim; int i, j, im, s, ip, ii, sb, p = 1; magmaFloatComplex *dAT, *dUT; magmaFloatComplex *dAp, *dUp; #ifndef WITHOUTTRTRI magmaFloatComplex *dL2 = dL + ib; magmaFloatComplex *hL2 = hL + ib; p = 2; #endif /* Check input arguments */ *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (ib < 0) { *info = -3; } else if ((lddu < max(1,m)) && (m > 0)) { *info = -6; } else if ((ldda < max(1,m)) && (m > 0)) { *info = -8; } else if ((lddl < max(1,ib)) && (ib > 0)) { *info = -10; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* quick return */ if ((m == 0) || (n == 0) || (ib == 0)) return *info; ip = 0; /* Function Body */ mindim = min(m, n); s = mindim / ib; if ( ib >= mindim ) { /* Use CPU code. 
*/ CORE_ctstrf(m, n, ib, nb, (PLASMA_Complex32_t*)hU, ldhu, (PLASMA_Complex32_t*)hA, ldha, (PLASMA_Complex32_t*)hL, ldhl, ipiv, (PLASMA_Complex32_t*)hwork, ldhwork, info); #ifndef WITHOUTTRTRI CORE_clacpy( PlasmaUpperLower, mindim, mindim, (PLASMA_Complex32_t*)hL, ldhl, (PLASMA_Complex32_t*)hL2, ldhl ); CORE_ctrtri( PlasmaLower, PlasmaUnit, mindim, (PLASMA_Complex32_t*)hL2, ldhl, info ); if (*info != 0 ) { fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info); } #endif if ( (storev == 'R') || (storev == 'r') ) { magma_csetmatrix( m, n, hU, ldhu, dwork, lddwork ); magmablas_ctranspose( dU, lddu, dwork, lddwork, m, n ); magma_csetmatrix( m, n, hA, ldha, dwork, lddwork ); magmablas_ctranspose( dA, ldda, dwork, lddwork, m, n ); } else { magma_csetmatrix( m, n, hU, ldhu, dU, lddu ); magma_csetmatrix( m, n, hA, ldha, dA, ldda ); } magma_csetmatrix( p*ib, n, hL, ldhl, dL, lddl ); } else { /* Use hybrid blocked code. */ maxm = ((m + 31)/32)*32; if ( (storev == 'C') || (storev == 'c') ) { magmablas_cgetmo_in( dU, dUT, lddu, m, n ); magmablas_cgetmo_in( dA, dAT, ldda, m, n ); } else { dUT = dU; dAT = dA; } dAp = dwork; dUp = dAp + ib*lddwork; ip = 0; for( i=0; i<s; i++ ) { ii = i * ib; sb = min(mindim-ii, ib); if ( i>0 ){ // download i-th panel magmablas_ctranspose( dUp, lddu, UT(0, i), lddu, sb, ii ); magmablas_ctranspose( dAp, ldda, AT(0, i), ldda, sb, m ); magma_cgetmatrix( ii, sb, dUp, lddu, hU(0, i), ldhu ); magma_cgetmatrix( m, sb, dAp, ldda, hA(0, i), ldha ); // make sure that gpu queue is empty //magma_device_sync(); #ifndef WITHOUTTRTRI magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-(ii+sb), ib, c_one, L2(i-1), lddl, UT(i-1, i+1), lddu); #else magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-(ii+sb), ib, c_one, L(i-1), lddl, UT(i-1, i+1), lddu); #endif magma_cgemm( MagmaNoTrans, MagmaNoTrans, n-(ii+sb), m, ib, c_neg_one, UT(i-1, i+1), lddu, AT(0, i-1), ldda, c_one, AT(0, i+1), ldda ); } // do the cpu part CORE_ctstrf(m, sb, ib, 
nb, (PLASMA_Complex32_t*)hU(i, i), ldhu, (PLASMA_Complex32_t*)hA(0, i), ldha, (PLASMA_Complex32_t*)hL(i), ldhl, ipiv+ii, (PLASMA_Complex32_t*)hwork, ldhwork, info); if ( (*info == 0) && (iinfo > 0) ) *info = iinfo + ii; // Need to swap betw U and A #ifndef NOSWAPBLK magmablas_cswapblk( 'R', n-(ii+sb), UT(i, i+1), lddu, AT(0, i+1), ldda, 1, sb, ipiv+ii, 1, nb ); for(j=0; j<ib; j++) { im = ipiv[ip]-1; if ( im == j ) { ipiv[ip] += ii; } ip++; } #else for(j=0; j<ib; j++) { im = ipiv[ip]-1; if ( im != (j) ) { im = im - nb; assert( (im>=0) && (im<m) ); magmablas_cswap( n-(ii+sb), UT(i, i+1)+j*lddu, 1, AT(0, i+1)+im*ldda, 1 ); } else { ipiv[ip] += ii; } ip++; } #endif #ifndef WITHOUTTRTRI CORE_clacpy( PlasmaUpperLower, sb, sb, (PLASMA_Complex32_t*)hL(i), ldhl, (PLASMA_Complex32_t*)hL2(i), ldhl ); CORE_ctrtri( PlasmaLower, PlasmaUnit, sb, (PLASMA_Complex32_t*)hL2(i), ldhl, info ); if (*info != 0 ) { fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info); } #endif // upload i-th panel magma_csetmatrix( sb, sb, hU(i, i), ldhu, dUp, lddu ); magma_csetmatrix( m, sb, hA(0, i), ldha, dAp, ldda ); magma_csetmatrix( p*ib, sb, hL(i), ldhl, L(i), lddl ); magmablas_ctranspose( UT(i, i), lddu, dUp, lddu, sb, sb); magmablas_ctranspose( AT(0, i), ldda, dAp, ldda, m, sb); // make sure that gpu queue is empty //magma_device_sync(); // do the small non-parallel computations if ( s > (i+1) ) { #ifndef WITHOUTTRTRI magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, sb, sb, c_one, L2(i), lddl, UT(i, i+1), lddu); #else magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, sb, sb, c_one, L(i), lddl, UT(i, i+1), lddu); #endif magma_cgemm( MagmaNoTrans, MagmaNoTrans, sb, m, sb, c_neg_one, UT(i, i+1), lddu, AT(0, i ), ldda, c_one, AT(0, i+1), ldda ); } else { #ifndef WITHOUTTRTRI magma_ctrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-mindim, sb, c_one, L2(i), lddl, UT(i, i+1), lddu); #else magma_ctrsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n-mindim, sb, 
c_one, L(i), lddl, UT(i, i+1), lddu); #endif magma_cgemm( MagmaNoTrans, MagmaNoTrans, n-mindim, m, sb, c_neg_one, UT(i, i+1), lddu, AT(0, i ), ldda, c_one, AT(0, i+1), ldda ); } } if ( (storev == 'C') || (storev == 'c') ) { magmablas_cgetmo_out( dU, dUT, lddu, m, n ); magmablas_cgetmo_out( dA, dAT, ldda, m, n ); } } return *info; }
inline void SUMMA_NNB ( T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, T beta, DistMatrix<T>& C ) { #ifndef RELEASE CallStackEntry entry("gemm::SUMMA_NNB"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) LogicError("{A,B,C} must be distributed over the same grid"); if( A.Height() != C.Height() || B.Width() != C.Width() || A.Width() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal matrices: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; LogicError( msg.str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g); A1_STAR_MC.AlignWith( B ); D1Trans_MR_STAR.AlignWith( B ); // Start the algorithm Scale( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); PartitionDown ( C, CT, CB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); RepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ] LocalGemm ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, D1Trans_MR_STAR ); C1.TransposeSumScatterUpdate( T(1), D1Trans_MR_STAR ); //--------------------------------------------------------------------// SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlidePartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); } }
// Evaluate the PDF for one event.
//
// The observables are copied from `measurement` into member variables, the
// time primitives are computed for the first resolution and the differential
// cross-section is evaluated.  Unless resolution1Fraction is (numerically)
// one, the same is repeated for the second resolution and the two values are
// mixed with weights resolution1Fraction and (1 - resolution1Fraction).
// A non-positive value at t > 0, or a NaN, is reported on stdout; a NaN
// terminates the program.
double Bs2JpsiPhi_mistagObservable_alt::Evaluate(DataPoint * measurement)
{
	// Observables -> member variables
	t           = measurement->GetObservable( timeName )->GetValue() - timeOffset ;
	ctheta_tr   = measurement->GetObservable( cosThetaName )->GetValue();
	phi_tr      = measurement->GetObservable( phiName )->GetValue();
	ctheta_1    = measurement->GetObservable( cosPsiName )->GetValue();
	tag         = (int)measurement->GetObservable( tagName )->GetValue();
	tagFraction = measurement->GetObservable( mistagName )->GetValue();
	//tagFraction= 0.5; //PELC

	// First resolution component: always needed
	resolution    = resolution1 ;
	expL_stored   = Mathematics::Exp( t, gamma_l(), resolution ) ;
	expH_stored   = Mathematics::Exp( t, gamma_h(), resolution ) ;
	expSin_stored = Mathematics::ExpSin( t, gamma(), delta_ms, resolution ) ;
	expCos_stored = Mathematics::ExpCos( t, gamma(), delta_ms, resolution ) ;
	const double firstComponent = this->diffXsec( );

	double returnValue = firstComponent ;

	// Second resolution component, only when it carries any weight.
	// Written as a negation so that a NaN fraction still takes this path.
	if( !( resolution1Fraction >= 0.9999 ) )
	{
		resolution    = resolution2 ;
		expL_stored   = Mathematics::Exp( t, gamma_l(), resolution ) ;
		expH_stored   = Mathematics::Exp( t, gamma_h(), resolution ) ;
		expSin_stored = Mathematics::ExpSin( t, gamma(), delta_ms, resolution ) ;
		expCos_stored = Mathematics::ExpCos( t, gamma(), delta_ms, resolution ) ;
		const double secondComponent = this->diffXsec( );

		returnValue = resolution1Fraction*firstComponent + (1. - resolution1Fraction)*secondComponent ;
	}

	// Sanity report
	const bool notANumber  = isnan(returnValue) ;
	const bool nonPositive = (returnValue <= 0.) && (t>0.) ;
	if( nonPositive || notANumber )
	{
		cout << endl ;
		cout << " Bs2JpsiPhi_mistagObservable_alt::evaluate() returns <=0 or nan :" << returnValue << endl ;
		cout << " gamma " << gamma() ;
		cout << " gl " << gamma_l() ;
		cout << " gh " << gamma_h() ;
		cout << " AT " << AT() ;
		cout << " AP " << AP() ;
		cout << " A0 " << A0() << endl ;
		cout << " For event with: " << endl ;
		cout << " time " << t << endl ;
		if( notANumber ) exit(1) ;
	}

	return returnValue ;
}
void Matrix::test(void) { int i,j; //Matrix *A,*B,*C,*AT,*K; /*test matrix mult*/ // A = new Matrix(3,2); // B = new Matrix(2,3); // C = new Matrix(3,3); // AT = new Matrix(2,3); // K = new Matrix(2*3,3*2); Matrix A(3,2),B(2,3),C(3,3),AT(2,3),K(2*3,3*2); Matrix M(3,3),x(3,1),b(3,1); A[0][0] = 0; A[0][1] = 1; A[1][0] = 3; A[1][1] = 1; A[2][0] = 2; A[2][1] = 0; B[0][0] = 4; B[0][1] = 1; B[0][2] = 0; B[1][0] = 2; B[1][1] = 1; B[1][2] = 2; std::cout << A.mat[1][0] << std::endl; C = Matrix::matrix_mult(A,B); for(i=0;i<3;i++) { for(j=0;j<3;j++) std::cout << C[i][j] << " "; std::cout << std::endl; } std::cout << "now A transposed" << std::endl; A.transpose(AT); for(i=0;i<2;i++) { for(j=0;j<3;j++) std::cout << AT[i][j] << " "; std::cout << std::endl; } std::cout << "now Kronecker product" << std::endl; K = Matrix::kron_(A,B); std::cout << "product calced " << K.getM() << " " << K.getN() << std::endl; for(i=0;i<2*3;i++) { for(j=0;j<3*2;j++) std::cout << K.mat[i][j] << " "; std::cout << std::endl; } cv::Mat_<double> a(3,2), l(2,3); a(0,0) = 0; a(0,1) = 1; a(1,0) = 3; a(1,1) = 1; a(2,0) = 2; a(2,1) = 0; l(0,0) = 4; l(0,1) = 1; l(0,2) = 0; l(1,0) = 2; l(1,1) = 1; l(1,2) = 2; cv::Mat_<double> k = Matrix::kronecker(a,l); std::cout << "mat cv Kronecker" << std::endl << Matrix(k); Matrix sub(3,6); sub = submatrix_(K,2,4); std::cout << "Submatix of K rows 2 to 4 is : \n" << sub; std::cout << "now test solving a linear system using SVD: " << std::endl; std::cout << "A*x = b; A = U*D*V', inv(A) = V*inv(D)*U', x = V*inv(D)*U'*b" << std::endl; //if element in diagonal of D is 0 set element in inv(D) to 0 proof in numerical recipies M[0][0] = 4; M[0][1] = 1.5; M[0][2] = 2; M[1][0] = 3; M[1][1] = 3; M[1][2] = 1; M[2][0] = 2; M[2][1] = 1; M[2][2] = 5; b[0][0] = 10.6; b[1][0] = 11.3; b[2][0] = 9; //x should be 1.5, 2, 0.8 Matrix result = solveLinSysSvd(M,b); std::cout << " The result of the linear system is : " << std::endl; std::cout << result; A = 
Matrix::matrix_mult(Matrix::eye(2),Matrix::matrix_mult(Matrix::eye(2),Matrix::eye(2))); cv::Mat_<double> aj(2,2); cv::Mat_<double> bj(2,1); aj(0,0) = 2; aj(0,1) = 1; aj(1,0) = 5; aj(1,1) = 7; bj(0,0) = 11; bj(1,0) = 13; cv::Mat_<double> xj(2,1); Matrix::jacobi(aj,bj,xj); std::cout << "jacobi " << Matrix(xj); }
/// Update the set of tracks from the blobs detected in the current frame.
///
/// @param blobs      Blobs of the current frame.
/// @param tracks     Tracks to update in place (tracks are added, modified
///                   and deleted).
/// @param thDistance A blob and a track are "close" when
///                   distantBlobTrack() is below this value.
/// @param thInactive A track whose inactive counter reaches this value is
///                   deleted.
/// @param thActive   When non-zero, a track that goes inactive before its
///                   active counter reached this value is deleted.
///
/// The helper macros C/AB/AT/IB/IT/B/T are defined elsewhere in the file and
/// index the scratch array `close`.  NOTE(review): presumably a
/// (nBlobs+2) x (nTracks+2) proximity matrix with two extra rows/columns for
/// the counters and ids; confirm against the macro definitions.
void cvUpdateTracks(CvBlobs const &blobs, CvTracks &tracks, const double thDistance, const unsigned int thInactive, const unsigned int thActive)
{
  CV_FUNCNAME("cvUpdateTracks");
  __CV_BEGIN__;

  unsigned int nBlobs = blobs.size();
  unsigned int nTracks = tracks.size();

  CvID *close = new unsigned int[(nBlobs+2)*(nTracks+2)];

  try
  {
    // Initialization:
    unsigned int i=0;
    for (CvBlobs::const_iterator it = blobs.begin(); it!=blobs.end(); ++it, i++)
    {
      AB(i) = 0;
      IB(i) = it->second->label;
    }

    CvID maxTrackID = 0;

    unsigned int j=0;
    for (CvTracks::const_iterator jt = tracks.begin(); jt!=tracks.end(); ++jt, j++)
    {
      AT(j) = 0;
      IT(j) = jt->second->id;
      if (jt->second->id > maxTrackID)
        maxTrackID = jt->second->id;
    }

    // Proximity matrix calculation and "used blob" list initialization.
    // The assignment inside the condition is intentional: the closeness flag
    // is stored and tested in one go.
    for (i=0; i<nBlobs; i++)
      for (j=0; j<nTracks; j++)
        if (C(i, j) = (distantBlobTrack(B(i), T(j)) < thDistance))
        {
          AB(i)++;
          AT(j)++;
        }

    // Detect inactive tracks
    for (j=0; j<nTracks; j++)
    {
      unsigned int c = AT(j);

      if (c==0)
      {
        // Inactive track.
        CvTrack *track = T(j);
        track->inactive++;
        track->label = 0;
      }
    }

    // Detect new tracks
    for (i=0; i<nBlobs; i++)
    {
      unsigned int c = AB(i);

      if (c==0)
      {
        // New track.
        maxTrackID++;
        CvBlob *blob = B(i);
        CvTrack *track = new CvTrack;
        track->id = maxTrackID;
        track->label = blob->label;
        track->minx = blob->minx;
        track->miny = blob->miny;
        track->maxx = blob->maxx;
        track->maxy = blob->maxy;
        track->centroid = blob->centroid;
        track->lifetime = 0;
        track->active = 0;
        track->inactive = 0;
        tracks.insert(CvIDTrack(maxTrackID, track));
      }
    }

    // Clustering
    for (j=0; j<nTracks; j++)
    {
      unsigned int c = AT(j);

      if (c)
      {
        list<CvTrack*> tt; tt.push_back(T(j));
        list<CvBlob*> bb;

        getClusterForTrack(j, close, nBlobs, nTracks, blobs, tracks, bb, tt);

        // Without a blob there is nothing to copy into the track.
        if (bb.empty())
          continue;

        // Select track: the first one with the largest bounding-box area.
        // Seeded with the first candidate so that a cluster made only of
        // zero-area boxes still yields a valid pointer.
        CvTrack *track = tt.front();
        unsigned int area = 0;
        for (list<CvTrack*>::const_iterator it=tt.begin(); it!=tt.end(); ++it)
        {
          CvTrack *t = *it;

          unsigned int a = (t->maxx-t->minx)*(t->maxy-t->miny);
          if (a>area)
          {
            area = a;
            track = t;
          }
        }

        // Select blob: the first one with the largest area, seeded likewise.
        CvBlob *blob = bb.front();
        area = 0;

        //cout << "Matching blobs: ";
        for (list<CvBlob*>::const_iterator it=bb.begin(); it!=bb.end(); ++it)
        {
          CvBlob *b = *it;

          //cout << b->label << " ";

          if (b->area>area)
          {
            area = b->area;
            blob = b;
          }
        }

        // The winning track takes over the winning blob's geometry.
        track->label = blob->label;
        track->centroid = blob->centroid;
        track->minx = blob->minx;
        track->miny = blob->miny;
        track->maxx = blob->maxx;
        track->maxy = blob->maxy;
        if (track->inactive)
          track->active = 0;
        track->inactive = 0;

        // Others to inactive
        for (list<CvTrack*>::const_iterator it=tt.begin(); it!=tt.end(); ++it)
        {
          CvTrack *t = *it;

          if (t!=track)
          {
            t->inactive++;
            t->label = 0;
          }
        }
      }
    }

    // Drop expired tracks, age the others.
    for (CvTracks::iterator jt=tracks.begin(); jt!=tracks.end();)
      if ((jt->second->inactive>=thInactive)||((jt->second->inactive)&&(thActive)&&(jt->second->active<thActive)))
      {
        delete jt->second;
        tracks.erase(jt++);
      }
      else
      {
        jt->second->lifetime++;
        if (!jt->second->inactive)
          jt->second->active++;
        ++jt;
      }
  }
  catch (...)
  {
    delete[] close;
    throw; // TODO: OpenCV style.
  }

  delete[] close;

  __CV_END__;
}
inline void HouseholderSolve ( Orientation orientation, DistMatrix<Complex<R> >& A, const DistMatrix<Complex<R> >& B, DistMatrix<Complex<R> >& X ) { #ifndef RELEASE PushCallStack("HouseholderSolve"); if( A.Grid() != B.Grid() || A.Grid() != X.Grid() ) throw std::logic_error("Grids do not match"); if( orientation == TRANSPOSE ) throw std::logic_error("Invalid orientation"); #endif typedef Complex<R> C; const Grid& g = A.Grid(); // TODO: Add scaling const int m = A.Height(); const int n = A.Width(); DistMatrix<C,MD,STAR> t( g ); if( orientation == NORMAL ) { if( m != B.Height() ) throw std::logic_error("A and B do not conform"); if( m >= n ) { // Overwrite A with its packed QR factorization (and store the // corresponding Householder scalars in t) QR( A, t ); // Copy B into X X = B; // Apply Q' to X ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, FORWARD, CONJUGATED, 0, A, t, X ); // Shrink X to its new height X.ResizeTo( n, X.Width() ); // Solve against R (checking for singularities) DistMatrix<C> AT( g ); LockedView( AT, A, 0, 0, n, n ); Trsm( LEFT, UPPER, NORMAL, NON_UNIT, C(1), AT, X, true ); } else { // Overwrite A with its packed LQ factorization (and store the // corresponding Householder scalars in it) LQ( A, t ); // Copy B into X X.ResizeTo( n, B.Width() ); DistMatrix<C> XT( g ), XB( g ); PartitionDown( X, XT, XB, m ); XT = B; Zero( XB ); // Solve against L (checking for singularities) DistMatrix<C> AL( g ); LockedView( AL, A, 0, 0, m, m ); Trsm( LEFT, LOWER, NORMAL, NON_UNIT, C(1), AL, XT, true ); // Apply Q' to X ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, BACKWARD, CONJUGATED, 0, A, t, X ); } } else // orientation == ADJOINT { if( n != B.Height() ) throw std::logic_error("A and B do not conform"); if( m >= n ) { // Overwrite A with its packed QR factorization (and store the // corresponding Householder scalars in t) QR( A, t ); // Copy B into X X.ResizeTo( m, B.Width() ); DistMatrix<C> XT( g ), XB( g ); PartitionDown( X, XT, XB, n ); XT = B; Zero( XB 
); // Solve against R' (checking for singularities) DistMatrix<C> AT( g ); LockedView( AT, A, 0, 0, n, n ); Trsm( LEFT, UPPER, ADJOINT, NON_UNIT, C(1), AT, XT, true ); // Apply Q to X ApplyPackedReflectors ( LEFT, LOWER, VERTICAL, BACKWARD, UNCONJUGATED, 0, A, t, X ); } else { // Overwrite A with its packed LQ factorization (and store the // corresponding Householder scalars in t) LQ( A, t ); // Copy B into X X = B; // Apply Q to X ApplyPackedReflectors ( LEFT, UPPER, HORIZONTAL, FORWARD, UNCONJUGATED, 0, A, t, X ); // Shrink X to its new height X.ResizeTo( m, X.Width() ); // Solve against L' (check for singularities) DistMatrix<C> AL( g ); LockedView( AL, A, 0, 0, m, m ); Trsm( LEFT, LOWER, ADJOINT, NON_UNIT, C(1), AL, X, true ); } } #ifndef RELEASE PopCallStack(); #endif }
/*
 * edit_at - interactive single-line editor for a string shown on screen.
 *
 * row, col        screen position handed to AT() and print_at_from()
 * max_length      largest number of characters the edited string may hold;
 *                 the working copy is allocated with max_length+1 bytes
 * screen_length   width of the visible window; also sizes the `blank` buffer
 * string_to_edit  initial text; overwritten with the edited text when the
 *                 return key (scan code 0x1C) is pressed
 * filler          only referenced from commented-out code in the visible part
 * fixed           when non-zero the arrow keys never scroll the window
 * check_func      called with each typed character; the character is only
 *                 accepted when it returns non-zero
 * convert_upper   when non-zero accepted characters go through toupper()
 * screen_height   forwarded to edit_bottom_title()
 *
 * The key is read with Bconstat()/Bconin(); the scan code is taken from the
 * bits above bit 15 and the character from the low byte.
 * NOTE(review): presumably the Atari TOS BIOS console calls - confirm.
 *
 * NOTE(review): the definition is cut off in this file - the end of the
 * main loop, the release of `blank`/`edit_string` and the return value are
 * not visible here.
 */
int edit_at(int row, int col, int max_length, int screen_length, char *string_to_edit, char filler, int fixed, int (*check_func)( char ), int convert_upper, int screen_height)
{
#define BLINK 2
#define NOBLINK 3
    char *edit_string;
    int blink,logical_cursor, cursor, last_char, insert;
    int first_char_on_screen;
    int finished, esc_pressed;
    long key;
    int upper,lower;
    char asc;
    int loop,loop2/*,delete_loop*/;
    char *blank=(char *)malloc(screen_length+1);

    edit_bottom_title(screen_height);

    /* NOTE(review): the loop fills indices 0..screen_length, so the
     * terminator below lands at index screen_length+1, one byte past the
     * screen_length+1 bytes allocated above. */
    for(loop=0;loop<=screen_length;loop++)
        blank[loop]=' ';
    blank[loop]='\0';

    /* set up the string - create space, copy old, pad out new with filler */
    edit_string=(char *)malloc(max_length+1);
    /* NOTE(review): no check that string_to_edit fits in max_length. */
    strcpy(edit_string,string_to_edit);
    logical_cursor=0;
    while(edit_string[logical_cursor]!='\0')
        logical_cursor++;
    /* last_char is the index of the terminator, i.e. the current length */
    last_char=logical_cursor;
    /* for(;logical_cursor<max_length;logical_cursor++) edit_string[logical_cursor]=filler; */
    edit_string[logical_cursor]='\0';

    /* cursor is the column inside the window, logical_cursor the index into
     * edit_string, first_char_on_screen the index shown in the first column */
    cursor=0;
    logical_cursor=0;
    first_char_on_screen=0;
    insert=1;
    esc_pressed=0;
    if(insert==1)
    {
        blink=BLINK;
        Cursconf(blink,0);
    }
    else
    {
        blink=NOBLINK;
        Cursconf(blink,0);
    }

    /* print string */
    print_at_from(row,col,first_char_on_screen,screen_length,edit_string);
    SHOW_CURSOR
    AT(row,col)
    finished=0;
    while(!finished)
    {
        /* AT(0,0) printf(" %d ",last_char); */
        /* busy-wait for a key */
        while(!Bconstat(2))
            ;
        key=Bconin(2);
        upper=(int)(key>>16);
        lower=(int)(key%256);
        asc=(char)lower&0x00FF;
        switch(upper)
        {
        case 0x4D: /* right arrow */
            if(logical_cursor<last_char/*max_length*/)
            {
                logical_cursor++;
                if(cursor<screen_length)
                {
                    cursor++;
                }
                else
                {
                    /* at the right edge: scroll instead of moving */
                    cursor=screen_length;
                    if(first_char_on_screen+screen_length<last_char /*max_length*/ && !fixed)
                    {
                        first_char_on_screen++;
                        print_at_from(row,col,first_char_on_screen,screen_length,edit_string);
                    }
                }
            }
            else
            {
                BELL
                logical_cursor=last_char/*max_length*/;
            }
            break;
        case 0x4B: /* left arrow */
            if(logical_cursor>0)
            {
                logical_cursor--;
            }
            else
            {
                BELL
                logical_cursor=0;
            }
            if(cursor>0)
            {
                cursor--;
            }
            else
            {
                /* at the left edge: scroll instead of moving */
                cursor=0;
                if(first_char_on_screen>0 && !fixed)
                {
                    first_char_on_screen--;
                    print_at_from(row,col,first_char_on_screen,screen_length,edit_string);
                }
            }
            /* logical_cursor--; cursor--; if(logical_cursor<0) { BELL logical_cursor=0; cursor=0; } else if(cursor<0) { cursor=0; if(first_char_on_screen>0 && !fixed) { first_char_on_screen--; print_at_from(row,col,first_char_on_screen,screen_length,edit_string); } }*/
            break;
        case 0x1C: /* return */
            /* accept: blank the field, copy the edit back, redraw from 0 */
            finished=1;
            /* print_at_from(row,col,0,screen_length,blank); */
            AT(row,col)
            printf("%s",blank+1);
            /* printf("%s",blank);*/
            AT(row,col)
            strcpy(string_to_edit,edit_string);
            print_at_from(row,col,0,screen_length,string_to_edit);
            break;
        case 0x0E: /* backspace */
            if(logical_cursor>0)
            {
                /* shift the tail (terminator included) one place left */
                loop=logical_cursor-1;
                loop2=logical_cursor;
                while(loop2<=last_char)
                {
                    edit_string[loop]=edit_string[loop2];
                    loop++;
                    loop2++;
                }
                edit_string[last_char]='\0';
                last_char--;
                logical_cursor--;
                if(cursor>0 && last_char<screen_length)
                {
                    cursor--;
                }
                else
                {
                    if(cursor==0)
                    {
                        first_char_on_screen--;
                        if(first_char_on_screen<0)
                            first_char_on_screen=0;
                    }
                    else
                    {
                        cursor--;
                    }
                }
                /* the text no longer fills the window: wipe what is right
                 * of the cursor before the redraw at the end of the loop */
                if(first_char_on_screen+screen_length>last_char)
                {
                    /* for(delete_loop=0;cursor+delete_loop<screen_length;delete_loop++) { AT(row,delete_loop+col) Bconout(2,' '); }*/
                    AT(row,col+cursor)
                    printf("%s",blank+cursor+1);
                }
            }
            else
                BELL
            break;
        case 0x53: /* delete */
            if(logical_cursor<last_char)
            {
                /* shift the tail (terminator included) over the cursor */
                loop=logical_cursor;
                loop2=logical_cursor+1;
                while(loop2<=last_char)
                {
                    edit_string[loop]=edit_string[loop2];
                    loop++;
                    loop2++;
                }
                edit_string[last_char]='\0';
                last_char--;
                if(first_char_on_screen+screen_length>last_char)
                {
                    /* for(delete_loop=0;cursor+delete_loop<=screen_length;delete_loop++) { AT(row,delete_loop+col) Bconout(2,' '); } */
                    AT(row,col+cursor)
                    printf("%s",blank+cursor+1);
                }
            }
            else
                BELL
            break;
        case 0x01: /* escape */
            /* abandon: blank the field and redraw the untouched original */
            finished=1;
            esc_pressed=1;
            /* print_at_from(row,col,0,screen_length,blank); */
            /* AT(row,col) CLEAR_EOL */
            AT(row,col)
            printf("%s",blank+1);
            /* printf("%s",blank); */
            AT(row,col)
            print_at_from(row,col,0,screen_length,string_to_edit);
            break;
        case 0x52: /* insert */
            /* toggle insert/overwrite and the matching cursor style */
            BELL
            if(insert==1)
                insert=0;
            else
                insert=1;
            if(insert==1)
            {
                blink=BLINK;
                Cursconf(blink,0);
            }
            else
            {
                blink=NOBLINK;
                Cursconf(blink,0);
            }
            break;
        default:
            /* any other key: treat as text if check_func accepts it */
            if((*check_func)(asc))
            {
                if(insert==1)
                {
                    if(last_char<max_length)
                    {
                        /* open a gap at the cursor by shifting the tail right */
                        loop=last_char-1;
                        loop2=last_char;
                        while(loop2>logical_cursor)
                        {
                            edit_string[loop2]=edit_string[loop];
                            loop--;
                            loop2--;
                        }
                        last_char++;
                        edit_string[last_char]='\0';
                        if(convert_upper)
                            edit_string[logical_cursor]=toupper(asc);
                        else
                            edit_string[logical_cursor]=asc;
                        logical_cursor++;
                        cursor++;
                        if(cursor>screen_length)
                        {
                            cursor=screen_length;
                            if(first_char_on_screen+screen_length<last_char)
                                first_char_on_screen++;
                        }
                    }
                    else
                        BELL
                }
                else
                {
                    if(logical_cursor<last_char)
                    {
                        /* overwrite an existing character */
                        if(convert_upper)
                            edit_string[logical_cursor]=toupper(asc);
                        else
                            edit_string[logical_cursor]=asc;
                        logical_cursor++;
                        cursor++;
                        if(cursor>screen_length)
                        {
                            first_char_on_screen++;
                            cursor=screen_length;
                        }
                    }
                    else
                    {
                        /* overwrite mode at the end of the text: append */
                        if(logical_cursor>=last_char && last_char<max_length)
                        {
                            if(convert_upper)
                                edit_string[last_char]=toupper(asc);
                            else
                                edit_string[last_char]=asc;
                            logical_cursor=last_char;
                            last_char++;
                            logical_cursor++;
                            if(logical_cursor>max_length)
                                logical_cursor=max_length;
                            edit_string[last_char]='\0';
                            cursor++;
                            if(cursor>screen_length)
                            {
                                cursor=screen_length;
                                if(first_char_on_screen+screen_length<max_length)
                                    first_char_on_screen++;
                            }
                        }
                        else
                            BELL
                    }
                }
            }
            break;
        }
        /* redraw the window and put the cursor back unless we are leaving */
        if(finished!=1)
        {
            /* if(last_char<screen_length) { AT(row,col) CLEAR_EOL }*/
            print_at_from(row,col,first_char_on_screen,screen_length,edit_string);
            AT(row,col+cursor)
        }
/*-------------------------------------------------------------------------
 * Function:    test_fill
 *
 * Purpose:     Tests the H5VM_hyper_fill() function.
 *
 *              For every hyperslab offset (i,j,k), stepped by (di,dj,dk),
 *              and every hyperslab size (dx,dy,dz), stepped by
 *              (ddx,ddy,ddz), that fits in an nx x ny x nz byte array, the
 *              hyperslab is filled with the values 0, 64, 128 and 192 and
 *              the sum of the whole array is compared with a reference sum
 *              computed independently.
 *
 *              A zero `nz` selects a 2-d test and a zero `ny` and `nz` a
 *              1-d test; the unused extents are then treated as 1.
 *
 * Return:      Success:        SUCCEED
 *
 *              Failure:        FAIL
 *
 * Note:        The definition is cut off in this file: the success path,
 *              the `error` label and the release of `dst` are not visible
 *              here.
 *
 * Programmer:  Robb Matzke
 *              Saturday, October 11, 1997
 *
 *-------------------------------------------------------------------------
 */
static herr_t
test_fill(size_t nx, size_t ny, size_t nz, size_t di, size_t dj, size_t dk, size_t ddx, size_t ddy, size_t ddz)
{
    uint8_t *dst = NULL;        /*destination array             */
    hsize_t hs_size[3];         /*hyperslab size                */
    hsize_t dst_size[3];        /*destination total size        */
    hsize_t dst_offset[3];      /*offset of hyperslab in dest   */
    unsigned ref_value;         /*reference value               */
    unsigned acc;               /*accumulator                   */
    size_t i, j, k, dx, dy, dz; /*counters                      */
    size_t u, v, w;
    unsigned ndims;             /*hyperslab dimensionality      */
    char dim[64], s[256];       /*temp string                   */
    unsigned fill_value;        /*fill value                    */

    /*
     * Dimensionality.
     */
    if(0 == nz) {
        if(0 == ny) {
            ndims = 1;
            ny = nz = 1;
            sprintf(dim, "%lu", (unsigned long) nx);
        } /* end if */
        else {
            ndims = 2;
            nz = 1;
            sprintf(dim, "%lux%lu", (unsigned long) nx, (unsigned long) ny);
        } /* end else */
    } /* end if */
    else {
        ndims = 3;
        sprintf(dim, "%lux%lux%lu", (unsigned long) nx, (unsigned long) ny, (unsigned long) nz);
    } /* end else */
    sprintf(s, "Testing hyperslab fill %-11s variable hyperslab", dim);
    printf("%-70s", s);
    fflush(stdout);

    /* Allocate array */
    if(NULL == (dst = (uint8_t *)HDcalloc((size_t)1, nx * ny * nz)))
        TEST_ERROR
    init_full(dst, nx, ny, nz);

    /* Outer three loops: hyperslab offset.  Inner three: hyperslab size. */
    for(i = 0; i < nx; i += di) {
        for(j = 0; j < ny; j += dj) {
            for(k = 0; k < nz; k += dk) {
                for(dx = 1; dx <= nx - i; dx += ddx) {
                    for(dy = 1; dy <= ny - j; dy += ddy) {
                        for(dz = 1; dz <= nz - k; dz += ddz) {

                            /* Describe the hyperslab */
                            dst_size[0] = nx;
                            dst_size[1] = ny;
                            dst_size[2] = nz;
                            dst_offset[0] = i;
                            dst_offset[1] = j;
                            dst_offset[2] = k;
                            hs_size[0] = dx;
                            hs_size[1] = dy;
                            hs_size[2] = dz;

                            for(fill_value = 0; fill_value < 256; fill_value += 64) {
                                /*
                                 * Initialize the full array, then subtract
                                 * the original fill values and add the new
                                 * ones.  init_full() returns the value the
                                 * reference sum starts from.
                                 */
                                ref_value = init_full(dst, nx, ny, nz);
                                for(u = (size_t)dst_offset[0]; u < dst_offset[0] + dx; u++)
                                    for(v = (size_t)dst_offset[1]; v < dst_offset[1] + dy; v++)
                                        for(w = (size_t)dst_offset[2]; w < dst_offset[2] + dz; w++)
                                            ref_value -= dst[u * ny * nz + v * nz + w];
                                ref_value += fill_value * (unsigned)dx * (unsigned)dy * (unsigned)dz;

                                /* Fill the hyperslab with some value */
                                H5VM_hyper_fill(ndims, hs_size, dst_size, dst_offset, dst, fill_value);

                                /*
                                 * Sum the array and compare it to the
                                 * reference value.
                                 */
                                acc = 0;
                                for(u = 0; u < nx; u++)
                                    for(v = 0; v < ny; v++)
                                        for(w = 0; w < nz; w++)
                                            acc += dst[u * ny * nz + v * nz + w];

                                if(acc != ref_value) {
                                    H5_FAILED()
                                    if(!HDisatty(1)) {
                                        /*
                                         * Print debugging info unless output
                                         * is going directly to a terminal.
                                         */
                                        AT();
                                        printf(" acc != ref_value\n");
                                        printf(" i=%lu, j=%lu, k=%lu, "
                                               "dx=%lu, dy=%lu, dz=%lu, "
                                               "fill=%d\n",
                                               (unsigned long)i, (unsigned long)j, (unsigned long)k,
                                               (unsigned long)dx, (unsigned long)dy, (unsigned long)dz,
                                               fill_value);
                                        print_ref(nx, ny, nz);
                                        printf("\n Result is:\n");
                                        print_array(dst, nx, ny, nz);
                                    } /* end if */
                                    goto error;
                                } /* end if */
                            } /* end for */
                        } /* end for */
                    } /* end for */
                } /* end for */
            } /* end for */
        } /* end for */
    } /* end for */
/*
 - backref - figure out what matched what, figuring in back references
 *
 * Tries to match the strip operators [startst, stopst) against exactly the
 * characters [start, stop).  Operators that need no decision are consumed in
 * a loop; the first one that needs a choice is handled by recursing on the
 * remainder.
 *
 *   m        match state (compiled program in m->g, subexpression offsets
 *            in m->pmatch, string bounds in m->offp / m->beginp / m->endp)
 *   start    first character to match
 *   stop     the match has to end exactly here
 *   startst  first strip operator to run
 *   stopst   strip operator to stop before
 *   lev      PLUS nesting level, index into m->lastpos
 *   rec      counts back references to an empty match followed so far; the
 *            match is abandoned once it exceeds MAX_RECURSION
 *
 * Returns stop on success and NULL on failure.  Assignments made to
 * m->pmatch for a parenthesis are undone when the rest fails to match.
 */
static char *			/* == stop (success) or NULL (failure) */
backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
    sopno lev, int rec)		/* PLUS nesting level */
{
	int i;
	sopno ss;		/* start sop of current subRE */
	char *sp;		/* start of string matched by it */
	sopno ssub;		/* start sop of subsubRE */
	sopno esub;		/* end sop of subsubRE */
	char *ssp;		/* start of string matched by subsubRE */
	char *dp;
	size_t len;
	int hard;
	sop s;
	ut64 offsave;
	cset *cs;

	AT("back", start, stop, startst, stopst);
	sp = start;

	/* get as far as we can with easy stuff */
	hard = 0;
	for (ss = startst; !hard && ss < stopst; ss++)
		switch (OP(s = m->g->strip[ss])) {
		case OCHAR:
			if (sp == stop || *sp++ != (char)OPND(s))
				return(NULL);
			break;
		case OANY:
			if (sp == stop)
				return(NULL);
			sp++;
			break;
		case OANYOF:
			cs = &m->g->sets[OPND(s)];
			if (sp == stop || !CHIN(cs, *sp++))
				return(NULL);
			break;
		case OBOL:
			/*
			 * The sp > m->offp test keeps *(sp-1) inside the
			 * string: sp may sit on the very first byte without
			 * the first alternative holding (R_REGEX_NOTBOL).
			 */
			if ( (sp == m->beginp && !(m->eflags&R_REGEX_NOTBOL)) ||
					(sp > m->offp && sp < m->endp &&
					*(sp-1) == '\n' &&
					(m->g->cflags&R_REGEX_NEWLINE)) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case OEOL:
			if ( (sp == m->endp && !(m->eflags&R_REGEX_NOTEOL)) ||
					(sp < m->endp && *sp == '\n' &&
					(m->g->cflags&R_REGEX_NEWLINE)) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case OBOW:
			/* same guard as OBOL for the look-behind at '\n' */
			if (( (sp == m->beginp && !(m->eflags&R_REGEX_NOTBOL)) ||
					(sp > m->offp && sp < m->endp &&
					*(sp-1) == '\n' &&
					(m->g->cflags&R_REGEX_NEWLINE)) ||
					(sp > m->beginp &&
					!ISWORD((unsigned char)*(sp-1))) ) &&
					(sp < m->endp && ISWORD((unsigned char)*sp)) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case OEOW:
			if (( (sp == m->endp && !(m->eflags&R_REGEX_NOTEOL)) ||
					(sp < m->endp && *sp == '\n' &&
					(m->g->cflags&R_REGEX_NEWLINE)) ||
					(sp < m->endp &&
					!ISWORD((unsigned char)*sp)) ) &&
					(sp > m->beginp &&
					ISWORD((unsigned char)*(sp-1))) )
				{ /* yes */ }
			else
				return(NULL);
			break;
		case O_QUEST:
			break;
		case OOR1:	/* matches null but needs to skip */
			ss++;
			s = m->g->strip[ss];
			do {
				assert(OP(s) == OOR2);
				ss += OPND(s);
			} while (OP(s = m->g->strip[ss]) != O_CH);
			/* note that the ss++ gets us past the O_CH */
			break;
		default:	/* have to make a choice */
			hard = 1;
			break;
		}
	if (!hard) {		/* that was it! */
		if (sp != stop)
			return(NULL);
		return(sp);
	}
	ss--;			/* adjust for the for's final increment */

	/* the hard stuff */
	AT("hard", sp, stop, ss, stopst);
	s = m->g->strip[ss];
	switch (OP(s)) {
	case OBACK_:		/* the vilest depths */
		i = OPND(s);
		assert(0 < i && i <= m->g->nsub);
		if (m->pmatch[i].rm_eo == -1)
			return(NULL);
		assert(m->pmatch[i].rm_so != -1);
		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
		/* an empty back reference consumes nothing: bound the depth */
		if (len == 0 && rec++ > MAX_RECURSION)
			return(NULL);
		assert(stop - m->beginp >= len);
		if (sp > stop - len)
			return(NULL);	/* not enough left to match */
		ssp = m->offp + m->pmatch[i].rm_so;
		if (memcmp(sp, ssp, len) != 0)
			return(NULL);
		while (m->g->strip[ss] != SOP(O_BACK, i))
			ss++;
		return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
		break;
	case OQUEST_:		/* to null or not */
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
		if (dp != NULL)
			return(dp);	/* not */
		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
		break;
	case OPLUS_:
		assert(m->lastpos != NULL);
		assert(lev+1 <= m->g->nplus);
		m->lastpos[lev+1] = sp;
		return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
		break;
	case O_PLUS:
		if (sp == m->lastpos[lev])	/* last pass matched null */
			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
		/* try another pass */
		m->lastpos[lev] = sp;
		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
		if (dp == NULL)
			return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
		else
			return(dp);
		break;
	case OCH_:		/* find the right one, if any */
		ssub = ss + 1;
		esub = ss + OPND(s) - 1;
		assert(OP(m->g->strip[esub]) == OOR1);
		for (;;) {	/* find first matching branch */
			dp = backref(m, sp, stop, ssub, esub, lev, rec);
			if (dp != NULL)
				return(dp);
			/* that one missed, try next one */
			if (OP(m->g->strip[esub]) == O_CH)
				return(NULL);	/* there is none */
			esub++;
			assert(OP(m->g->strip[esub]) == OOR2);
			ssub = esub + 1;
			esub += OPND(m->g->strip[esub]);
			if (OP(m->g->strip[esub]) == OOR2)
				esub--;
			else
				assert(OP(m->g->strip[esub]) == O_CH);
		}
		break;
	case OLPAREN:		/* must undo assignment if rest fails */
		i = OPND(s);
		assert(0 < i && i <= m->g->nsub);
		offsave = m->pmatch[i].rm_so;
		m->pmatch[i].rm_so = sp - m->offp;
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
		if (dp != NULL)
			return(dp);
		m->pmatch[i].rm_so = offsave;
		return(NULL);
		break;
	case ORPAREN:		/* must undo assignment if rest fails */
		i = OPND(s);
		assert(0 < i && i <= m->g->nsub);
		offsave = m->pmatch[i].rm_eo;
		m->pmatch[i].rm_eo = sp - m->offp;
		dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
		if (dp != NULL)
			return(dp);
		m->pmatch[i].rm_eo = offsave;
		return(NULL);
		break;
	default:		/* uh oh */
		assert(nope);
		break;
	}

	/* "can't happen" */
	assert(nope);
	/* NOTREACHED */
	return NULL;
}
/*
 - slow - step through the string more deliberately
 == static const char *slow(struct match *m, const char *start, \
 ==	const char *stop, sopno startst, sopno stopst);
 *
 * Runs the state set over the text beginning at start, feeding it one
 * (possibly multi-byte) character at a time together with the BOL/EOL and
 * BOW/EOW pseudo-characters that fall between characters, and remembers
 * the last position at which the stop state was set.
 *
 *   m        match state (supplies the st/empty/tmp state sets, the
 *            compiled pattern m->g, the text bounds beginp/endp, the
 *            execution flags and the multi-byte conversion state)
 *   start    position at which stepping begins
 *   stop     position past which no character is consumed
 *   startst  strip index of the start state
 *   stopst   strip index of the stop state
 *
 * Returns the last position at which stopst was set, or NULL if it never
 * was.
 */
static const char *	/* where it ended */
slow(	struct match *m,
	const char *start,
	const char *stop,
	sopno startst,
	sopno stopst)
{
	states st = m->st;
	states empty = m->empty;
	states tmp = m->tmp;
	const char *p = start;
	wint_t c;
	wint_t lastc;		/* previous c */
	wint_t flagch;
	int i;
	const char *matchp;	/* last p at which a match ended */
	size_t clen;

	AT("slow", start, stop, startst, stopst);
	CLEAR(st);
	SET1(st, startst);
	SP("sstart", st, *p);
	st = step(m->g, startst, stopst, st, NOTHING, st);
	matchp = NULL;
	if (start == m->beginp)
		c = OUT;
	else {
		/*
		 * XXX Wrong if the previous character was multi-byte.
		 * Newline never is (in encodings supported by FreeBSD),
		 * so this only breaks the ISWORD tests below.
		 */
		c = (uch)*(start - 1);
	}
	for (;;) {
		/* next character */
		lastc = c;
		if (p == m->endp) {
			c = OUT;
			clen = 0;
		} else
			clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);

		/* is there an EOL and/or BOL between lastc and c? */
		flagch = '\0';
		i = 0;
		if ( (lastc == '\n' && (m->g->cflags & REG_NEWLINE)) ||
				(lastc == OUT && !(m->eflags & REG_NOTBOL)) ) {
			flagch = BOL;
			i = m->g->nbol;
		}
		if ( (c == '\n' && (m->g->cflags & REG_NEWLINE)) ||
				(c == OUT && !(m->eflags & REG_NOTEOL)) ) {
			flagch = (flagch == BOL) ? BOLEOL : EOL;
			i += m->g->neol;
		}
		if (i != 0) {
			/* one step per BOL/EOL operator counted in the pattern */
			for (; i > 0; i--)
				st = step(m->g, startst, stopst, st, flagch, st);
			SP("sboleol", st, c);
		}

		/* how about a word boundary? */
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
					(c != OUT && ISWORD(c)) ) {
			flagch = BOW;
		}
		if ( (lastc != OUT && ISWORD(lastc)) &&
				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
			flagch = EOW;
		}
		if (flagch == BOW || flagch == EOW) {
			st = step(m->g, startst, stopst, st, flagch, st);
			SP("sboweow", st, c);
		}

		/* are we done? */
		if (ISSET(st, stopst))
			matchp = p;
		if (EQ(st, empty) || p == stop || clen > stop - p)
			break;		/* NOTE BREAK OUT */

		/* no, we must deal with this character */
		ASSIGN(tmp, st);
		ASSIGN(st, empty);
		assert(c != OUT);
		st = step(m->g, startst, stopst, tmp, c, st);
		SP("saft", st, c);
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
		p += clen;
	}

	return(matchp);
}
/*
 * apply_closure - call `closure` with the arguments in `argvec`.
 *
 * Dispatches on closure->type:
 *   T_PRIM          looks the primitive up by number, checks the argument
 *                   count and stores the primitive's result in vm_acc;
 *   T_CLOSURE       checks execute permission and argument count, pushes a
 *                   frame and points the VM registers at the method;
 *   T_CONTINUATION  copies the saved stack back, restores the saved frame
 *                   and stores argvec slot 1 in vm_acc.
 * A NULL, tagged or otherwise unrecognised closure raises
 * "invalid-callable".  Every failure is reported through vm_raise().
 */
PUBLIC INLINE void apply_closure(VMSTATE vms, OVECTOR closure, VECTOR argvec) {
  if (closure == NULL || TAGGEDP(closure)) {
    vm_raise(vms, (OBJ) newsym("invalid-callable"), (OBJ) closure);
    return;
  }

  if (closure->type == T_PRIM) {
    int wanted;
    prim_fn prim = lookup_prim(NUM(AT(closure, PR_NUMBER)), &wanted);
    int argc_ok;

    if (prim == NULL) {
      vm_raise(vms, (OBJ) newsym("invalid-primitive"), AT(closure, PR_NUMBER));
      return;
    }

    /* wanted >= 0: exact argument count; wanted < 0: at least -wanted. */
    if (wanted >= 0)
      argc_ok = (argvec->_.length-1 == wanted);
    else
      argc_ok = (argvec->_.length-1 >= -wanted);

    if (argc_ok)
      vms->r->vm_acc = prim(vms, argvec);
    else
      vm_raise(vms, (OBJ) newsym("wrong-argc"), (OBJ) closure);
    return;
  }

  if (closure->type == T_CLOSURE) {
    OVECTOR method = (OVECTOR) AT(closure, CL_METHOD);

    if (!MS_CAN_X(method, vms->r->vm_effuid)) {
      vm_raise(vms, (OBJ) newsym("no-permission"), AT(method, ME_NAME));
      return;
    }

    if (argvec->_.length-1 != NUM(AT(method, ME_ARGC))) {
      vm_raise(vms, (OBJ) newsym("wrong-argc"), (OBJ) method);
      return;
    }

    push_frame(vms);

    /* The argument vector becomes the environment; slot 0 holds the method's environment. */
    vms->r->vm_env = argvec;
    ATPUT(vms->r->vm_env, 0, AT(method, ME_ENV));

    vms->r->vm_lits = (VECTOR) AT(method, ME_LITS);
    vms->r->vm_code = (BVECTOR) AT(method, ME_CODE);
    vms->r->vm_self = (OBJECT) AT(closure, CL_SELF);
    vms->c.vm_ip = 0;
    vms->r->vm_method = method;

    /* Methods flagged O_SETUID run with their owner as the effective uid. */
    if (NUM(AT(method, ME_FLAGS)) & O_SETUID)
      vms->r->vm_effuid = (OBJECT) AT(method, ME_OWNER);
    return;
  }

  if (closure->type == T_CONTINUATION) {
    int slot;
    VECTOR saved = (VECTOR) AT(closure, CONT_STACK);

    for (slot = 0; slot < saved->_.length; slot++)
      ATPUT(vms->r->vm_stack, slot, AT(saved, slot));
    vms->c.vm_top = saved->_.length;

    restoreframe(vms, (OVECTOR) AT(closure, CONT_FRAME));
    vms->r->vm_acc = AT(argvec, 1);
    return;
  }

  vm_raise(vms, (OBJ) newsym("invalid-callable"), (OBJ) closure);
}
inline void internal::GemmTNC ( Orientation orientationOfA, T alpha, const DistMatrix<T,MC,MR>& A, const DistMatrix<T,MC,MR>& B, T beta, DistMatrix<T,MC,MR>& C ) { #ifndef RELEASE PushCallStack("internal::GemmTNC"); if( A.Grid() != B.Grid() || B.Grid() != C.Grid() ) throw std::logic_error ("{A,B,C} must be distributed over the same grid"); if( orientationOfA == NORMAL ) throw std::logic_error("GemmTNC assumes A is (Conjugate)Transposed"); if( A.Width() != C.Height() || B.Width() != C.Width() || A.Height() != B.Height() ) { std::ostringstream msg; msg << "Nonconformal GemmTNC: \n" << " A ~ " << A.Height() << " x " << A.Width() << "\n" << " B ~ " << B.Height() << " x " << B.Width() << "\n" << " C ~ " << C.Height() << " x " << C.Width() << "\n"; throw std::logic_error( msg.str().c_str() ); } #endif const Grid& g = A.Grid(); // Matrix views DistMatrix<T,MC,MR> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T,MC,MR> BT(g), B0(g), BB(g), B1(g), B2(g); // Temporary distributions DistMatrix<T,STAR,MC> A1_STAR_MC(g); DistMatrix<T,STAR,MR> B1_STAR_MR(g); // Start the algorithm Scal( beta, C ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); while( AB.Height() > 0 ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); A1_STAR_MC.AlignWith( C ); B1_STAR_MR.AlignWith( C ); //--------------------------------------------------------------------// A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR] B1_STAR_MR = B1; // B1[*,MR] <- B1[MC,MR] // C[MC,MR] += alpha (A1[*,MC])^T B1[*,MR] // = alpha (A1^T)[MC,*] B1[*,MR] internal::LocalGemm ( orientationOfA, NORMAL, alpha, A1_STAR_MC, B1_STAR_MR, (T)1, C ); //--------------------------------------------------------------------// A1_STAR_MC.FreeAlignments(); B1_STAR_MR.FreeAlignments(); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); } #ifndef RELEASE PopCallStack(); #endif }
/*
 * peek - return the object stored in stack slot vm_top - 1.
 * Neither the stack nor vm_top is modified.
 */
PRIVATE INLINE OBJ peek(VMSTATE vms) {
  OBJ top_obj = AT(vms->r->vm_stack, vms->c.vm_top - 1);

  return top_obj;
}
// Loads the material described by the XML file at `pathname`.
//
// Returns a default-constructed CMaterial when `pathname` is empty or when
// the file cannot be loaded, and the entry from m_Materials when one already
// exists for `pathname`.  Otherwise the root element's children are walked:
//   alternative        - when the `if` condition is non-empty and passes, or
//                        the `quality` attribute is above qualityLevel, the
//                        material named by `material` is loaded instead and
//                        the walk stops
//   alpha_blending     - turns alpha blending on
//   shader             - sets the shader effect from `effect`
//   define             - adds a shader define (`name`, `value`)
//   conditional_define - adds a conditional define; for type "draw_range"
//                        the min/max come from the config entry named by
//                        `conf`, else from the `min`/`max` attributes
//   uniform            - adds a static uniform parsed as four floats
//
// NOTE(review): the text visible here stops inside the element loop (no
// closing braces, no final return), so what happens after the loop cannot be
// documented from this view.  qualityLevel is not declared in the visible
// code; presumably it is a member -- confirm.
CMaterial CMaterialManager::LoadMaterial(const VfsPath& pathname) { if (pathname.empty()) return CMaterial(); std::map<VfsPath, CMaterial>::iterator iter = m_Materials.find(pathname); if (iter != m_Materials.end()) return iter->second; CXeromyces xeroFile; if (xeroFile.Load(g_VFS, pathname, "material") != PSRETURN_OK) return CMaterial(); #define EL(x) int el_##x = xeroFile.GetElementID(#x) #define AT(x) int at_##x = xeroFile.GetAttributeID(#x) EL(alpha_blending); EL(alternative); EL(define); EL(shader); EL(uniform); EL(renderquery); EL(required_texture); EL(conditional_define); AT(effect); AT(if); AT(define); AT(quality); AT(material); AT(name); AT(value); AT(type); AT(min); AT(max); AT(conf); #undef AT #undef EL CMaterial material; XMBElement root = xeroFile.GetRoot(); CPreprocessorWrapper preprocessor; preprocessor.AddDefine("CFG_FORCE_ALPHATEST", g_Renderer.m_Options.m_ForceAlphaTest ? "1" : "0"); CVector4D vec(qualityLevel,0,0,0); material.AddStaticUniform("qualityLevel", vec); XERO_ITER_EL(root, node) { int token = node.GetNodeName(); XMBAttributeList attrs = node.GetAttributes(); if (token == el_alternative) { CStr cond = attrs.GetNamedItem(at_if); if (cond.empty() || !preprocessor.TestConditional(cond)) { cond = attrs.GetNamedItem(at_quality); if (cond.empty()) continue; else { if (cond.ToFloat() <= qualityLevel) continue; } } material = LoadMaterial(VfsPath("art/materials") / attrs.GetNamedItem(at_material).FromUTF8()); break; } else if (token == el_alpha_blending) { material.SetUsesAlphaBlending(true); } else if (token == el_shader) { material.SetShaderEffect(attrs.GetNamedItem(at_effect)); } else if (token == el_define) { material.AddShaderDefine(CStrIntern(attrs.GetNamedItem(at_name)), CStrIntern(attrs.GetNamedItem(at_value))); } else if (token == el_conditional_define) { std::vector<float> args; CStr type = attrs.GetNamedItem(at_type).c_str(); int typeID = -1; if (type == CStr("draw_range")) { typeID = DCOND_DISTANCE; float valmin = -1.0f; float valmax 
= -1.0f; CStr conf = attrs.GetNamedItem(at_conf); if (!conf.empty()) { CFG_GET_VAL("materialmgr." + conf + ".min", valmin); CFG_GET_VAL("materialmgr." + conf + ".max", valmax); } else { CStr dmin = attrs.GetNamedItem(at_min); if (!dmin.empty()) valmin = attrs.GetNamedItem(at_min).ToFloat(); CStr dmax = attrs.GetNamedItem(at_max); if (!dmax.empty()) valmax = attrs.GetNamedItem(at_max).ToFloat(); } args.push_back(valmin); args.push_back(valmax); if (valmin >= 0.0f) { std::stringstream sstr; sstr << valmin; material.AddShaderDefine(CStrIntern(conf + "_MIN"), CStrIntern(sstr.str())); } if (valmax >= 0.0f) { std::stringstream sstr; sstr << valmax; material.AddShaderDefine(CStrIntern(conf + "_MAX"), CStrIntern(sstr.str())); } } material.AddConditionalDefine(attrs.GetNamedItem(at_name).c_str(), attrs.GetNamedItem(at_value).c_str(), typeID, args); } else if (token == el_uniform) { std::stringstream str(attrs.GetNamedItem(at_value)); CVector4D vec; str >> vec.X >> vec.Y >> vec.Z >> vec.W; material.AddStaticUniform(attrs.GetNamedItem(at_name).c_str(), vec); }
extern "C" magma_int_t magma_sgessm_gpu( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, float *dL1, magma_int_t lddl1, float *dL, magma_int_t lddl, float *dA, magma_int_t ldda, magma_int_t *info) { /* -- MAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver November 2012 Purpose ======= SGESSM applies the factors L computed by SGETRF_INCPIV to a real M-by-N tile A. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. K (input) INTEGER The number of columns of the matrix L. K >= 0. IB (input) INTEGER The inner-blocking size. IB >= 0. IPIV (input) INTEGER array on the cpu. The pivot indices array of size K as returned by SGETRF_INCPIV. dL1 (input) DOUBLE COMPLEX array, dimension(LDDL1, N) The IB-by-K matrix in which is stored L^(-1) as returned by GETRF_INCPIV LDDL1 (input) INTEGER The leading dimension of the array L1. LDDL1 >= max(1,2*IB). dL (input) DOUBLE COMPLEX array, dimension(LDDL, N) The M-by-K lower triangular tile on the gpu. LDDL (input) INTEGER The leading dimension of the array L. LDDL >= max(1,M). dA (input/output) DOUBLE COMPLEX array, dimension (LDDA, N) On entry, the M-by-N tile A on the gpu. On exit, updated by the application of L on the gpu. 
===================================================================== */ #define AT(i,j) (dAT + (i)*ldda + (j) ) #define L(i,j) (dL + (i) + (j)*lddl ) #define dL1(j) (dL1 + (j)*lddl1) float c_one = MAGMA_S_ONE; float c_neg_one = MAGMA_S_NEG_ONE; int i, s, sb; float *dAT; /* Check arguments */ *info = 0; if (m < 0) *info = -1; else if (n < 0) *info = -2; else if (ldda < max(1,m)) *info = -4; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (m == 0 || n == 0) return *info; if ( (storev == 'C') || (storev == 'c') ) { magmablas_sgetmo_in( dA, dAT, ldda, m, n ); } else { dAT = dA; } s = k / ib; for(i = 0; i < k; i += ib) { sb = min(ib, k-i); magmablas_slaswp( n, dAT, ldda, i+1, i+sb, ipiv, 1 ); #ifndef WITHOUTTRTRI magma_strmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, dL1(i), lddl1, AT(i, 0), ldda); #else magma_strsm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit, n, sb, c_one, L( i, i), lddl, AT(i, 0), ldda); #endif if ( (i+sb) < m) { magma_sgemm( MagmaNoTrans, MagmaTrans, n, m-(i+sb), sb, c_neg_one, AT(i, 0), ldda, L( i+sb, i), lddl, c_one, AT(i+sb, 0), ldda ); } } if ( (storev == 'C') || (storev == 'c') ) { magmablas_sgetmo_in( dA, dAT, ldda, m, n ); } return *info; /* End of MAGMA_SGETRF_GPU */ }
/*
 * fork1 - one-argument (w only) evaluation of a three-part verb f g h.
 *
 * hs is taken from sv->h and h1 is its f1 entry.  The result depends on fs:
 *   ID(fs) is CLBKCO  ->  g1( h1(w) )
 *   AT(fs) has NOUN   ->  g2( fs, h1(w) )        (fs is used as a value)
 *   otherwise         ->  g2( f1(w), h1(w) )
 *
 * NOTE(review): sv, fs, gs, f1, g1 and g2 are not declared in this block;
 * presumably DECLFG introduces them -- confirm.  The order in which f1(...)
 * and h1(...) run in the last case is whatever the compiler chooses for
 * function arguments.
 */
static DF1(fork1){DECLFG;A hs=sv->h;AF h1=VAV(hs)->f1;
 PREF1(fork1);
 R CLBKCO==ID(fs) ? g1(h1(w,hs),gs) : (NOUN&AT(fs) ? g2(fs,h1(w,hs),gs) : g2(f1(w,fs),h1(w,hs),gs));
}
// Creates the shader program called `name` and stores it in `program`.
//
// A name beginning with "fixed:" is built with CShaderProgram::ConstructFFP
// from the text after the prefix and reloaded immediately; false is returned
// if construction yields an empty pointer.  Any other name is read from
// shaders/<name>.xml; false is returned when the file cannot be loaded or,
// with USE_SHADER_XML_VALIDATION, when validation fails.
//
// While walking the root element's children:
//   define - adds `name`/`value` to the program's defines
//   vertex - records the vertex shader file, then for each child whose `if`
//            condition is empty or passes: uniform (name -> loc),
//            stream (ORs the matching STREAM_* flag), attrib (name ->
//            ParseAttribSemantics(semantics))
//
// NOTE(review): the text visible here stops right after the el_vertex
// branch, so the remaining element handling and the construction of the
// program cannot be documented from this view.
bool CShaderManager::NewProgram(const char* name, const CShaderDefines& baseDefines, CShaderProgramPtr& program) { PROFILE2("loading shader"); PROFILE2_ATTR("name: %s", name); if (strncmp(name, "fixed:", 6) == 0) { program = CShaderProgramPtr(CShaderProgram::ConstructFFP(name+6, baseDefines)); if (!program) return false; program->Reload(); return true; } VfsPath xmlFilename = L"shaders/" + wstring_from_utf8(name) + L".xml"; CXeromyces XeroFile; PSRETURN ret = XeroFile.Load(g_VFS, xmlFilename); if (ret != PSRETURN_OK) return false; #if USE_SHADER_XML_VALIDATION { TIMER_ACCRUE(tc_ShaderValidation); // Serialize the XMB data and pass it to the validator XML_Start(); XML_SetPrettyPrint(false); XML_WriteXMB(XeroFile); bool ok = m_Validator.ValidateEncoded(wstring_from_utf8(name), XML_GetOutput()); if (!ok) return false; } #endif // Define all the elements and attributes used in the XML file #define EL(x) int el_##x = XeroFile.GetElementID(#x) #define AT(x) int at_##x = XeroFile.GetAttributeID(#x) EL(attrib); EL(define); EL(fragment); EL(stream); EL(uniform); EL(vertex); AT(file); AT(if); AT(loc); AT(name); AT(semantics); AT(type); AT(value); #undef AT #undef EL CPreprocessorWrapper preprocessor; preprocessor.AddDefines(baseDefines); XMBElement Root = XeroFile.GetRoot(); bool isGLSL = (Root.GetAttributes().GetNamedItem(at_type) == "glsl"); VfsPath vertexFile; VfsPath fragmentFile; CShaderDefines defines = baseDefines; std::map<CStrIntern, int> vertexUniforms; std::map<CStrIntern, CShaderProgram::frag_index_pair_t> fragmentUniforms; std::map<CStrIntern, int> vertexAttribs; int streamFlags = 0; XERO_ITER_EL(Root, Child) { if (Child.GetNodeName() == el_define) { defines.Add(Child.GetAttributes().GetNamedItem(at_name).c_str(), Child.GetAttributes().GetNamedItem(at_value).c_str()); } else if (Child.GetNodeName() == el_vertex) { vertexFile = L"shaders/" + Child.GetAttributes().GetNamedItem(at_file).FromUTF8(); XERO_ITER_EL(Child, Param) { XMBAttributeList Attrs = 
Param.GetAttributes(); CStr cond = Attrs.GetNamedItem(at_if); if (!cond.empty() && !preprocessor.TestConditional(cond)) continue; if (Param.GetNodeName() == el_uniform) { vertexUniforms[CStrIntern(Attrs.GetNamedItem(at_name))] = Attrs.GetNamedItem(at_loc).ToInt(); } else if (Param.GetNodeName() == el_stream) { CStr StreamName = Attrs.GetNamedItem(at_name); if (StreamName == "pos") streamFlags |= STREAM_POS; else if (StreamName == "normal") streamFlags |= STREAM_NORMAL; else if (StreamName == "color") streamFlags |= STREAM_COLOR; else if (StreamName == "uv0") streamFlags |= STREAM_UV0; else if (StreamName == "uv1") streamFlags |= STREAM_UV1; else if (StreamName == "uv2") streamFlags |= STREAM_UV2; else if (StreamName == "uv3") streamFlags |= STREAM_UV3; } else if (Param.GetNodeName() == el_attrib) { int attribLoc = ParseAttribSemantics(Attrs.GetNamedItem(at_semantics)); vertexAttribs[CStrIntern(Attrs.GetNamedItem(at_name))] = attribLoc; } } }
/*
 * fork2 - two-argument (a and w) evaluation of a three-part verb f g h.
 *
 * hs is taken from sv->h and h2 is its f2 entry.  The result depends on fs:
 *   ID(fs) is CLBKCO  ->  g1( h2(a,w) )
 *   AT(fs) has NOUN   ->  g2( fs, h2(a,w) )      (fs is used as a value)
 *   otherwise         ->  g2( f2(a,w), h2(a,w) )
 *
 * NOTE(review): sv, fs, gs, f2, g1 and g2 are not declared in this block;
 * presumably DECLFG introduces them -- confirm.  The order in which f2(...)
 * and h2(...) run in the last case is whatever the compiler chooses for
 * function arguments.
 */
static DF2(fork2){DECLFG;A hs=sv->h;AF h2=VAV(hs)->f2;
 PREF2(fork2);
 R CLBKCO==ID(fs) ? g1(h2(a,w,hs),gs) : (NOUN&AT(fs) ? g2(fs,h2(a,w,hs),gs) : g2(f2(a,w,fs),h2(a,w,hs),gs));
}
double Matrix3::at(unsigned int i, unsigned int j) const { //(!!!) Watch out for i and j out of bounds! return this->e_[ AT(i,j) ]; }
/*
 * showhelp - display help starting at `topic` and follow links until done.
 *
 *   topic - initial topic, or NULL
 *   rtn   - event mapping routine handed on to do_showhelp()
 *   lang  - HELPLANG_FRENCH replaces the two hot spot captions
 *
 * Returns HELP_NO_FILE when no help file is registered, otherwise the last
 * status reported by do_showhelp() (HELP_OK if it never ran again).
 */
int showhelp( const char *topic, EVENT (*rtn)( EVENT ), HelpLangType lang )
{
    bool        first_pass;
    int         status;
    char        filename[_MAX_PATH];
    const char  *file_list[] = { NULL, NULL };
    char        extension[_MAX_EXT];
    char        *msg;
    char        *cur_topic;

    if( HelpFiles[0].name == NULL ) {
        return( HELP_NO_FILE );
    }

    /* only French needs different captions; every other language keeps the defaults */
    if( lang == HELPLANG_FRENCH ) {
        hotSpots[0].str = "F4=Sujet pr�c�dent";
        hotSpots[1].str = "Sortir";
    }

    helpStack = NULL;
    currentColour = C_PLAIN;
    currentAttr = AT( ATTR_NORMAL );

    /* initialize the tab filter */
    tabFilter.tab = (unsigned (*)(void *,void *))help_in_tab;
    tabFilter.next = (a_tab_field *(*)(void *,void *))help_next_field;
    tabFilter.parm = helpTab;
    tabFilter.mousepos = (void *(*)(void *,ORD *, ORD *))uivmousepos;
    tabFilter.mouseparm = &helpScreen;
    tabFilter.first = helpTab;
    tabFilter.wrap = false;
    tabFilter.enter = false;

    /* reduce the first help file's path to "name.ext" */
    _splitpath( HelpFiles[0].name, NULL, NULL, filename, extension );
    strcat( filename, extension );
    file_list[0] = filename;

    /* work on a private copy of the topic so that it can be replaced and freed */
    if( topic == NULL ) {
        cur_topic = NULL;
    } else {
        size_t size = strlen( topic ) + 1;

        cur_topic = HelpMemAlloc( size );
        memcpy( cur_topic, topic, size );
    }

    status = HELP_OK;
    for( first_pass = true; first_pass || cur_topic != NULL; first_pass = false ) {
        if( !first_pass && !help_reinit( file_list ) ) {
            // cannot open help file for hyperlink
            msg = HelpMemAlloc( 28 + strlen( filename ) );
            sprintf( msg, "Unable to open helpfile \"%s\".", filename );
            ShowMsgBox( "Error", msg );
            HelpMemFree( msg );

            /* go back to the topic and file on top of the help stack */
            HelpMemFree( cur_topic );
            cur_topic = HelpMemAlloc( strlen( helpStack->word ) + 1 );
            strcpy( cur_topic, helpStack->word );
            strcpy( filename, helpStack->helpfname );
            prevtopic();
            continue;
        }
        status = do_showhelp( &cur_topic, filename, rtn, first_pass );
        if( status == HELP_NO_SUBJECT ) {
            break;
        }
    }

    if( cur_topic != NULL ) {
        HelpMemFree( cur_topic );
    }
    return( status );
}