bool AVRDAGToDAGISel::selectMultiplication(llvm::SDNode *N) { SDLoc DL(N); MVT Type = N->getSimpleValueType(0); assert(Type == MVT::i8 && "unexpected value type"); bool isSigned = N->getOpcode() == ISD::SMUL_LOHI; unsigned MachineOp = isSigned ? AVR::MULSRdRr : AVR::MULRdRr; SDValue Lhs = N->getOperand(0); SDValue Rhs = N->getOperand(1); SDNode *Mul = CurDAG->getMachineNode(MachineOp, DL, MVT::Glue, Lhs, Rhs); SDValue InChain = CurDAG->getEntryNode(); SDValue InGlue = SDValue(Mul, 0); // Copy the low half of the result, if it is needed. if (N->hasAnyUseOfValue(0)) { SDValue CopyFromLo = CurDAG->getCopyFromReg(InChain, DL, AVR::R0, Type, InGlue); ReplaceUses(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); InGlue = CopyFromLo.getValue(2); } // Copy the high half of the result, if it is needed. if (N->hasAnyUseOfValue(1)) { SDValue CopyFromHi = CurDAG->getCopyFromReg(InChain, DL, AVR::R1, Type, InGlue); ReplaceUses(SDValue(N, 1), CopyFromHi); InChain = CopyFromHi.getValue(1); InGlue = CopyFromHi.getValue(2); } CurDAG->RemoveDeadNode(N); // We need to clear R1. This is currently done (dirtily) // using a custom inserter. return true; }
/* Dump format 1 encoding table */ static void dumpFormat1(Format1 *format, IntX level) { IntX i; DLu(2, "format=", format->format); DLu(2, "count =", format->count); DL(3, (OUTPUTBUFF, "--- glyphId[index]=glyphId\n")); for (i = 0; i < format->count; i++) DL(3, (OUTPUTBUFF, "[%d]=%hu ", i, format->glyphId[i])); DL(3, (OUTPUTBUFF, "\n")); DL(3, (OUTPUTBUFF, "--- code[index]=code\n")); for (i = 0; i < format->count; i++) DL(3, (OUTPUTBUFF, "[%d]=%d ", i, format->code[i])); DL(3, (OUTPUTBUFF, "\n")); }
template <> bool AVRDAGToDAGISel::select<AVRISD::CALL>(SDNode *N) { SDValue InFlag; SDValue Chain = N->getOperand(0); SDValue Callee = N->getOperand(1); unsigned LastOpNum = N->getNumOperands() - 1; // Direct calls are autogenerated. unsigned Op = Callee.getOpcode(); if (Op == ISD::TargetGlobalAddress || Op == ISD::TargetExternalSymbol) { return false; } // Skip the incoming flag if present if (N->getOperand(LastOpNum).getValueType() == MVT::Glue) { --LastOpNum; } SDLoc DL(N); Chain = CurDAG->getCopyToReg(Chain, DL, AVR::R31R30, Callee, InFlag); SmallVector<SDValue, 8> Ops; Ops.push_back(CurDAG->getRegister(AVR::R31R30, MVT::i16)); // Map all operands into the new node. for (unsigned i = 2, e = LastOpNum + 1; i != e; ++i) { Ops.push_back(N->getOperand(i)); } Ops.push_back(Chain); Ops.push_back(Chain.getValue(1)); SDNode *ResNode = CurDAG->getMachineNode(AVR::ICALL, DL, MVT::Other, MVT::Glue, Ops); ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1)); CurDAG->RemoveDeadNode(N); return true; }
template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) { // Use the STD{W}SPQRr pseudo instruction when passing arguments through // the stack on function calls for further expansion during the PEI phase. const StoreSDNode *ST = cast<StoreSDNode>(N); SDValue BasePtr = ST->getBasePtr(); // Early exit when the base pointer is a frame index node or a constant. if (isa<FrameIndexSDNode>(BasePtr) || isa<ConstantSDNode>(BasePtr) || BasePtr.isUndef()) { return false; } const RegisterSDNode *RN = dyn_cast<RegisterSDNode>(BasePtr.getOperand(0)); // Only stores where SP is the base pointer are valid. if (!RN || (RN->getReg() != AVR::SP)) { return false; } int CST = (int)cast<ConstantSDNode>(BasePtr.getOperand(1))->getZExtValue(); SDValue Chain = ST->getChain(); EVT VT = ST->getValue().getValueType(); SDLoc DL(N); SDValue Offset = CurDAG->getTargetConstant(CST, DL, MVT::i16); SDValue Ops[] = {BasePtr.getOperand(0), Offset, ST->getValue(), Chain}; unsigned Opc = (VT == MVT::i16) ? AVR::STDWSPQRr : AVR::STDSPQRr; SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, MVT::Other, Ops); // Transfer memory operands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = ST->getMemOperand(); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); CurDAG->RemoveDeadNode(N); return true; }
void DE(int x) { node* newNode = (node*)malloc(sizeof(node)); node* aux = (node*)malloc(sizeof(node)); if(head->data == x) DF(); if(tail->data == x) DL(); newNode = head; while(newNode->next != NULL) { aux = newNode->next; if(aux->data == x) { newNode->next = aux->next; } newNode = newNode->next; } }
void main() { int m; printf("1:复利计算\n"); printf("2:单利计算\n"); printf("3:求本金\n"); printf("4:求时间\n"); printf("5:求利率\n"); printf("请输入序号"); scanf("%d",&m); if(m==1) FL(); else if(m==2) DL(); else if(m==3) BJ(); else if(m==4) Time(); else if(m==5) LL(); }
void DE(int x) { NODE *p, *position,*obliterate; while(lista->head!=NULL&&lista->head->data==x) DF(); while(lista->tail!=NULL&&lista->tail->data==x) DL(); position=p=lista->head; p=p->next; while(p!=NULL) { while(p!=NULL&&p->data==x) { position=p; p->previous->next=p->next; p->next->previous=p->previous; p=p->next; free(position); } p=p->next; } }
S tos(O o){ S r,t;L z,i;switch(o->t){ case TD:r=alc(BZ)/*hope this is big enough!*/;sprintf(r,"%f",o->d);z=strlen(r)-1;while(r[z]=='0')r[z--]=0;if(r[z]=='.')r[z]=0;BK; case TS:r=alc(o->s.z+1);memcpy(r,o->s.s,o->s.z);r[o->s.z]=0;BK; case TA:r=alc(1);r[0]='[';z=1;for(i=0;i<len(o->a);++i){L l;if(i){r=rlc(r,z+1);r[z++]=',';}t=tos(o->a->st[i]);l=strlen(t);r=rlc(r,z+l);memcpy(r+z,t,l);z+=l;DL(t);}r=rlc(r,z+2);r[z]=']';r[z+1]=0;BK; case TCB:r=alc(o->s.z+3);r[0]='{';memcpy(r+1,o->s.s,o->s.z);memcpy(r+1+o->s.z,"}",2);BK; }R r; } //tostring (copies)
void MMFXDump(IntX level, LongN start) { IntX i, pos; Int16 tmp; Card8 *ptr; Card16 nMasters; DL(1, (OUTPUTBUFF, "### [MMFX] (%08lx)\n", start)); DLV(2, "Version =", MMFX->version); DLu(2, "nMetrics =", MMFX->nMetrics); DLu(2, "offSize =", MMFX->offSize); DL(2, (OUTPUTBUFF, "--- offset[index]=offset\n")); if (MMFX->offSize == 2) { for (i = 0; i < MMFX->nMetrics; i++) { tmp = (Int16)MMFX->offset[i]; DL(2, (OUTPUTBUFF, "[%d]=%04hx ", i, tmp) ); } DL(2, (OUTPUTBUFF, "\n")); } else { for (i = 0; i < MMFX->nMetrics; i++) DL(2, (OUTPUTBUFF, "[%d]=%08lx ", i, MMFX->offset[i]) ); DL(2, (OUTPUTBUFF, "\n")); } DL(2, (OUTPUTBUFF, "\n")); CFF_GetNMasters(&nMasters, MMFX_); DL(2, (OUTPUTBUFF, "--- cstring[index]=<charstring ops>\n")); for (i = 0; i < MMFX->nMetrics; i++) { pos = MMFX->offset[i] - minoffset; ptr = &(MMFX->cstrs[pos]); if (i < 8) switch (i) { case 0: DL(2, (OUTPUTBUFF, "[0=Zero] = <") ); break; case 1: DL(2, (OUTPUTBUFF, "[1=Ascender] = <") ); break; case 2: DL(2, (OUTPUTBUFF, "[2=Descender] = <") ); break; case 3: DL(2, (OUTPUTBUFF, "[3=LineGap] = <") ); break; case 4: DL(2, (OUTPUTBUFF, "[4=AdvanceWidthMax]= <") ); break; case 5: DL(2, (OUTPUTBUFF, "[5=AvgCharWidth] = <") ); break; case 6: DL(2, (OUTPUTBUFF, "[6=xHeight] = <") ); break; case 7: DL(2, (OUTPUTBUFF, "[7=CapHeight] = <") ); break; } else { DL(2, (OUTPUTBUFF, "[%d]= <", i) ); } dump_csDump(MAX_INT32, ptr, nMasters); DL(2, (OUTPUTBUFF, ">\n")); } DL(2, (OUTPUTBUFF, "\n")); }
void BBOXDump(IntX level, LongN start) { IntX i; IntX j; DL(1, (OUTPUTBUFF, "### [BBOX] (%08lx)\n", start)); DLV(2, "version =", BBOX->version); DLu(2, "flags =", BBOX->flags); DLu(2, "nGlyphs =", BBOX->nGlyphs); DLu(2, "nMasters=", BBOX->nMasters); if (BBOX->nMasters == 1) { DL(3, (OUTPUTBUFF, "--- bbox[glyphId]={left,bottom,right,top}\n")); for (i = 0; i < BBOX->nGlyphs; i++) { BBox *bbox = &BBOX->bbox[i]; DL(3, (OUTPUTBUFF, "[%d]={%hd,%hd,%hd,%hd} ", i, bbox->left[0], bbox->bottom[0], bbox->right[0], bbox->top[0])); } DL(3, (OUTPUTBUFF, "\n")); } else { DL(3, (OUTPUTBUFF, "--- bbox[glyphId]={{left+},{bottom+},{right+},{top+}}\n")); for (i = 0; i < BBOX->nGlyphs; i++) { BBox *bbox = &BBOX->bbox[i]; DL(3, (OUTPUTBUFF, "[%d]={{", i)); for (j = 0; j < BBOX->nMasters; j++) DL(3, (OUTPUTBUFF, "%hd%s", bbox->left[j], j == BBOX->nMasters - 1 ? "},{" : ",")); for (j = 0; j < BBOX->nMasters; j++) DL(3, (OUTPUTBUFF, "%hd%s", bbox->bottom[j], j == BBOX->nMasters - 1 ? "},{" : ",")); for (j = 0; j < BBOX->nMasters; j++) DL(3, (OUTPUTBUFF, "%hd%s", bbox->right[j], j == BBOX->nMasters - 1 ? "},{" : ",")); for (j = 0; j < BBOX->nMasters; j++) DL(3, (OUTPUTBUFF, "%hd%s", bbox->top[j], j == BBOX->nMasters - 1 ? "}} " : ",")); } DL(3, (OUTPUTBUFF, "\n")); } }
/* Subroutine */ int sgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, real *dl, real *d, real *du, real *dlf, real *df, real *duf, real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer * ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SGTSVX uses the LU factorization to compute the solution to a real system of linear equations A * X = B or A**T * X = B, where A is a tridiagonal matrix of order N and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. Description =========== The following steps are performed: 1. If FACT = 'N', the LU decomposition is used to factor the matrix A as A = L * U, where L is a product of permutation and unit lower bidiagonal matrices and U is upper triangular with nonzeros in only the main diagonal and first two superdiagonals. 2. The factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, steps 3 and 4 are skipped. 3. The system of equations is solved for X using the factored form of A. 4. Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. Arguments ========= FACT (input) CHARACTER*1 Specifies whether or not the factored form of A has been supplied on entry. = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not be modified. = 'N': The matrix will be copied to DLF, DF, and DUF and factored. TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. DL (input) REAL array, dimension (N-1) The (n-1) subdiagonal elements of A. D (input) REAL array, dimension (N) The n diagonal elements of A. DU (input) REAL array, dimension (N-1) The (n-1) superdiagonal elements of A. DLF (input or output) REAL array, dimension (N-1) If FACT = 'F', then DLF is an input argument and on entry contains the (n-1) multipliers that define the matrix L from the LU factorization of A as computed by SGTTRF. If FACT = 'N', then DLF is an output argument and on exit contains the (n-1) multipliers that define the matrix L from the LU factorization of A. DF (input or output) REAL array, dimension (N) If FACT = 'F', then DF is an input argument and on entry contains the n diagonal elements of the upper triangular matrix U from the LU factorization of A. If FACT = 'N', then DF is an output argument and on exit contains the n diagonal elements of the upper triangular matrix U from the LU factorization of A. DUF (input or output) REAL array, dimension (N-1) If FACT = 'F', then DUF is an input argument and on entry contains the (n-1) elements of the first superdiagonal of U. If FACT = 'N', then DUF is an output argument and on exit contains the (n-1) elements of the first superdiagonal of U. DU2 (input or output) REAL array, dimension (N-2) If FACT = 'F', then DU2 is an input argument and on entry contains the (n-2) elements of the second superdiagonal of U. If FACT = 'N', then DU2 is an output argument and on exit contains the (n-2) elements of the second superdiagonal of U. IPIV (input or output) INTEGER array, dimension (N) If FACT = 'F', then IPIV is an input argument and on entry contains the pivot indices from the LU factorization of A as computed by SGTTRF. If FACT = 'N', then IPIV is an output argument and on exit contains the pivot indices from the LU factorization of A; row i of the matrix was interchanged with row IPIV(i). IPIV(i) will always be either i or i+1; IPIV(i) = i indicates a row interchange was not required. B (input) REAL array, dimension (LDB,NRHS) The N-by-NRHS right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (output) REAL array, dimension (LDX,NRHS) If INFO = 0, the N-by-NRHS solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). RCOND (output) REAL The estimate of the reciprocal condition number of the matrix A. If RCOND is less than the machine precision (in particular, if RCOND = 0), the matrix is singular to working precision. This condition is indicated by a return code of INFO > 0, and the solution and error bounds are not computed. FERR (output) REAL array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) REAL array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= N: U(i,i) is exactly zero. The factorization has not been completed unless i = N, but the factor U is exactly singular, so the solution and error bounds could not be computed. = N+1: RCOND is less than machine precision. The factorization has been completed, but the matrix is singular to working precision, and the solution and error bounds have not been computed. ===================================================================== Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1; /* Local variables */ static char norm[1]; extern logical lsame_(char *, char *); static real anorm; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); extern doublereal slamch_(char *); static logical nofact; extern /* Subroutine */ int xerbla_(char *, integer *); extern doublereal slangt_(char *, integer *, real *, real *, real *); extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), sgtcon_(char *, integer *, real *, real *, real *, real *, integer *, real *, real *, real *, integer *, integer *); static logical notran; extern /* Subroutine */ int sgtrfs_(char *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *), sgttrf_(integer *, real *, real *, real *, real *, integer *, integer *), sgttrs_(char *, integer *, integer *, real *, real *, real *, real *, integer *, real *, integer *, integer *); #define DL(I) dl[(I)-1] #define D(I) d[(I)-1] #define DU(I) du[(I)-1] #define DLF(I) dlf[(I)-1] #define DF(I) df[(I)-1] #define DUF(I) duf[(I)-1] #define DU2(I) du2[(I)-1] #define IPIV(I) ipiv[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; nofact = lsame_(fact, "N"); notran = lsame_(trans, "N"); if (! nofact && ! lsame_(fact, "F")) { *info = -1; } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -14; } else if (*ldx < max(1,*n)) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("SGTSVX", &i__1); return 0; } if (nofact) { /* Compute the LU factorization of A. */ scopy_(n, &D(1), &c__1, &DF(1), &c__1); if (*n > 1) { i__1 = *n - 1; scopy_(&i__1, &DL(1), &c__1, &DLF(1), &c__1); i__1 = *n - 1; scopy_(&i__1, &DU(1), &c__1, &DUF(1), &c__1); } sgttrf_(n, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), info); /* Return if INFO is non-zero. */ if (*info != 0) { if (*info > 0) { *rcond = 0.f; } return 0; } } /* Compute the norm of the matrix A. */ if (notran) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = slangt_(norm, n, &DL(1), &D(1), &DU(1)); /* Compute the reciprocal of the condition number of A. */ sgtcon_(norm, n, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), &anorm, rcond, &WORK(1), &IWORK(1), info); /* Return if the matrix is singular to working precision. */ if (*rcond < slamch_("Epsilon")) { *info = *n + 1; return 0; } /* Compute the solution vectors X. */ slacpy_("Full", n, nrhs, &B(1,1), ldb, &X(1,1), ldx); sgttrs_(trans, n, nrhs, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), &X(1,1), ldx, info); /* Use iterative refinement to improve the computed solutions and compute error bounds and backward error estimates for them. */ sgtrfs_(trans, n, nrhs, &DL(1), &D(1), &DU(1), &DLF(1), &DF(1), &DUF(1), & DU2(1), &IPIV(1), &B(1,1), ldb, &X(1,1), ldx, &FERR(1), &BERR(1), &WORK(1), &IWORK(1), info); return 0; /* End of SGTSVX */ } /* sgtsvx_ */
void MMSDDump(IntX level, LongN start) { DL(1, (stderr, "### [MMSD] (%08lx)\n", start)); }
/* Subroutine */ int cgtcon_(char *norm, integer *n, complex *dl, complex *d, complex *du, complex *du2, integer *ipiv, real *anorm, real *rcond, complex *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGTCON estimates the reciprocal of the condition number of a complex tridiagonal matrix A using the LU factorization as computed by CGTTRF. An estimate is obtained for norm(inv(A)), and the reciprocal of the condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). Arguments ========= NORM (input) CHARACTER*1 Specifies whether the 1-norm condition number or the infinity-norm condition number is required: = '1' or 'O': 1-norm; = 'I': Infinity-norm. N (input) INTEGER The order of the matrix A. N >= 0. DL (input) COMPLEX array, dimension (N-1) The (n-1) multipliers that define the matrix L from the LU factorization of A as computed by CGTTRF. D (input) COMPLEX array, dimension (N) The n diagonal elements of the upper triangular matrix U from the LU factorization of A. DU (input) COMPLEX array, dimension (N-1) The (n-1) elements of the first superdiagonal of U. DU2 (input) COMPLEX array, dimension (N-2) The (n-2) elements of the second superdiagonal of U. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= n, row i of the matrix was interchanged with row IPIV(i). IPIV(i) will always be either i or i+1; IPIV(i) = i indicates a row interchange was not required. ANORM (input) REAL If NORM = '1' or 'O', the 1-norm of the original matrix A. If NORM = 'I', the infinity-norm of the original matrix A. RCOND (output) REAL The reciprocal of the condition number of the matrix A, computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an estimate of the 1-norm of inv(A) computed in this routine. WORK (workspace) COMPLEX array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer i__1, i__2; /* Local variables */ static integer kase, kase1, i; extern logical lsame_(char *, char *); extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real *, integer *), xerbla_(char *, integer *); static real ainvnm; static logical onenrm; extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex *, complex *, complex *, complex *, integer *, complex *, integer *, integer *); #define WORK(I) work[(I)-1] #define IPIV(I) ipiv[(I)-1] #define DU2(I) du2[(I)-1] #define DU(I) du[(I)-1] #define D(I) d[(I)-1] #define DL(I) dl[(I)-1] *info = 0; onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); if (! onenrm && ! lsame_(norm, "I")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*anorm < 0.f) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("CGTCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.f; if (*n == 0) { *rcond = 1.f; return 0; } else if (*anorm == 0.f) { return 0; } /* Check that D(1:N) is non-zero. */ i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = i; if (D(i).r == 0.f && D(i).i == 0.f) { return 0; } /* L10: */ } ainvnm = 0.f; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kase = 0; L20: clacon_(n, &WORK(*n + 1), &WORK(1), &ainvnm, &kase); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(U)*inv(L). */ cgttrs_("No transpose", n, &c__1, &DL(1), &D(1), &DU(1), &DU2(1), &IPIV(1), &WORK(1), n, info); } else { /* Multiply by inv(L')*inv(U'). */ cgttrs_("Conjugate transpose", n, &c__1, &DL(1), &D(1), &DU(1), & DU2(1), &IPIV(1), &WORK(1), n, info); } goto L20; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.f) { *rcond = 1.f / ainvnm / *anorm; } return 0; /* End of CGTCON */ } /* cgtcon_ */
void WDTHDump(IntX level, LongN start) { IntX i; IntX j; IntX k; IntX iWidth; IntX nElements = WDTH->nRanges + 1; DL(1, (OUTPUTBUFF, "### [WDTH] (%08lx)\n", start)); DLV(2, "version =", WDTH->version); DLu(2, "flags =", WDTH->flags); DLu(2, "nMasters=", WDTH->nMasters); DLu(2, "nRanges =", WDTH->nRanges); DL(3, (OUTPUTBUFF, "--- firstGlyph[index]=glyphId\n")); for (i = 0; i < nElements; i++) DL(3, (OUTPUTBUFF, "[%d]=%hu ", i, WDTH->firstGlyph[i])); DL(3, (OUTPUTBUFF, "\n")); DL(3, (OUTPUTBUFF, "--- offset[index]=offset\n")); if (WDTH->flags & LONG_OFFSETS) for (i = 0; i < nElements; i++) DL(3, (OUTPUTBUFF, "[%d]=%08lx ", i, ((Card32 *)WDTH->offset)[i])); else for (i = 0; i < nElements; i++) DL(3, (OUTPUTBUFF, "[%d]=%04hx ", i, ((Card16 *)WDTH->offset)[i])); DL(3, (OUTPUTBUFF, "\n")); iWidth = 0; if (WDTH->nMasters == 1) { DL(3, (OUTPUTBUFF, "--- width[offset]=value\n")); if (WDTH->flags & LONG_OFFSETS) { Card32 *offset = WDTH->offset; for (i = 0; i < WDTH->nRanges; i++) { IntX span = (offset[i + 1] - offset[i]) / sizeof(uFWord); for (j = 0; j < span; j++) DL(3, (OUTPUTBUFF, "[%08lx]=%hu ", offset[i] + j * sizeof(uFWord), WDTH->width[iWidth++])); } } else { Card16 *offset = WDTH->offset; for (i = 0; i < WDTH->nRanges; i++) { IntX span = (offset[i + 1] - offset[i]) / sizeof(uFWord); for (j = 0; j < span; j++) DL(3, (OUTPUTBUFF, "[%04lx]=%hu ", offset[i] + j * sizeof(uFWord), WDTH->width[iWidth++])); } } } else { DL(3, (OUTPUTBUFF, "--- width[offset]={value+}\n")); if (WDTH->flags & LONG_OFFSETS) { Card32 *offset = WDTH->offset; for (i = 0; i < WDTH->nRanges; i++) { IntX span = (offset[i + 1] - offset[i]) / sizeof(uFWord); for (j = 0; j < span; j++) { DL(3, (OUTPUTBUFF, "[%08lx]={", offset[i] + j * sizeof(uFWord))); for (k = 0; k < WDTH->nMasters; k++) DL(3, (OUTPUTBUFF, "%hu%s", WDTH->width[iWidth++], k == WDTH->nMasters - 1 ? "} " : ",")); } } } else { Card16 *offset = WDTH->offset; for (i = 0; i < WDTH->nRanges; i++) { IntX span = (offset[i + 1] - offset[i]) / sizeof(uFWord); for (j = 0; j < span; j++) { DL(3, (OUTPUTBUFF, "[%04lx]={", offset[i] + j * sizeof(uFWord))); for (k = 0; k < WDTH->nMasters; k++) DL(3, (OUTPUTBUFF, "%hu%s", WDTH->width[iWidth++], k == WDTH->nMasters - 1 ? "} " : ",")); } } } } DL(3, (OUTPUTBUFF, "\n")); }
O toso(O o){S s=tos(o);O r=newosz(s);DL(s);R r;} //wrap tostring in object
inline void Trr2kTTTT ( UpperOrLower uplo, Orientation orientationOfA, Orientation orientationOfB, Orientation orientationOfC, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kTTTT"); if( E.Height() != E.Width() || A.Height() != C.Height() || A.Width() != E.Height() || C.Width() != E.Height() || B.Height() != E.Width() || D.Height() != E.Width() || A.Height() != B.Width() || C.Height() != D.Width() ) throw std::logic_error("Nonconformal Trr2kTTTT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AT(g), A0(g), AB(g), A1(g), A2(g); DistMatrix<T> BL(g), BR(g), B0(g), B1(g), B2(g); DistMatrix<T> CT(g), C0(g), CB(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,STAR,MC > A1_STAR_MC(g); DistMatrix<T,VR, STAR> B1_VR_STAR(g); DistMatrix<T,STAR,MR > B1AdjOrTrans_STAR_MR(g); DistMatrix<T,STAR,MC > C1_STAR_MC(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_STAR_MC.AlignWith( E ); B1_VR_STAR.AlignWith( E ); B1AdjOrTrans_STAR_MR.AlignWith( E ); C1_STAR_MC.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionDown ( A, AT, AB, 0 ); LockedPartitionRight( B, BL, BR, 0 ); LockedPartitionDown ( C, CT, CB, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AT.Height() < A.Height() ) { LockedRepartitionDown ( AT, A0, /**/ /**/ A1, AB, A2 ); LockedRepartitionRight ( BL, /**/ BR, B0, /**/ B1, B2 ); LockedRepartitionDown ( CT, C0, /**/ /**/ C1, CB, C2 ); LockedRepartitionRight ( DL, /**/ DR, D0, /**/ D1, D2 ); //--------------------------------------------------------------------// A1_STAR_MC = A1; C1_STAR_MC = C1; B1_VR_STAR = B1; D1_VR_STAR = D1; if( orientationOfB == ADJOINT ) B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR ); else B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR ); if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, orientationOfA, orientationOfC, alpha, A1_STAR_MC, B1AdjOrTrans_STAR_MR, C1_STAR_MC, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionDown ( CT, C0, C1, /**/ /**/ CB, C2 ); SlideLockedPartitionRight ( BL, /**/ BR, B0, B1, /**/ B2 ); SlideLockedPartitionDown ( AT, A0, A1, /**/ /**/ AB, A2 ); } #ifndef RELEASE PopCallStack(); #endif }
/* Subroutine */ int dgtrfs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d, doublereal *du, doublereal *dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGTRFS improves the computed solution to a system of linear equations when the coefficient matrix is tridiagonal, and provides error bounds and backward error estimates for the solution. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. DL (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) subdiagonal elements of A. D (input) DOUBLE PRECISION array, dimension (N) The diagonal elements of A. DU (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) superdiagonal elements of A. DLF (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) multipliers that define the matrix L from the LU factorization of A as computed by DGTTRF. DF (input) DOUBLE PRECISION array, dimension (N) The n diagonal elements of the upper triangular matrix U from the LU factorization of A. DUF (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) elements of the first superdiagonal of U. DU2 (input) DOUBLE PRECISION array, dimension (N-2) The (n-2) elements of the second superdiagonal of U. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= n, row i of the matrix was interchanged with row IPIV(i). IPIV(i) will always be either i or i+1; IPIV(i) = i indicates a row interchange was not required. B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DGTTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b18 = -1.; static doublereal c_b19 = 1.; /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i, j; static doublereal s; extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer nz; extern /* Subroutine */ int dlagtm_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; static char transn[1]; extern /* Subroutine */ int dgttrs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static char transt[1]; static doublereal lstres, eps; #define DL(I) dl[(I)-1] #define D(I) d[(I)-1] #define DU(I) du[(I)-1] #define DLF(I) dlf[(I)-1] #define DF(I) df[(I)-1] #define DUF(I) duf[(I)-1] #define DU2(I) du2[(I)-1] #define IPIV(I) ipiv[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -13; } else if (*ldx < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("DGTRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { FERR(j) = 0.; BERR(j) = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transn = 'N'; *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transn = 'T'; *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = 4; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ dcopy_(n, &B(1,j), &c__1, &WORK(*n + 1), &c__1); dlagtm_(trans, n, &c__1, &c_b18, &DL(1), &D(1), &DU(1), &X(1,j), ldx, &c_b19, &WORK(*n + 1), n); /* Compute abs(op(A))*abs(x) + abs(b) for use in the backward error bound. */ if (notran) { if (*n == 1) { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)); } else { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)) + (d__3 = DU(1) * X(2,j), abs(d__3)); i__2 = *n - 1; for (i = 2; i <= *n-1; ++i) { WORK(i) = (d__1 = B(i,j), abs(d__1)) + (d__2 = DL(i - 1) * X(i-1,j), abs(d__2)) + ( d__3 = D(i) * X(i,j), abs(d__3)) + ( d__4 = DU(i) * X(i+1,j), abs(d__4)); /* L30: */ } WORK(*n) = (d__1 = B(*n,j), abs(d__1)) + (d__2 = DL(*n - 1) * X(*n-1,j), abs(d__2)) + ( d__3 = D(*n) * X(*n,j), abs(d__3)); } } else { if (*n == 1) { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)); } else { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)) + (d__3 = DL(1) * X(2,j), abs(d__3)); i__2 = *n - 1; for (i = 2; i <= *n-1; ++i) { WORK(i) = (d__1 = B(i,j), abs(d__1)) + (d__2 = DU(i - 1) * X(i-1,j), abs(d__2)) + ( d__3 = D(i) * X(i,j), abs(d__3)) + ( d__4 = DL(i) * X(i+1,j), abs(d__4)); /* L40: */ } WORK(*n) = (d__1 = B(*n,j), abs(d__1)) + (d__2 = DU(*n - 1) * X(*n-1,j), abs(d__2)) + ( d__3 = D(*n) * X(*n,j), abs(d__3)); } } /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matr ix or vector Z. If the i-th component of the denominator is le ss than SAFE2, then SAFE1 is added to the i-th components of th e numerator and denominator before dividing. */ s = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = WORK(*n + i), abs(d__1)) / WORK(i); s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = WORK(*n + i), abs(d__1)) + safe1) / (WORK(i) + safe1); s = max(d__2,d__3); } /* L50: */ } BERR(j) = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, a nd 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (BERR(j) > eps && BERR(j) * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dgttrs_(trans, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV( 1), &WORK(*n + 1), n, info); daxpy_(n, &c_b19, &WORK(*n + 1), &c__1, &X(1,j), &c__1) ; lstres = BERR(j); ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X ) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix o r vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B )) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i); } else { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i) + safe1; } /* L60: */ } kase = 0; L70: dlacon_(n, &WORK((*n << 1) + 1), &WORK(*n + 1), &IWORK(1), &FERR(j), & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T). */ dgttrs_(transt, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), & IPIV(1), &WORK(*n + 1), n, info); i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L80: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L90: */ } dgttrs_(transn, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), & IPIV(1), &WORK(*n + 1), n, info); } goto L70; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = X(i,j), abs(d__1)); lstres = max(d__2,d__3); /* L100: */ } if (lstres != 0.) { FERR(j) /= lstres; } /* L110: */ } return 0; /* End of DGTRFS */ } /* dgtrfs_ */
O mods(O a,O b){ L z;S s;C d[BZ];Reprog*p;Resub rs[10];O r,os=pop(top(rst));if(os->t!=TS)TE;s=os->s.s;p=regcomp(a->s.s);if(!p)ex("bad regex");memset(rs,0,sizeof(rs)); for(r=newos("",0);s<os->s.s+os->s.z&®exec(p,s,rs,10);s=rs[0].e.ep,memset(rs,0,sizeof(rs))){if(rs[0].s.sp>s){z=rs[0].s.sp-s;r->s.s=rlc(r->s.s,r->s.z+z);memcpy(r->s.s+r->s.z,s,z);r->s.z+=z;}if(b->s.z==0)continue;regsub(b->s.s,d,BZ,rs,sizeof(rs));z=strlen(d);r->s.s=rlc(r->s.s,r->s.z+z);memcpy(r->s.s+r->s.z,d,z);r->s.z+=z;} if(s<os->s.s+os->s.z){z=os->s.s+os->s.z-s;r->s.s=rlc(r->s.s,r->s.z+z);memcpy(r->s.s+r->s.z,s,z);r->s.z+=z;}r->s.s=rlc(r->s.s,r->s.z+1);r->s.s[r->s.z]=0;dlo(os);DL(p);R r; }
V po(FP f,O o){S s=tos(o);fputs(s,f);DL(s);} //print object
doublereal zlangt_(char *norm, integer *n, doublecomplex *dl, doublecomplex * d, doublecomplex *du) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= ZLANGT returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex tridiagonal matrix A. Description =========== ZLANGT returns the value ZLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( ( normI(A), NORM = 'I' or 'i' ( ( normF(A), NORM = 'F', 'f', 'E' or 'e' where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= NORM (input) CHARACTER*1 Specifies the value to be returned in ZLANGT as described above. N (input) INTEGER The order of the matrix A. N >= 0. When N = 0, ZLANGT is set to zero. DL (input) COMPLEX*16 array, dimension (N-1) The (n-1) sub-diagonal elements of A. D (input) COMPLEX*16 array, dimension (N) The diagonal elements of A. DU (input) COMPLEX*16 array, dimension (N-1) The (n-1) super-diagonal elements of A. ===================================================================== Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer i__1; doublereal ret_val, d__1, d__2; /* Builtin functions */ double z_abs(doublecomplex *), sqrt(doublereal); /* Local variables */ static integer i; static doublereal scale; extern logical lsame_(char *, char *); static doublereal anorm; extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, doublereal *, doublereal *); static doublereal sum; #define DU(I) du[(I)-1] #define D(I) d[(I)-1] #define DL(I) dl[(I)-1] if (*n <= 0) { anorm = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ anorm = z_abs(&D(*n)); i__1 = *n - 1; for (i = 1; i <= *n-1; ++i) { /* Computing MAX */ d__1 = anorm, d__2 = z_abs(&DL(i)); anorm = max(d__1,d__2); /* Computing MAX */ d__1 = anorm, d__2 = z_abs(&D(i)); anorm = max(d__1,d__2); /* Computing MAX */ d__1 = anorm, d__2 = z_abs(&DU(i)); anorm = max(d__1,d__2); /* L10: */ } } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find norm1(A). */ if (*n == 1) { anorm = z_abs(&D(1)); } else { /* Computing MAX */ d__1 = z_abs(&D(1)) + z_abs(&DL(1)), d__2 = z_abs(&D(*n)) + z_abs( &DU(*n - 1)); anorm = max(d__1,d__2); i__1 = *n - 1; for (i = 2; i <= *n-1; ++i) { /* Computing MAX */ d__1 = anorm, d__2 = z_abs(&D(i)) + z_abs(&DL(i)) + z_abs(&DU( i - 1)); anorm = max(d__1,d__2); /* L20: */ } } } else if (lsame_(norm, "I")) { /* Find normI(A). */ if (*n == 1) { anorm = z_abs(&D(1)); } else { /* Computing MAX */ d__1 = z_abs(&D(1)) + z_abs(&DU(1)), d__2 = z_abs(&D(*n)) + z_abs( &DL(*n - 1)); anorm = max(d__1,d__2); i__1 = *n - 1; for (i = 2; i <= *n-1; ++i) { /* Computing MAX */ d__1 = anorm, d__2 = z_abs(&D(i)) + z_abs(&DU(i)) + z_abs(&DL( i - 1)); anorm = max(d__1,d__2); /* L30: */ } } } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; zlassq_(n, &D(1), &c__1, &scale, &sum); if (*n > 1) { i__1 = *n - 1; zlassq_(&i__1, &DL(1), &c__1, &scale, &sum); i__1 = *n - 1; zlassq_(&i__1, &DU(1), &c__1, &scale, &sum); } anorm = scale * sqrt(sum); } ret_val = anorm; return ret_val; /* End of ZLANGT */ } /* zlangt_ */
V rdq(ST s,I u){S e,i=rdln();F d=strtod(i,&e);if(*e)psh(s,newoskz(i));else{DL(i);psh(s,newod(d));}if(u)v['Q']=dup(top(s));} //q,Q
int MetaAnalysis(gsl_vector * esVector, gsl_vector * varVector, gsl_vector * metaResultsVector, gsl_combination * comb) { ST_retcode rc; ST_uint4 i, nStudies, subsetLength; ST_double sumOfFixedWeights = 0.0; ST_double sumOfFixedWeights2= 0.0; ST_double sumOfFixedWeightedEffects = 0.0; ST_double sumOfFixedWeightedSquares = 0.0; nStudies = (ST_uint4) esVector->size; subsetLength = gsl_combination_k(comb); /* note the definition of c(i) in the beginning */ if (subsetLength > 1.0) { for(i=0; i< subsetLength ; i++) { sumOfFixedWeights += 1.0 / (gsl_vector_get(varVector, c(i) )); sumOfFixedWeights2 += 1.0 / gsl_pow_2( (gsl_vector_get(varVector, c(i) )) ); sumOfFixedWeightedEffects += gsl_vector_get(esVector, c(i)) / (gsl_vector_get(varVector, c(i))); } /* ES_FEM */ gsl_vector_set(metaResultsVector, 0, sumOfFixedWeightedEffects / sumOfFixedWeights); /* var_FEM*/ gsl_vector_set(metaResultsVector, 1, 1.0 / sumOfFixedWeights); /* df */ gsl_vector_set(metaResultsVector, 4, subsetLength-1.0); /* Q */ for(i=0; i< subsetLength ; i++) { sumOfFixedWeightedSquares += /* see the definition of MARES */ gsl_pow_2(gsl_vector_get(esVector, c(i)) - MARES(0)) / gsl_vector_get(varVector,c(i)); } gsl_vector_set(metaResultsVector, 5, sumOfFixedWeightedSquares); /* I2 */ gsl_vector_set(metaResultsVector, 6, GSL_MAX(0.0, 1.0 - MARES(4) / MARES(5)) ); /****REM****/ /* sets ES_REM var_REM and tau2 */ if ((rc = DL(esVector, varVector, metaResultsVector, comb, sumOfFixedWeights, sumOfFixedWeights2) )) return(rc); } else { gsl_vector_set(metaResultsVector, 2, MARES(0)); gsl_vector_set(metaResultsVector, 3, MARES(1)); gsl_vector_set(metaResultsVector, 5, 0.0); /* Q will set to missing later */ gsl_vector_set(metaResultsVector, 6, 0.0 ); /* I2 will set to missing later */ gsl_vector_set(metaResultsVector, 7, 0.0 ); /* no tau2 */ } return 0; }
F rdlnd(){F r;S s=rdln();r=strtod(s,0);DL(s);R r;} //read number(should this error on wrong input?)
void Trr2kNNNT ( UpperOrLower uplo, Orientation orientationOfD, T alpha, const DistMatrix<T>& A, const DistMatrix<T>& B, const DistMatrix<T>& C, const DistMatrix<T>& D, T beta, DistMatrix<T>& E ) { #ifndef RELEASE PushCallStack("internal::Trr2kNNNT"); if( E.Height() != E.Width() || A.Width() != C.Width() || A.Height() != E.Height() || C.Height() != E.Height() || B.Width() != E.Width() || D.Height() != E.Width() || A.Width() != B.Height() || C.Width() != D.Width() ) throw std::logic_error("Nonconformal Trr2kNNNT"); #endif const Grid& g = E.Grid(); DistMatrix<T> AL(g), AR(g), A0(g), A1(g), A2(g); DistMatrix<T> BT(g), B0(g), BB(g), B1(g), B2(g); DistMatrix<T> CL(g), CR(g), C0(g), C1(g), C2(g); DistMatrix<T> DL(g), DR(g), D0(g), D1(g), D2(g); DistMatrix<T,MC, STAR> A1_MC_STAR(g); DistMatrix<T,MR, STAR> B1Trans_MR_STAR(g); DistMatrix<T,MC, STAR> C1_MC_STAR(g); DistMatrix<T,VR, STAR> D1_VR_STAR(g); DistMatrix<T,STAR,MR > D1AdjOrTrans_STAR_MR(g); A1_MC_STAR.AlignWith( E ); B1Trans_MR_STAR.AlignWith( E ); C1_MC_STAR.AlignWith( E ); D1_VR_STAR.AlignWith( E ); D1AdjOrTrans_STAR_MR.AlignWith( E ); LockedPartitionRight( A, AL, AR, 0 ); LockedPartitionDown ( B, BT, BB, 0 ); LockedPartitionRight( C, CL, CR, 0 ); LockedPartitionRight( D, DL, DR, 0 ); while( AL.Width() < A.Width() ) { LockedRepartitionRight ( AL, /**/ AR, A0, /**/ A1, A2 ); LockedRepartitionDown ( BT, B0, /**/ /**/ B1, BB, B2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); LockedRepartitionRight ( CL, /**/ CR, C0, /**/ C1, C2 ); //--------------------------------------------------------------------// A1_MC_STAR = A1; C1_MC_STAR = C1; B1Trans_MR_STAR.TransposeFrom( B1 ); D1_VR_STAR = D1; if( orientationOfD == ADJOINT ) D1AdjOrTrans_STAR_MR.AdjointFrom( D1_VR_STAR ); else D1AdjOrTrans_STAR_MR.TransposeFrom( D1_VR_STAR ); LocalTrr2k ( uplo, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, C1_MC_STAR, D1AdjOrTrans_STAR_MR, beta, E ); //--------------------------------------------------------------------// SlideLockedPartitionRight ( DL, /**/ DR, D0, D1, /**/ D2 ); SlideLockedPartitionRight ( CL, /**/ CR, C0, C1, /**/ C2 ); SlideLockedPartitionDown ( BT, B0, B1, /**/ /**/ BB, B2 ); SlideLockedPartitionRight ( AL, /**/ AR, A0, A1, /**/ A2 ); } #ifndef RELEASE PopCallStack(); #endif }
V dls(ST s){DL(s->st);DL(s);} //delete
int main(int argc, char** argv) { // Initialize runtime madness::World& world = madness::initialize(argc, argv); // Get command line arguments if(argc < 2) { std::cout << "Usage: fock_build matrix_size block_size df_size df_block_size [repetitions]\n"; return 0; } const long matrix_size = atol(argv[1]); const long block_size = atol(argv[2]); const long df_size = atol(argv[3]); const long df_block_size = atol(argv[4]); if (matrix_size <= 0) { std::cerr << "Error: matrix size must greater than zero.\n"; return 1; } if (df_size <= 0) { std::cerr << "Error: third rank size must greater than zero.\n"; return 1; } if (block_size <= 0 || df_block_size <= 0) { std::cerr << "Error: block size must greater than zero.\n"; return 1; } if(matrix_size % block_size != 0ul && df_size % df_block_size != 0ul) { std::cerr << "Error: tensor size must be evenly divisible by block size.\n"; return 1; } const long repeat = (argc >= 6 ? atol(argv[5]) : 5); if (repeat <= 0) { std::cerr << "Error: number of repititions must greater than zero.\n"; return 1; } const std::size_t num_blocks = matrix_size / block_size; const std::size_t df_num_blocks = df_size / df_block_size; const std::size_t block_count = num_blocks * num_blocks; const std::size_t df_block_count = df_num_blocks * num_blocks * num_blocks; if(world.rank() == 0) std::cout << "TiledArray: Fock Build Test ...\n" << "Number of nodes = " << world.size() << "\nMatrix size = " << matrix_size << "x" << matrix_size << "\nTensor size = " << matrix_size << "x" << matrix_size << "x" << df_size << "\nBlock size = " << block_size << "x" << block_size << "x" << df_block_size << "\nMemory per matrix = " << double(matrix_size * matrix_size * sizeof(double)) / 1.0e9 << " GB\nMemory per tensor = " << double(matrix_size * matrix_size * df_size * sizeof(double)) / 1.0e9 << " GB\nNumber of matrix blocks = " << block_count << "\nNumber of tensor blocks = " << df_block_count << "\nAverage blocks/node matrix = " << double(block_count) / double(world.size()) << "\nAverage blocks/node tensor = " << double(df_block_count) / double(world.size()) << "\n"; // Construct TiledRange std::vector<unsigned int> blocking; blocking.reserve(num_blocks + 1); for(std::size_t i = 0; i <= matrix_size; i += block_size) blocking.push_back(i); std::vector<unsigned int> df_blocking; blocking.reserve(df_num_blocks + 1); for(std::size_t i = 0; i <= df_size; i += df_block_size) df_blocking.push_back(i); std::vector<TiledArray::TiledRange1> blocking2(2, TiledArray::TiledRange1(blocking.begin(), blocking.end())); std::vector<TiledArray::TiledRange1> blocking3 = { TiledArray::TiledRange1(blocking.begin(), blocking.end()), TiledArray::TiledRange1(blocking.begin(), blocking.end()), TiledArray::TiledRange1(df_blocking.begin(), df_blocking.end()) }; TiledArray::TiledRange trange(blocking2.begin(), blocking2.end()); TiledArray::TiledRange df_trange(blocking3.begin(), blocking3.end()); // Construct and initialize arrays TiledArray::Array<double, 2> D(world, trange); TiledArray::Array<double, 2> DL(world, trange); TiledArray::Array<double, 2> F(world, trange); TiledArray::Array<double, 2> G(world, trange); TiledArray::Array<double, 2> H(world, trange); TiledArray::Array<double, 3> TCInts(world, df_trange); TiledArray::Array<double, 3> ExchTemp(world, df_trange); D.set_all_local(1.0); DL.set_all_local(1.0); H.set_all_local(2.0); TCInts.set_all_local(3.0); // Start clock world.gop.fence(); const double wall_time_start = madness::wall_time(); // Do fock build for(int i = 0; i < repeat; ++i) { // Assume we have the cholesky decompositon of the density matrix ExchTemp("s,j,P") = DL("s,n") * TCInts("n,j,P"); // Compute coulomb and exchange G("i,j") = 2.0 * TCInts("i,j,P") * ( D("n,m") * TCInts("n,m,P") ) - ExchTemp("s,i,P") * ExchTemp("s,j,P"); F("i,j") = G("i,j") + H("i,j"); world.gop.fence(); if(world.rank() == 0) std::cout << "Iteration " << i + 1 << "\n"; } // Stop clock const double wall_time_stop = madness::wall_time(); if(world.rank() == 0){ std::cout << "Average wall time = " << (wall_time_stop - wall_time_start) / double(repeat) << " sec\nAverage GFLOPS = " << double(repeat) * (double(4.0 * matrix_size * matrix_size * df_size) + // Coulomb flops double(4.0 * matrix_size * matrix_size * matrix_size * df_size)) // Exchange flops / (wall_time_stop - wall_time_start) / 1.0e9 << "\n"; } madness::finalize(); return 0; }
/* Subroutine */ int dgtsv_(integer *n, integer *nrhs, doublereal *dl, doublereal *d, doublereal *du, doublereal *b, integer *ldb, integer * info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGTSV solves the equation A*X = B, where A is an N-by-N tridiagonal matrix, by Gaussian elimination with partial pivoting. Note that the equation A'*X = B may be solved by interchanging the order of the arguments DU and DL. Arguments ========= N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. DL (input/output) DOUBLE PRECISION array, dimension (N-1) On entry, DL must contain the (n-1) subdiagonal elements of A. On exit, DL is overwritten by the (n-2) elements of the second superdiagonal of the upper triangular matrix U from the LU factorization of A, in DL(1), ..., DL(n-2). D (input/output) DOUBLE PRECISION array, dimension (N) On entry, D must contain the diagonal elements of A. On exit, D is overwritten by the n diagonal elements of U. DU (input/output) DOUBLE PRECISION array, dimension (N-1) On entry, DU must contain the (n-1) superdiagonal elements of A. On exit, DU is overwritten by the (n-1) elements of the first superdiagonal of U. B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the N-by-NRHS right hand side matrix B. On exit, if INFO = 0, the N-by-NRHS solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, U(i,i) is exactly zero, and the solution has not been computed. The factorization has not been completed unless i = N. ===================================================================== Parameter adjustments Function Body */ /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ static doublereal temp, mult; static integer j, k; extern /* Subroutine */ int xerbla_(char *, integer *); #define DL(I) dl[(I)-1] #define D(I) d[(I)-1] #define DU(I) du[(I)-1] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("DGTSV ", &i__1); return 0; } if (*n == 0) { return 0; } i__1 = *n - 1; for (k = 1; k <= *n-1; ++k) { if (DL(k) == 0.) { /* Subdiagonal is zero, no elimination is required. */ if (D(k) == 0.) { /* Diagonal is zero: set INFO = K and return; a u nique solution can not be found. */ *info = k; return 0; } } else if ((d__1 = D(k), abs(d__1)) >= (d__2 = DL(k), abs(d__2))) { /* No row interchange required */ mult = DL(k) / D(k); D(k + 1) -= mult * DU(k); i__2 = *nrhs; for (j = 1; j <= *nrhs; ++j) { B(k+1,j) -= mult * B(k,j); /* L10: */ } if (k < *n - 1) { DL(k) = 0.; } } else { /* Interchange rows K and K+1 */ mult = D(k) / DL(k); D(k) = DL(k); temp = D(k + 1); D(k + 1) = DU(k) - mult * temp; if (k < *n - 1) { DL(k) = DU(k + 1); DU(k + 1) = -mult * DL(k); } DU(k) = temp; i__2 = *nrhs; for (j = 1; j <= *nrhs; ++j) { temp = B(k,j); B(k,j) = B(k+1,j); B(k+1,j) = temp - mult * B(k+1,j); /* L20: */ } } /* L30: */ } if (D(*n) == 0.) { *info = *n; return 0; } /* Back solve with the matrix U from the factorization. */ i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { B(*n,j) /= D(*n); if (*n > 1) { B(*n-1,j) = (B(*n-1,j) - DU(*n - 1) * B(*n,j)) / D(*n - 1); } for (k = *n - 2; k >= 1; --k) { B(k,j) = (B(k,j) - DU(k) * B(k+1,j) - DL(k) * B(k+2,j)) / D(k); /* L40: */ } /* L50: */ } return 0; /* End of DGTSV */ } /* dgtsv_ */