/// 2D homography estimation from point correspondences. void HomographyModel::Fit(const std::vector<int> &indices, std::vector<Model> *H) const { if(4 > indices.size()) return; const int n = static_cast<int>( indices.size() ); libNumerics::matrix<double> A = libNumerics::matrix<double>::zeros(n*2,9); for (int i = 0; i < n; ++i) { int index = indices[i]; int j = 2*i; A(j,0) = x1_(0, index); A(j,1) = x1_(1, index); A(j,2) = 1.0; A(j,6) = -x2_(0, index) * x1_(0, index); A(j,7) = -x2_(0, index) * x1_(1, index); A(j,8) = -x2_(0, index); ++j; A(j,3) = x1_(0, index); A(j,4) = x1_(1, index); A(j,5) = 1.0; A(j,6) = -x2_(1, index) * x1_(0, index); A(j,7) = -x2_(1, index) * x1_(1, index); A(j,8) = -x2_(1, index); } libNumerics::vector<double> vecNullspace(9); if( libNumerics::SVD::Nullspace(A,&vecNullspace) ) { libNumerics::matrix<double> M(3,3); M.read(vecNullspace); if(M.det() < 0) M = -M; M /= M(2,2); if(libNumerics::SVD::InvCond(M)>=ICOND_MIN && IsOrientationPreserving(indices,M) ) H->push_back(M); } }
/* Subroutine */ int cggrqf_(integer *m, integer *p, integer *n, complex *a,
	integer *lda, complex *taua, complex *b, integer *ldb, complex *taub,
	complex *work, integer *lwork, integer *info)
{
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    CGGRQF computes a generalized RQ factorization of an M-by-N matrix A
    and a P-by-N matrix B:

                A = R*Q,        B = Z*T*Q,

    where Q is an N-by-N unitary matrix, Z is a P-by-P unitary
    matrix, and R and T assume one of the forms:

    if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
                     N-M  M                           ( R21 ) N
                                                         N

    where R12 or R21 is upper triangular, and

    if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
                    (  0  ) P-N                         P   N-P
                       N

    where T11 is upper triangular.

    In particular, if B is square and nonsingular, the GRQ factorization
    of A and B implicitly gives the RQ factorization of A*inv(B):

                 A*inv(B) = (R*inv(T))*Z'

    where inv(B) denotes the inverse of the matrix B, and Z' denotes the
    conjugate transpose of the matrix Z.

    Arguments
    =========

    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    P       (input) INTEGER
            The number of rows of the matrix B.  P >= 0.

    N       (input) INTEGER
            The number of columns of the matrices A and B. N >= 0.

    A       (input/output) COMPLEX array, dimension (LDA,N)
            On entry, the M-by-N matrix A.
            On exit, if M <= N, the upper triangle of the subarray
            A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R;
            if M > N, the elements on and above the (M-N)-th subdiagonal
            contain the M-by-N upper trapezoidal matrix R; the remaining
            elements, with the array TAUA, represent the unitary
            matrix Q as a product of elementary reflectors (see Further
            Details).

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,M).

    TAUA    (output) COMPLEX array, dimension (min(M,N))
            The scalar factors of the elementary reflectors which
            represent the unitary matrix Q (see Further Details).

    B       (input/output) COMPLEX array, dimension (LDB,N)
            On entry, the P-by-N matrix B.
            On exit, the elements on and above the diagonal of the array
            contain the min(P,N)-by-N upper trapezoidal matrix T (T is
            upper triangular if P >= N); the elements below the diagonal,
            with the array TAUB, represent the unitary matrix Z as a
            product of elementary reflectors (see Further Details).

    LDB     (input) INTEGER
            The leading dimension of the array B. LDB >= max(1,P).

    TAUB    (output) COMPLEX array, dimension (min(P,N))
            The scalar factors of the elementary reflectors which
            represent the unitary matrix Z (see Further Details).

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,N,M,P).
            For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
            where NB1 is the optimal blocksize for the RQ factorization
            of an M-by-N matrix, NB2 is the optimal blocksize for the
            QR factorization of a P-by-N matrix, and NB3 is the optimal
            blocksize for a call of CUNMRQ.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO=-i, the i-th argument had an illegal value.

    Further Details
    ===============

    The matrix Q is represented as a product of elementary reflectors

       Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - taua * v * v'

    where taua is a complex scalar, and v is a complex vector with
    v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
    A(m-k+i,1:n-k+i-1), and taua in TAUA(i).
    To form Q explicitly, use LAPACK subroutine CUNGRQ.
    To use Q to update another matrix, use LAPACK subroutine CUNMRQ.

    The matrix Z is represented as a product of elementary reflectors

       Z = H(1) H(2) . . . H(k), where k = min(p,n).

    Each H(i) has the form

       H(i) = I - taub * v * v'

    where taub is a complex scalar, and v is a complex vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i),
    and taub in TAUB(i).
    To form Z explicitly, use LAPACK subroutine CUNGQR.
    To use Z to update another matrix, use LAPACK subroutine CUNMQR.

    Implementation notes
    ====================

    The function always returns 0; errors are reported through INFO and
    XERBLA.  The INFO values written by the three factorization/update
    calls are passed through unchanged (the last call wins).

    =====================================================================
*/
    /* System generated locals */
    integer i__1, i__2;
    doublereal d__1;
    /* Local variables */
    /* Largest optimal workspace size reported so far.  Deliberately NOT
       static: it is always assigned before it is read, and a static here
       would make the routine non-reentrant. */
    integer lopt;
    extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *,
	    integer *, complex *, complex *, integer *, integer *), cgerqf_(
	    integer *, integer *, complex *, integer *, complex *, complex *,
	    integer *, integer *), xerbla_(char *, integer *), cunmrq_(char *,
	    char *, integer *, integer *, integer *, complex *, integer *,
	    complex *, complex *, integer *, complex *, integer *, integer *);

/* 1-based, column-major accessors matching the Fortran original */
#define TAUA(I) taua[(I)-1]
#define TAUB(I) taub[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    /* Test the input parameters */
    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*p < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else if (*ldb < max(1,*p)) {
	*info = -8;
    } else {
	/* LWORK must be at least max(1,M,P,N) */
	i__1 = max(1,*m), i__1 = max(i__1,*p);
	if (*lwork < max(i__1,*n)) {
	    *info = -11;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGGRQF", &i__1);
	return 0;
    }

    /* RQ factorization of M-by-N matrix A: A = R*Q */
    cgerqf_(m, n, &A(1,1), lda, &TAUA(1), &WORK(1), lwork, info);
    lopt = (integer) WORK(1).r;

    /* Update B := B*Q' */
    i__1 = min(*m,*n);
    cunmrq_("Right", "Conjugate Transpose", p, n, &i__1, &A(max(1,*m-*n+1),1),
	    lda, &TAUA(1), &B(1,1), ldb, &WORK(1), lwork, info);
    i__2 = (integer) WORK(1).r;
    lopt = max(lopt,i__2);

    /* QR factorization of P-by-N matrix B: B = Z*T */
    cgeqrf_(p, n, &B(1,1), ldb, &TAUB(1), &WORK(1), lwork, info);

    /* Report the largest optimal workspace size seen in WORK(1) */
    i__2 = (integer) WORK(1).r;
    d__1 = (doublereal) max(lopt,i__2);
    WORK(1).r = d__1, WORK(1).i = 0.f;

    return 0;

/*     End of CGGRQF */

} /* cggrqf_ */
/// Returns a value-initialized temporary of type A (A is declared outside this view).
A foo () { return A(); }
int main(int argc, char *argv[]) { #ifdef EPETRA_MPI // Initialize MPI MPI_Init( &argc, &argv ); //int size, rank; // Number of MPI processes, My process ID //MPI_Comm_size(MPI_COMM_WORLD, &size); //MPI_Comm_rank(MPI_COMM_WORLD, &rank); #else //int size = 1; // Serial case (not using MPI) //int rank = 0; #endif bool verbose = false; int nx = 5; int ny = 5; if( argc > 1 ) { if( argc > 4 ) { cout << "Usage: " << argv[0] << " [-v [nx [ny]]]" << endl; exit(1); } int loc = 1; // Check if we should print results to standard out if(argv[loc][0]=='-' && argv[loc][1]=='v') { verbose = true; ++loc; } if (loc < argc) nx = atoi( argv[loc++] ); if( loc < argc) ny = atoi( argv[loc] ); } #ifdef EPETRA_MPI Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); bool verbose1 = false; if(verbose) verbose1 = (MyPID==0); if(verbose1) cout << EpetraExt::EpetraExt_Version() << endl << endl; Comm.Barrier(); if(verbose) cout << Comm << endl << flush; Comm.Barrier(); int NumGlobalElements = nx * ny; if( NumGlobalElements < NumProc ) { cout << "NumGlobalElements = " << NumGlobalElements << " cannot be < number of processors = " << NumProc; exit(1); } int IndexBase = 0; Epetra_Map Map( NumGlobalElements, IndexBase, Comm ); // Extract the global indices of the elements local to this processor int NumMyElements = Map.NumMyElements(); std::vector<int> MyGlobalElements( NumMyElements ); Map.MyGlobalElements( &MyGlobalElements[0] ); if( verbose ) cout << Map; // Create the number of non-zeros for a tridiagonal (1D problem) or banded // (2D problem) matrix std::vector<int> NumNz( NumMyElements, 5 ); int global_i; int global_j; for (int i = 0; i < NumMyElements; ++i) { global_j = MyGlobalElements[i] / nx; global_i = MyGlobalElements[i] - global_j * nx; if (global_i == 0) NumNz[i] -= 1; // By having separate statements, if (global_i == nx-1) NumNz[i] -= 1; // this works for 2D as well as 1D if (global_j == 0) NumNz[i] -= 1; 
// systems (i.e. nx x 1 or 1 x ny) if (global_j == ny-1) NumNz[i] -= 1; // or even a 1 x 1 system } if(verbose) { cout << endl << "NumNz: "; for (int i = 0; i < NumMyElements; i++) cout << NumNz[i] << " "; cout << endl; } // end if // Create the Epetra Compressed Row Sparse Graph Epetra_CrsGraph A( Copy, Map, &NumNz[0] ); std::vector<int> Indices(5); int NumEntries; for (int i = 0; i < NumMyElements; ++i ) { global_j = MyGlobalElements[i] / nx; global_i = MyGlobalElements[i] - global_j * nx; NumEntries = 0; // (i,j-1) entry if (global_j > 0 && ny > 1) Indices[NumEntries++] = global_i + (global_j-1)*nx; // (i-1,j) entry if (global_i > 0) Indices[NumEntries++] = global_i-1 + global_j *nx; // (i,j) entry Indices[NumEntries++] = MyGlobalElements[i]; // (i+1,j) entry if (global_i < nx-1) Indices[NumEntries++] = global_i+1 + global_j *nx; // (i,j+1) entry if (global_j < ny-1 && ny > 1) Indices[NumEntries++] = global_i + (global_j+1)*nx; // Insert the global indices A.InsertGlobalIndices( MyGlobalElements[i], NumEntries, &Indices[0] ); } // end i loop // Finish up graph construction A.FillComplete(); EpetraExt::CrsGraph_MapColoring Greedy0MapColoringTransform( EpetraExt::CrsGraph_MapColoring::GREEDY, 0, false, verbose ); Epetra_MapColoring & Greedy0ColorMap = Greedy0MapColoringTransform( A ); printColoring(Greedy0ColorMap, &A,verbose); EpetraExt::CrsGraph_MapColoring Greedy1MapColoringTransform( EpetraExt::CrsGraph_MapColoring::GREEDY, 1, false, verbose ); Epetra_MapColoring & Greedy1ColorMap = Greedy1MapColoringTransform( A ); printColoring(Greedy1ColorMap, &A,verbose); EpetraExt::CrsGraph_MapColoring Greedy2MapColoringTransform( EpetraExt::CrsGraph_MapColoring::GREEDY, 2, false, verbose ); Epetra_MapColoring & Greedy2ColorMap = Greedy2MapColoringTransform( A ); printColoring(Greedy2ColorMap, &A,verbose); EpetraExt::CrsGraph_MapColoring Lubi0MapColoringTransform( EpetraExt::CrsGraph_MapColoring::LUBY, 0, false, verbose ); Epetra_MapColoring & Lubi0ColorMap = 
Lubi0MapColoringTransform( A ); printColoring(Lubi0ColorMap, &A,verbose); EpetraExt::CrsGraph_MapColoring Lubi1MapColoringTransform( EpetraExt::CrsGraph_MapColoring::LUBY, 1, false, verbose ); Epetra_MapColoring & Lubi1ColorMap = Lubi1MapColoringTransform( A ); printColoring(Lubi1ColorMap, &A,verbose); EpetraExt::CrsGraph_MapColoring Lubi2MapColoringTransform( EpetraExt::CrsGraph_MapColoring::LUBY, 2, false, verbose ); Epetra_MapColoring & Lubi2ColorMap = Lubi2MapColoringTransform( A ); printColoring(Lubi2ColorMap, &A,verbose); #ifdef EPETRA_MPI if( verbose ) cout << "Parallel Map Coloring 1!\n"; EpetraExt::CrsGraph_MapColoring Parallel1MapColoringTransform( EpetraExt::CrsGraph_MapColoring::PSEUDO_PARALLEL, 0, false, verbose ); Epetra_MapColoring & Parallel1ColorMap = Parallel1MapColoringTransform( A ); printColoring(Parallel1ColorMap, &A,verbose); if( verbose ) cout << "Parallel Map Coloring 2!\n"; EpetraExt::CrsGraph_MapColoring Parallel2MapColoringTransform( EpetraExt::CrsGraph_MapColoring::JONES_PLASSMAN, 0, false, verbose ); Epetra_MapColoring & Parallel2ColorMap = Parallel2MapColoringTransform( A ); printColoring(Parallel2ColorMap, &A,verbose); #endif #ifdef EPETRA_MPI MPI_Finalize(); #endif return 0; }
void TestFunctors (void) { vector<int> v; v.resize (20); fill (v, 2); foreach (vector<int>::iterator, i, v) *i -= distance(v.begin(), i) & 1; vector<int> v1 (v); cout << "start:\t\t\t"; PrintVector (v); v = v1; cout << "plus:\t\t\t"; transform (v, v.begin(), v.begin(), plus<int>()); PrintVector (v); v = v1; cout << "minus:\t\t\t"; transform (v, v.begin(), v.begin(), minus<int>()); PrintVector (v); v = v1; cout << "divides:\t\t"; transform (v, v.begin(), v.begin(), divides<int>()); PrintVector (v); v = v1; cout << "multiplies:\t\t"; transform (v, v.begin(), v.begin(), multiplies<int>()); PrintVector (v); v = v1; cout << "modulus:\t\t"; transform (v, v.begin(), v.begin(), modulus<int>()); PrintVector (v); v = v1; cout << "logical_and:\t\t"; transform (v, v.begin(), v.begin(), logical_and<int>()); PrintVector (v); v = v1; cout << "logical_or:\t\t"; transform (v, v.begin(), v.begin(), logical_or<int>()); PrintVector (v); v = v1; cout << "equal_to:\t\t"; transform (v, v.begin(), v.begin(), equal_to<int>()); PrintVector (v); v = v1; cout << "not_equal_to:\t\t"; transform (v, v.begin(), v.begin(), not_equal_to<int>()); PrintVector (v); v = v1; cout << "greater:\t\t"; transform (v, v.begin(), v.begin(), greater<int>()); PrintVector (v); v = v1; cout << "less:\t\t\t"; transform (v, v.begin(), v.begin(), less<int>()); PrintVector (v); v = v1; cout << "greater_equal:\t\t"; transform (v, v.begin(), v.begin(), greater_equal<int>()); PrintVector (v); v = v1; cout << "less_equal:\t\t"; transform (v, v.begin(), v.begin(), less_equal<int>()); PrintVector (v); v = v1; cout << "compare:\t\t"; transform (v, v.begin(), v.begin(), compare<int>()); PrintVector (v); v = v1; cout << "negate:\t\t\t"; transform (v, negate<int>()); PrintVector (v); v = v1; cout << "logical_not:\t\t"; transform (v, logical_not<int>()); PrintVector (v); v = v1; cout << "unary_neg(negate):\t"; transform (v, unary_negator(negate<int>())); PrintVector (v); v = v1; cout << "binder1st(plus,5):\t"; transform (v, 
bind1st(plus<int>(), 5)); PrintVector (v); v = v1; cout << "binder2nd(minus,1):\t"; transform (v, bind2nd(minus<int>(), 1)); PrintVector (v); v = v1; cout << "compose1(-,+5):\t\t"; transform (v, compose1 (negate<int>(), bind2nd(plus<int>(), 5))); PrintVector (v); v = v1; cout << "compose1(-,-4):\t\t"; transform (v, compose1 (negate<int>(), bind2nd(minus<int>(), 4))); PrintVector (v); v = v1; cout << "compose2(/,+6,-4):\t"; transform (v, compose2 (divides<int>(), bind2nd(plus<int>(), 6), bind2nd(minus<int>(), 4))); PrintVector (v); cout << "mem_var(plus,6):\t"; vector<A> av; for (uoff_t i = 0; i < 20; ++ i) av.push_back (A(i)); transform (av, mem_var1(&A::m_v, bind2nd(plus<int>(), 6))); PrintVector (av); vector<A>::iterator found = find_if (av, mem_var_equal_to(&A::m_v, 14)); cout << "14 found at position " << found - av.begin() << endl; found = lower_bound (av.begin(), av.end(), 18, mem_var_less(&A::m_v)); cout << "18 found at position " << found - av.begin() << endl; cout << "add next:\t\t"; transform (av.begin(), av.end() - 1, av.begin() + 1, av.begin(), mem_var2(&A::m_v, plus<int>())); PrintVector (av); }
/**
    Purpose
    -------
    SSYTRD2_GPU reduces a real symmetric matrix A to real symmetric
    tridiagonal form T by an orthogonal similarity transformation:
    Q**H * A * Q = T.
    This version passes a workspace that is used in an optimized
    GPU matrix-vector product.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangle of A is stored;
      -     = MagmaLower:  Lower triangle of A is stored.

    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.

    @param[in,out]
    dA      REAL array on the GPU, dimension (LDDA,N)
            On entry, the symmetric matrix A.  If UPLO = MagmaUpper, the leading
            N-by-N upper triangular part of A contains the upper
            triangular part of the matrix A, and the strictly lower
            triangular part of A is not referenced.  If UPLO = MagmaLower, the
            leading N-by-N lower triangular part of A contains the lower
            triangular part of the matrix A, and the strictly upper
            triangular part of A is not referenced.
            On exit, if UPLO = MagmaUpper, the diagonal and first superdiagonal
            of A are overwritten by the corresponding elements of the
            tridiagonal matrix T, and the elements above the first
            superdiagonal, with the array TAU, represent the orthogonal
            matrix Q as a product of elementary reflectors; if UPLO
            = MagmaLower, the diagonal and first subdiagonal of A are over-
            written by the corresponding elements of the tridiagonal
            matrix T, and the elements below the first subdiagonal, with
            the array TAU, represent the orthogonal matrix Q as a product
            of elementary reflectors. See Further Details.

    @param[in]
    ldda    INTEGER
            The leading dimension of the array A.  LDDA >= max(1,N).

    @param[out]
    d       REAL array, dimension (N)
            The diagonal elements of the tridiagonal matrix T:
            D(i) = A(i,i).

    @param[out]
    e       REAL array, dimension (N-1)
            The off-diagonal elements of the tridiagonal matrix T:
            E(i) = A(i,i+1) if UPLO = MagmaUpper, E(i) = A(i+1,i) if UPLO = MagmaLower.

    @param[out]
    tau     REAL array, dimension (N-1)
            The scalar factors of the elementary reflectors (see Further
            Details).

    @param[out]
    A       (workspace) REAL array, dimension (LDA,N)
            On exit the diagonal, the upper part (if uplo=MagmaUpper)
            or the lower part (if uplo=MagmaLower) are copies of DA

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    work    (workspace) REAL array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.  LWORK >= N*NB, where NB is the
            optimal blocksize given by magma_get_ssytrd_nb().
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param[out]
    dwork   (workspace) REAL array on the GPU, dim (MAX(1,LDWORK))

    @param[in]
    ldwork  INTEGER
            The dimension of the array DWORK.
            LDWORK >= ldda*ceil(n/64) + 2*ldda*nb, where nb = magma_get_ssytrd_nb(n),
            and 64 is for the blocksize of magmablas_ssymv.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value
      -     MAGMA_ERR_HOST_ALLOC if the temporary host buffer of N floats
            could not be allocated.
            The same value is also the function's return value.

    Further Details
    ---------------
    If UPLO = MagmaUpper, the matrix Q is represented as a product of elementary
    reflectors

        Q = H(n-1) . . . H(2) H(1).

    Each H(i) has the form

        H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector with
    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
    A(1:i-1,i+1), and tau in TAU(i).

    If UPLO = MagmaLower, the matrix Q is represented as a product of elementary
    reflectors

        Q = H(1) H(2) . . . H(n-1).

    Each H(i) has the form

        H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector with
    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
    and tau in TAU(i).

    The contents of A on exit are illustrated by the following examples
    with n = 5:

    if UPLO = MagmaUpper:                if UPLO = MagmaLower:

        (  d   e   v2  v3  v4 )              (  d                  )
        (      d   e   v3  v4 )              (  e   d              )
        (          d   e   v4 )              (  v1  e   d          )
        (              d   e  )              (  v1  v2  e   d      )
        (                  d  )              (  v1  v2  v3  e   d  )

    where d and e denote diagonal and off-diagonal elements of T, and vi
    denotes an element of the vector defining H(i).

    Implementation notes
    --------------------
    The status (iinfo) returned by the final lapackf77_ssytrd call is not
    inspected. For n < 3000 the crossover nx equals n, so the blocked GPU
    loops below do not execute and the whole matrix is reduced by that call.

    @ingroup magma_ssyev_comp
    ********************************************************************/
extern "C" magma_int_t
magma_ssytrd2_gpu(
    magma_uplo_t uplo, magma_int_t n,
    magmaFloat_ptr dA, magma_int_t ldda,
    float *d, float *e, float *tau,
    float *A,  magma_int_t lda,
    float *work, magma_int_t lwork,
    magmaFloat_ptr dwork, magma_int_t ldwork,
    magma_int_t *info)
{
    /* 0-based, column-major element addresses in the host and device copies */
    #define  A(i_, j_) ( A + (i_) + (j_)*lda )
    #define dA(i_, j_) (dA + (i_) + (j_)*ldda)

    /* Constants */
    const float c_zero    = MAGMA_S_ZERO;
    const float c_neg_one = MAGMA_S_NEG_ONE;
    const float c_one     = MAGMA_S_ONE;
    const float             d_one     = MAGMA_D_ONE;

    /* Local variables */
    const char* uplo_ = lapack_uplo_const( uplo );

    magma_int_t nb = magma_get_ssytrd_nb( n );

    magma_int_t kk, nx;
    magma_int_t i, j, i_n;
    magma_int_t iinfo;
    magma_int_t ldw, lddw, lwkopt;
    magma_int_t lquery;

    /* Check the arguments; the negated position of the first bad one goes into *info */
    *info = 0;
    bool upper = (uplo == MagmaUpper);
    lquery = (lwork == -1);
    if (! upper && uplo != MagmaLower) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (ldda < max(1,n)) {
        *info = -4;
    } else if (lda < max(1,n)) {
        *info = -9;
    } else if (lwork < nb*n && ! lquery) {
        *info = -11;
    } else if (ldwork < ldda*magma_ceildiv(n,64) + 2*ldda*nb) {
        *info = -13;
    }

    /* Determine the block size. */
    ldw = n;
    lddw = ldda;  // hopefully ldda is rounded up to multiple of 32; ldwork is in terms of ldda, so lddw can't be > ldda.
    lwkopt = n * nb;
    if (*info == 0) {
        /* Also answers a workspace query (lwork == -1) */
        work[0] = magma_smake_lwork( lwkopt );
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery)
        return *info;

    /* Quick return if possible */
    if (n == 0) {
        work[0] = c_one;
        return *info;
    }

    // nx <= n is required
    // use LAPACK for n < 3000, otherwise switch at 512
    if (n < 3000)
        nx = n;
    else
        nx = 512;

    /* Host buffer of n floats handed to magma_slatrd2; released before returning */
    float *work2;
    if (MAGMA_SUCCESS != magma_smalloc_cpu( &work2, n )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }

    /* Queue on the current device, used by every transfer and GPU call below */
    magma_queue_t queue = NULL;
    magma_device_t cdev;
    magma_getdevice( &cdev );
    magma_queue_create( cdev, &queue );

    // clear out dwork in case it has NANs (used as y in ssymv)
    // rest of dwork (used as work in magmablas_ssymv) doesn't need to be cleared
    magmablas_slaset( MagmaFull, n, nb, c_zero, c_zero, dwork, lddw, queue );

    if (upper) {
        /* Reduce the upper triangle of A.
           Columns 1:kk are handled by the unblocked method. */
        kk = n - magma_roundup( n - nx, nb );

        for (i = n - nb; i >= kk; i -= nb) {
            /* Reduce columns i:i+nb-1 to tridiagonal form and form the
               matrix W which is needed to update the unreduced part of
               the matrix */

            /* Get the current panel */
            magma_sgetmatrix( i+nb, nb, dA(0, i), ldda, A(0, i), lda, queue );

            magma_slatrd2( uplo, i+nb, nb, A(0, 0), lda, e, tau,
                           work, ldw, work2, n, dA(0, 0), ldda, dwork, lddw,
                           dwork + 2*lddw*nb, ldwork - 2*lddw*nb, queue );

            /* Update the unreduced submatrix A(0:i-2,0:i-2), using an
               update of the form:  A := A - V*W' - W*V' */
            magma_ssetmatrix( i + nb, nb, work, ldw, dwork, lddw, queue );

            magma_ssyr2k( uplo, MagmaNoTrans, i, nb, c_neg_one,
                          dA(0, i), ldda, dwork, lddw,
                          d_one, dA(0, 0), ldda, queue );

            /* Copy superdiagonal elements back into A, and diagonal
               elements into D */
            for (j = i; j < i+nb; ++j) {
                *A(j-1,j) = MAGMA_S_MAKE( e[j - 1], 0 );
                d[j] = MAGMA_S_REAL( *A(j, j) );
            }
        }

        magma_sgetmatrix( kk, kk, dA(0, 0), ldda, A(0, 0), lda, queue );

        /* Use CPU code to reduce the last or only block */
        lapackf77_ssytrd( uplo_, &kk, A(0, 0), &lda, d, e, tau, work, &lwork, &iinfo );

        magma_ssetmatrix( kk, kk, A(0, 0), lda, dA(0, 0), ldda, queue );
    }
    else {
        /* Reduce the lower triangle of A */
        for (i = 0; i < n-nx; i += nb) {
            /* Reduce columns i:i+nb-1 to tridiagonal form and form the
               matrix W which is needed to update the unreduced part of
               the matrix */

            /* Get the current panel */
            magma_sgetmatrix( n-i, nb, dA(i, i), ldda, A(i, i), lda, queue );

            magma_slatrd2( uplo, n-i, nb, A(i, i), lda, &e[i], &tau[i],
                           work, ldw, work2, n, dA(i, i), ldda, dwork, lddw,
                           dwork + 2*lddw*nb, ldwork - 2*lddw*nb, queue );

            /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
               an update of the form:  A := A - V*W' - W*V' */
            magma_ssetmatrix( n-i, nb, work, ldw, dwork, lddw, queue );

            // cublas 6.5 crashes here if lddw % 32 != 0, e.g., N=250.
            magma_ssyr2k( MagmaLower, MagmaNoTrans, n-i-nb, nb, c_neg_one,
                          dA(i+nb, i), ldda, &dwork[nb], lddw,
                          d_one, dA(i+nb, i+nb), ldda, queue );

            /* Copy subdiagonal elements back into A, and diagonal
               elements into D */
            for (j = i; j < i+nb; ++j) {
                *A(j+1,j) = MAGMA_S_MAKE( e[j], 0 );
                d[j] = MAGMA_S_REAL( *A(j, j) );
            }
        }

        /* Use CPU code to reduce the last or only block;
           i is the first column left unreduced by the loop above */
        magma_sgetmatrix( n-i, n-i, dA(i, i), ldda, A(i, i), lda, queue );

        i_n = n-i;
        lapackf77_ssytrd( uplo_, &i_n, A(i, i), &lda, &d[i], &e[i],
                          &tau[i], work, &lwork, &iinfo );

        magma_ssetmatrix( n-i, n-i, A(i, i), lda, dA(i, i), ldda, queue );
    }

    magma_free_cpu( work2 );
    magma_queue_destroy( queue );

    /* Written again because work was used as scratch space above */
    work[0] = magma_smake_lwork( lwkopt );

    return *info;
} /* magma_ssytrd2_gpu */
/**
    Purpose
    -------
    SORGHR generates a REAL unitary matrix Q which is defined as the
    product of IHI-ILO elementary reflectors of order N, as returned by
    SGEHRD:

    Q = H(ilo) H(ilo+1) . . . H(ihi-1).

    Arguments
    ---------
    @param[in]
    n       INTEGER
            The order of the matrix Q. N >= 0.

    @param[in]
    ilo     INTEGER
    @param[in]
    ihi     INTEGER
            ILO and IHI must have the same values as in the previous call
            of SGEHRD. Q is equal to the unit matrix except in the
            submatrix Q(ilo+1:ihi,ilo+1:ihi).
            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.

    @param[in,out]
    A       REAL array, dimension (LDA,N)
            On entry, the vectors which define the elementary reflectors,
            as returned by SGEHRD.
            On exit, the N-by-N unitary matrix Q.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    @param[in]
    tau     REAL array, dimension (N-1)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by SGEHRD.

    @param[in]
    T       REAL array on the GPU device.
            T contains the T matrices used in blocking the elementary
            reflectors H(i), e.g., this can be the 9th argument of
            magma_sgehrd.

    @param[in]
    nb      INTEGER
            This is the block size used in SGEHRD, and correspondingly
            the size of the T matrices, used in the factorization, and
            stored in T.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value
            The status returned by the inner magma_sorgqr_m call is not
            propagated.

    @ingroup magma_sgeev_comp
    ********************************************************************/
extern "C" magma_int_t
magma_sorghr_m(
    magma_int_t n, magma_int_t ilo, magma_int_t ihi,
    float *A, magma_int_t lda,
    float *tau,
    float *T, magma_int_t nb,
    magma_int_t *info)
{
    #define A(i,j) (A + (i) + (j)*lda)

    magma_int_t row, col, iinfo;
    const magma_int_t nh = ihi - ilo;   /* order of the non-trivial block of Q */

    /* Validate the arguments */
    *info = 0;
    if (n < 0) {
        *info = -1;
    } else if (ilo < 1 || ilo > max(1,n)) {
        *info = -2;
    } else if (ihi < min(ilo,n) || ihi > n) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return if possible */
    if (n == 0)
        return *info;

    /* Shift the vectors which define the elementary reflectors one
       column to the right: column col receives the sub-diagonal part of
       column col-1 (rows col+1 .. ihi-1); every other off-diagonal entry
       of the column is cleared. Walking right to left keeps each source
       column intact until it has been copied. */
    for (col = ihi-1; col >= ilo; --col) {
        float       *dst = A(0, col);
        const float *src = A(0, col - 1);
        for (row = 0; row < col; ++row)
            dst[row] = MAGMA_S_ZERO;
        for (row = col+1; row < ihi; ++row)
            dst[row] = src[row];
        for (row = ihi; row < n; ++row)
            dst[row] = MAGMA_S_ZERO;
    }

    /* Set the first ilo and the last n-ihi columns to those of the
       unit matrix */
    for (col = 0; col < n; ++col) {
        if (col >= ilo && col < ihi)
            continue;
        float *unitCol = A(0, col);
        for (row = 0; row < n; ++row)
            unitCol[row] = MAGMA_S_ZERO;
        unitCol[col] = MAGMA_S_ONE;
    }

    if (nh > 0) {
        /* Generate Q(ilo+1:ihi,ilo+1:ihi) */
        magma_sorgqr_m( nh, nh, nh,
                        A(ilo, ilo), lda,
                        tau+ilo-1, T, nb, &iinfo );
    }

    return *info;
} /* magma_sorghr_m */
/* Print the leading rows-by-cols part of a column-major array with leading
   dimension ld, one matrix row per output line. */
static void print_colmajor(const float *mat, int ld, int rows, int cols)
{
  int r, c;

  for (r = 0; r < rows; r++)
    {
      for (c = 0; c < cols; c++)
        printf("%8.4f ", mat[c*ld + r]);
      printf("\n");
    }
}

/* ACML example: factorize a 4x4 matrix with sgetrf and, if that succeeds,
   solve for two right-hand sides with sgetrs, printing inputs and solution. */
int main(void)
{
#define NMAX 8
#define NRHMAX 8
  const int lda=NMAX, ldb=NMAX;
  int i, info, j, n, nrhs;
  float a[NMAX*NMAX], b[NMAX*NRHMAX];
  int ipiv[NMAX];

  /* Input data, written row by row */
  static const double a_values[4][4] = {
    {  1.80,  2.88,  2.05, -0.89 },
    {  5.25, -2.95, -0.95, -3.80 },
    {  1.58, -2.69, -2.90, -1.04 },
    { -1.11, -0.66, -0.59,  0.80 }
  };
  static const double b_values[4][2] = {
    {  9.52,  18.47 },
    { 24.35,   2.25 },
    {  0.77, -13.28 },
    { -6.22,  -6.21 }
  };

  /* These macros allow access to 1-d arrays as though
     they are 2-d arrays stored in column-major order,
     as required by ACML C routines. */
#define A(I,J) a[((J)-1)*lda+(I)-1]
#define B(I,J) b[((J)-1)*ldb+(I)-1]

  printf("ACML example: solution of linear equations using sgetrf/sgetrs\n");
  printf("--------------------------------------------------------------\n");
  printf("\n");

  /* Initialize matrix A */
  n = 4;
  for (i = 1; i <= n; i++)
    for (j = 1; j <= n; j++)
      A(i,j) = a_values[i-1][j-1];

  /* Initialize right-hand-side matrix B */
  nrhs = 2;
  for (i = 1; i <= n; i++)
    for (j = 1; j <= nrhs; j++)
      B(i,j) = b_values[i-1][j-1];

  printf("Matrix A:\n");
  print_colmajor(a, lda, n, n);

  printf("\n");
  printf("Right-hand-side matrix B:\n");
  print_colmajor(b, ldb, n, nrhs);

  /* Factorize A */
  sgetrf(n,n,a,lda,ipiv,&info);

  printf("\n");
  if (info != 0)
    {
      printf("The factor U of matrix A is singular\n");
    }
  else
    {
      /* Compute solution; b is overwritten with X */
      sgetrs('N',n,nrhs,a,lda,ipiv,b,ldb,&info);

      /* Print solution */
      printf("Solution matrix X of equations A*X = B:\n");
      print_colmajor(b, ldb, n, nrhs);
    }

  return 0;
}
/* Public Domain Curses */ #include "pdcdos.h" RCSID("$Id: pdcdisp.c,v 1.65 2008/07/13 16:08:17 wmcbrine Exp $") /* ACS definitions originally by [email protected] -- these match code page 437 and compatible pages (CP850, CP852, etc.) */ #ifdef CHTYPE_LONG # define A(x) ((chtype)x | A_ALTCHARSET) chtype acs_map[128] = { A(0), A(1), A(2), A(3), A(4), A(5), A(6), A(7), A(8), A(9), A(10), A(11), A(12), A(13), A(14), A(15), A(16), A(17), A(18), A(19), A(20), A(21), A(22), A(23), A(24), A(25), A(26), A(27), A(28), A(29), A(30), A(31), ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', A(0x1a), A(0x1b), A(0x18), A(0x19), '/', 0xdb, '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
/* QR factorization with column pivoting, A*P = Q*R; see the full argument
   description inside the function body. */
extern "C" magma_int_t
magma_sgeqp3_gpu( magma_int_t m, magma_int_t n,
                  float *A, magma_int_t lda,
                  magma_int_t *jpvt, float *tau,
                  float *work, magma_int_t lwork,
#if defined(PRECISION_z) || defined(PRECISION_c)
                  float *rwork,
#endif
                  magma_int_t *info )
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    SGEQP3 computes a QR factorization with column pivoting of a
    matrix A:  A*P = Q*R  using Level 3 BLAS.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    A       (input/output) REAL array, dimension (LDA,N)
            On entry, the M-by-N matrix A.
            On exit, the upper triangle of the array contains the
            min(M,N)-by-N upper trapezoidal matrix R; the elements below
            the diagonal, together with the array TAU, represent the
            unitary matrix Q as a product of min(M,N) elementary
            reflectors.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,M).

    JPVT    (input/output) INTEGER array, dimension (N)
            On entry, if JPVT(J).ne.0, the J-th column of A is permuted
            to the front of A*P (a leading column); if JPVT(J)=0,
            the J-th column of A is a free column.
            On exit, if JPVT(J)=K, then the J-th column of A*P was
            the K-th column of A.

    TAU     (output) REAL array, dimension (min(M,N))
            The scalar factors of the elementary reflectors.

    WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))
            On exit, if INFO=0, WORK(1) returns the optimal LWORK.
            NOTE(review): both assignments to work[0] in this function are
            commented out, so the optimal LWORK is not actually written,
            including on a workspace query -- confirm whether intended.

    LWORK   (input) INTEGER
            The dimension of the array WORK.
            For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
            for [cz]geqp3, LWORK >= (N+1)*NB,
            where NB is the optimal blocksize.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    For [cz]geqp3 only:
    RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)

    INFO    (output) INTEGER
            = 0: successful exit.
            < 0: if INFO = -i, the i-th argument had an illegal value.
            MAGMA_ERR_DEVICE_ALLOC if the internal buffer of (N+1)*NB
            floats could not be allocated with magma_smalloc.

    Further Details
    ===============
    The matrix Q is represented as a product of elementary reflectors

      Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

      H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector
    with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
    A(i+1:m,i), and tau in TAU(i).
    =====================================================================   */

/* 0-based, column-major element address inside A */
#define  A(i, j) (A     + (i) + (j)*(lda ))

    magma_int_t ione = 1;

    //magma_int_t na;
    magma_int_t n_j;
    magma_int_t j, jb, nb, sm, sn, fjb, nfxd, minmn;
    magma_int_t topbmn, sminmn, lwkopt, lquery;

    /* Check the arguments */
    *info = 0;
    lquery = (lwork == -1);
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,m)) {
        *info = -4;
    }

    nb = magma_get_sgeqp3_nb(min(m, n));
    if (*info == 0) {
        /* minmn is only assigned here; every later use is reached with *info == 0 */
        minmn = min(m,n);
        if (minmn == 0) {
            lwkopt = 1;
        } else {
            lwkopt = (n + 1)*nb;
#if defined(PRECISION_d) || defined(PRECISION_s)
            /* real precisions keep the 2*n column norms at the end of work */
            lwkopt += 2*n;
#endif
        }
        //work[0] = MAGMA_S_MAKE( lwkopt, 0. );

        if (lwork < lwkopt && ! lquery) {
            *info = -8;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    } else if (lquery) {
        return *info;
    }

    /* Quick return for an empty matrix */
    if (minmn == 0)
        return *info;

#if defined(PRECISION_d) || defined(PRECISION_s)
    /* Column norms live in the tail of work, after the first (n+1)*nb entries */
    float *rwork = work + (n + 1)*nb;
#endif
    /* Zero-initialized scratch buffer of (n+1)*nb floats, freed before returning */
    float *df;
    if (MAGMA_SUCCESS != magma_smalloc( &df, (n+1)*nb )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    cudaMemset( df, 0, (n+1)*nb*sizeof(float) );

    nfxd = 0;
    /* Move initial columns up front.
     * Note jpvt uses 1-based indices for historical compatibility.
     * NOTE(review): blasf77_sswap is applied directly to A here; if A is
     * device memory, as the _gpu suffix suggests, confirm this is valid. */
    for (j = 0; j < n; ++j) {
        if (jpvt[j] != 0) {
            if (j != nfxd) {
                blasf77_sswap(&m, A(0, j), &ione, A(0, nfxd), &ione);
                jpvt[j]    = jpvt[nfxd];
                jpvt[nfxd] = j + 1;
            }
            else {
                jpvt[j] = j + 1;
            }
            ++nfxd;
        }
        else {
            jpvt[j] = j + 1;
        }
    }

    /* Factorize fixed columns
       =======================
       Compute the QR factorization of fixed columns and update
       remaining columns.
       (This step is currently disabled.)
    if (nfxd > 0) {
        na = min(m,nfxd);
        lapackf77_sgeqrf(&m, &na, A, &lda, tau, work, &lwork, info);
        if (na < n) {
            n_j = n - na;
            lapackf77_sormqr( MagmaLeftStr, MagmaTransStr, &m, &n_j, &na,
                              A, &lda, tau, A(0, na), &lda,
                              work, &lwork, info );
        }
    }*/

    /* Factorize free columns */
    if (nfxd < minmn) {
        sm = m - nfxd;
        sn = n - nfxd;
        sminmn = minmn - nfxd;

        /*if (nb < sminmn) {
            j = nfxd;

            // Set the original matrix to the GPU
            magma_ssetmatrix_async( m, sn,
                                    A (0,j), lda,
                                    dA(0,j), ldda, stream[0] );
        }*/

        /* Initialize partial column norms. */
        magmablas_snrm2_cols(sm, sn, A(nfxd,nfxd), lda, &rwork[nfxd]);
        /* Duplicate the norms into the second half of rwork */
#if defined(PRECISION_d) || defined(PRECISION_z)
        magma_dcopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
#else
        magma_scopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn);
#endif
        /*for (j = nfxd; j < n; ++j) {
            rwork[j] = cblas_snrm2(sm, A(nfxd, j), ione);
            rwork[n + j] = rwork[j];
        }*/

        j = nfxd;
        //if (nb < sminmn)
        {
            /* Use blocked code initially. */
            //magma_queue_sync( stream[0] );

            /* Compute factorization: while loop. */
            topbmn = minmn;// - nb;
            while(j < topbmn) {
                jb = min(nb, topbmn - j);

                /* Factorize JB columns among columns J:N. */
                n_j = n - j;

                /*if (j>nfxd) {
                    // Get panel to the CPU
                    magma_sgetmatrix( m-j, jb,
                                      dA(j,j), ldda,
                                      A (j,j), lda );

                    // Get the rows
                    magma_sgetmatrix( jb, n_j - jb,
                                      dA(j,j + jb), ldda,
                                      A (j,j + jb), lda );
                }*/

                //magma_slaqps_gpu    // this is a cpp-file
                magma_slaqps2_gpu   // this is a cuda-file
                    ( m, n_j, j, jb, &fjb,
                      A (0, j), lda,
                      &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
                      work,
                      &df[jb], n_j );

                j += fjb;  /* fjb is actual number of columns factored */
            }
        }

        /* Use unblocked code to factor the last or only block.
           (Currently disabled.)
        if (j < minmn) {
            n_j = n - j;
            if (j > nfxd) {
                magma_sgetmatrix( m-j, n_j,
                                  dA(j,j), ldda,
                                  A (j,j), lda );
            }
            lapackf77_slaqp2(&m, &n_j, &j, A(0, j), &lda, &jpvt[j],
                             &tau[j], &rwork[j], &rwork[n+j], work );
        }*/
    }
    //work[0] = MAGMA_S_MAKE( lwkopt, 0. );
    magma_free(df);

    return *info;
} /* sgeqp3 */
/**
 * Uniform constructor: passes the same value to R(), G(), B() and A(),
 * in that order.
 */
Color(float value)
{
    R(value);
    G(value);
    B(value);
    A(value);
}
/**
 * Zero constructor: passes 0.0f to R(), G(), B() and A(), in that order.
 */
Color()
{
    R(0.0f);
    G(0.0f);
    B(0.0f);
    A(0.0f);
}
/**
 * Sets all four components at once by forwarding r, g, b and a to
 * R(), G(), B() and A() respectively, in that order.
 */
void Set(float r, float g, float b, float a) { R(r); G(g); B(b); A(a); }