// Matrix-matrix product.
//
// Returns a Form with a.n() rows and b.m() columns whose entry [i][j] is the
// sum over l of a[i][l]*b[l][j]. Aborts through FatalError when the number of
// columns of a (a.m()) differs from the number of rows of b (b.n()).
Form Foam::operator*(const Matrix<Form, Type>& a, const Matrix<Form, Type>& b)
{
    if (a.m() != b.n())
    {
        FatalErrorIn
        (
            "Matrix<Form, Type>::operator*"
            "(const Matrix<Form, Type>&, const Matrix<Form, Type>&)"
        )   << "attempted to multiply incompatible matrices:" << nl
            << "Matrix A : " << a.n() << " rows, " << a.m() << " columns" << nl
            << "Matrix B : " << b.n() << " rows, " << b.m() << " columns" << nl
            << "In order to multiply matrices, columns of A must equal "
            << "rows of B"
            << abort(FatalError);
    }

    // Every entry starts at zero so the products can be accumulated in place.
    Form ab(a.n(), b.m(), scalar(0));

    // The loop counters are plain locals: the 'register' storage class was
    // deprecated in C++11 and removed in C++17, and never affected the result.
    for (label i = 0; i < ab.n(); i++)
    {
        for (label j = 0; j < ab.m(); j++)
        {
            for (label l = 0; l < b.n(); l++)
            {
                ab[i][j] += a[i][l]*b[l][j];
            }
        }
    }

    return ab;
}
void M_Add4(Matrix<Scalar>& M1, Matrix<Scalar>& M2, Matrix<Scalar>& C, double x, bool sequential, Scalar beta) { const int strideM1 = M1.stride(); const int strideM2 = M2.stride(); const int strideC = C.stride(); const Scalar *dataM1 = M1.data(); const Scalar *dataM2 = M2.data(); Scalar *dataC = C.data(); if (beta != Scalar(0.0)) { #ifdef _PARALLEL_ # pragma omp parallel for if(!sequential) #endif for (int j = 0; j < C.n(); ++j) { for (int i = 0; i < C.m(); ++i) { dataC[i + j * strideC] = dataM1[i + j * strideM1] + dataM2[i + j * strideM2] + beta * dataC[i + j * strideC]; } } } else { #ifdef _PARALLEL_ # pragma omp parallel for if(!sequential) #endif for (int j = 0; j < C.n(); ++j) { for (int i = 0; i < C.m(); ++i) { dataC[i + j * strideC] = dataM1[i + j * strideM1] + dataM2[i + j * strideM2]; } } } }
void somp(const Matrix<T>* XT, const Matrix<T>& D, SpMatrix<T>* spalphaT, const int Ngroups, const int LL, const T* eps, const bool adapt, const int numThreads) { if (LL <= 0) return; const int K = D.n(); const int L = MIN(D.m(),MIN(LL,K)); if (!D.isNormalized()) { cerr << "Current implementation of OMP does not support non-normalized dictionaries" << endl; return; } /// compute the Gram Matrix G=D'D Matrix<T> G; D.XtX(G); int NUM_THREADS=init_omp(numThreads); int i; #pragma omp parallel for private(i) for (i = 0; i< Ngroups; ++i) { const Matrix<T>& X = XT[i]; const int M = X.n(); SpMatrix<T>& spalpha = spalphaT[i]; spalpha.clear(); Vector<int> rv; Matrix<T> vM; T thrs = adapt ? eps[i] : M*(*eps); coreSOMP(X,D,G,vM,rv,L,thrs); spalpha.convert2(vM,rv,K); } }
// Checks Matrix::product on a fixed 2x3 by 3x2 example.
// Returns true when every assertion holds (assert aborts otherwise).
bool test() {
    // Operands and the expected product, written out row by row.
    const int lhs[2][3] = {{1, 2, 0}, {4, 3, -1}};
    const int rhs[3][2] = {{5, 1}, {2, 3}, {3, 4}};
    const int expected[2][2] = {{9, 7}, {23, 9}};

    Matrix m1(2, 3);
    Matrix m2(3, 2);

    for (int row = 0; row < 2; ++row) {
        for (int col = 0; col < 3; ++col) {
            m1.at(row, col) = lhs[row][col];
        }
    }
    for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 2; ++col) {
            m2.at(row, col) = rhs[row][col];
        }
    }

    Matrix r = Matrix::product(m1, m2);

    assert(r.m() == 2);
    assert(r.n() == 2);
    for (int row = 0; row < 2; ++row) {
        for (int col = 0; col < 2; ++col) {
            assert(r.at(row, col) == expected[row][col]);
        }
    }
    return true;
}
// Unary minus: returns a matrix of the same dimensions as a whose entries
// are the negated entries of a.
Form Foam::operator-(const Matrix<Form, Type>& a)
{
    Form na(a.n(), a.m());

    // Row pointers are only taken when the matrix has at least one entry.
    if (a.n() && a.m())
    {
        // a[0] / na[0] are used as the start of a run of n*m entries.
        Type* nav = na[0];
        const Type* av = a[0];

        const label nm = a.n()*a.m();

        // Plain loop counter: 'register' was deprecated in C++11 and removed
        // in C++17.
        for (label i = 0; i < nm; i++)
        {
            nav[i] = -av[i];
        }
    }

    return na;
}
// Scalar-matrix product: returns a matrix of the same dimensions as a whose
// entries are s times the entries of a.
Form Foam::operator*(const scalar s, const Matrix<Form, Type>& a)
{
    Form sa(a.n(), a.m());

    // Row pointers are only taken when the matrix has at least one entry.
    if (a.n() && a.m())
    {
        // a[0] / sa[0] are used as the start of a run of n*m entries.
        Type* sav = sa[0];
        const Type* av = a[0];

        const label nm = a.n()*a.m();

        // Plain loop counter: 'register' was deprecated in C++11 and removed
        // in C++17.
        for (label i = 0; i < nm; i++)
        {
            sav[i] = s*av[i];
        }
    }

    return sa;
}
// Builds a diagonal matrix from the diagonal of a general matrix.
//
// The list is sized to min(a.n(), a.m()); entries a[k][k] are first gathered
// in a temporary List and then handed to this object's operator().
// NOTE(review): operator() presumably copies the list into this object's
// storage - confirm against gpuList.
Foam::gpuDiagonalMatrix<Type>::gpuDiagonalMatrix(const Matrix<Form, Type>& a)
:
    gpuList<Type>(min(a.n(), a.m()))
{
    List<Type> diagonal(this->size());

    for (label k = 0; k < diagonal.size(); k++)
    {
        diagonal.operator[](k) = a[k][k];
    }

    this->operator()(diagonal);
}
// Returns the matrix produced by SkewedUniformRandomMatrix3<Scalar>(m, n, a, b)
// with every entry divided by that matrix's MaxNorm().
//
// Note kept from the original source: fancier C++11 random number generators
// could be used, but they are still slow on some systems.
Matrix<Scalar> SkewedUniformRandomMatrix4(int m, int n, double a, double b) {
    Matrix<Scalar> scaled = SkewedUniformRandomMatrix3<Scalar>(m, n, a, b);
    Scalar divisor = scaled.MaxNorm();

    for (int col = 0; col < scaled.n(); ++col) {
        for (int row = 0; row < scaled.m(); ++row) {
            scaled(row, col) = scaled(row, col) / divisor;
        }
    }

    return scaled;
}
// Copies S1 into C, entry by entry.
//
// Entry (i, j) of each operand is addressed as data()[i + j * stride()], with
// i running over C.m() and j over C.n().
//
// x          : not used by this routine.
// sequential : when true, the OpenMP loop (compiled in only if _PARALLEL_ is
//              defined) is not run in parallel.
void S_Add1(Matrix<Scalar>& S1, Matrix<Scalar>& C, double x, bool sequential) {
    const Scalar *src = S1.data();
    Scalar *dst = C.data();

    const int ldSrc = S1.stride();
    const int ldDst = C.stride();

#ifdef _PARALLEL_
# pragma omp parallel for if(!sequential)
#endif
    for (int col = 0; col < C.n(); ++col) {
        const Scalar *srcCol = src + col * ldSrc;
        Scalar *dstCol = dst + col * ldDst;
        for (int row = 0; row < C.m(); ++row) {
            dstCol[row] = srcCol[row];
        }
    }
}
generator( const Matrix& A ) : A_( A ) { const int c = 0; const int *ind = A_.ind(), *col = A_.col(); for ( int i = 0; i < A_.m(); ++i ) base_.join( Element( i, c ) ); for ( int i = 0; i < A_.m(); ++i ) { Neighbour u( i, c ); for ( int k = ind[ i ]; k < ind[ i + 1 ]; ++k ) { int j = col[ k ]; u.join( j, c ); } topo_.join( u ); } }
// Matrix difference: returns a matrix of the same dimensions as a whose
// entries are the entries of a minus the corresponding entries of b.
// Aborts through FatalError when the row or column counts of a and b differ.
Form Foam::operator-(const Matrix<Form, Type>& a, const Matrix<Form, Type>& b)
{
    if (a.n() != b.n())
    {
        FatalErrorIn
        (
            "Matrix<Form, Type>::operator-"
            "(const Matrix<Form, Type>&, const Matrix<Form, Type>&)"
        )   << "attempted to subtract matrices with different number of rows: "
            << a.n() << ", " << b.n()
            << abort(FatalError);
    }

    if (a.m() != b.m())
    {
        FatalErrorIn
        (
            "Matrix<Form, Type>::operator-"
            "(const Matrix<Form, Type>&, const Matrix<Form, Type>&)"
        )   << "attempted to subtract matrices with different number of columns: "
            << a.m() << ", " << b.m()
            << abort(FatalError);
    }

    Form ab(a.n(), a.m());

    // Same guard as the unary minus and scalar product in this file: row
    // pointers are only taken when the matrices hold at least one entry.
    if (a.n() && a.m())
    {
        // a[0] / b[0] / ab[0] are used as the start of a run of n*m entries.
        Type* abv = ab[0];
        const Type* av = a[0];
        const Type* bv = b[0];

        const label nm = a.n()*a.m();

        // Plain loop counter: 'register' was deprecated in C++11 and removed
        // in C++17.
        for (label i = 0; i < nm; i++)
        {
            abv[i] = av[i] - bv[i];
        }
    }

    return ab;
}
int main( int argc, char** argv ) { struct timespec start, stop; double time; #ifndef NDEBUG std::cout << "-->WARNING: COMPILED *WITH* ASSERTIONS!<--" << std::endl; #endif if( argc<=3 ) { std::cout << "Usage: " << argv[0] << " <mtx> <scheme> <x> <REP1> <REP2>" << std::endl << std::endl; std::cout << "calculates Ax=y and reports average time taken as well as the mean of y." << std::endl; std::cout << "with\t\t <mtx> filename of the matrix A in matrix-market or binary triplet format." << std::endl; std::cout << " \t\t <scheme> number of a sparse scheme to use, see below." << std::endl; std::cout << " \t\t <x> 0 for taking x to be the 1-vector, 1 for taking x to be random (fixed seed)." << std::endl; std::cout << " \t\t <REP1> (optional, default is 1) number of repititions of the entire experiment." << std::endl; std::cout << " \t\t <REP2> (optional, default is 1) number of repititions of the in-place SpMV multiplication, per experiment." << std::endl; std::cout << std::endl << "Possible schemes:" << std::endl; std::cout << " 0: TS (triplet scheme)" << std::endl; std::cout << " 1: CRS (also known as CSR)" << std::endl; std::cout << " 2: ICRS (Incremental CRS)" << std::endl; std::cout << " 3: ZZ-CRS (Zig-zag CRS)" << std::endl; std::cout << " 4: ZZ-ICRS (Zig-zag ICRS)" << std::endl; std::cout << " 5: SVM (Sparse vector matrix)" << std::endl; std::cout << " 6: HTS (Hilbert-ordered triplet scheme)" << std::endl; std::cout << " 7: BICRS (Bi-directional Incremental CRS)" << std::endl; std::cout << " 8: Hilbert (Hilbert-ordered triplets backed by BICRS)" << std::endl; std::cout << " 9: Block Hilbert (Sparse matrix blocking, backed by Hilbert and HBICRS)" << std::endl; std::cout << "10: Bisection Hilbert (Sparse matrix blocking by bisection, backed by Hilbert and HBICRS)" << std::endl; std::cout << "11: CBICRS (Compressed Bi-directional Incremental CRS)" << std::endl; std::cout << "12: Beta Hilbert (known as Block CO-H+ in the paper by Yzelman & Roose, 2012: parallel 
compressed blocked Hilbert with BICRS)" << std::endl; std::cout << "13: Row-distributed Beta Hilbert (known as Row-distributed block CO-H in the paper by Yzelman & Roose, 2012: same as 12, but simpler distribution)" << std::endl; #ifdef WITH_CSB std::cout << "14: Row-distributed CSB (Uses CSB sequentially within the row-distributed scheme of 13)" << std::endl; #endif std::cout << "15: Row-distributed Hilbert (Parallel row-distributed Hilbert scheme, see also 8)" << std::endl; std::cout << "16: Row-distributed parallel CRS (using OpenMP, known as OpenMP CRS in the paper by Yzelman & Roose, 2012)" << std::endl; std::cout << "17: Row-distributed SpMV using compressed Hilbert indices." << std::endl; #ifdef WITH_MKL std::cout << "18: Intel MKL SpMV based on the CRS data structure." << std::endl; #endif std::cout << "19: Optimised ICRS." << std::endl; #ifdef WITH_CUDA std::cout << "20: CUDA CuSparse HYB format." << std::endl; #endif std::cout << std::endl << "The in-place Ax=y calculation is preceded by a quasi pre-fetch." << std::endl; std::cout << "Add a minus sign before the scheme number to enable use of the CCS wrapper (making each CRS-based structure CCS-based instead)" << std::endl; std::cout << "Note: binary triplet format is machine-dependent. "; std::cout << "Take care when using the same binary files on different machine architectures." << std::endl; return EXIT_FAILURE; } std::string file = std::string( argv[1] ); int scheme = atoi( argv[2] ); int ccs = scheme < 0 ? 
1 : 0; if( ccs ) scheme = -scheme; int x_mode = atoi( argv[3] ); unsigned long int rep1 = 1; unsigned long int rep2 = 1; if( argc >= 5 ) rep1 = static_cast< unsigned long int >( atoi( argv[4] ) ); if( argc >= 6 ) rep2 = static_cast< unsigned long int >( atoi( argv[5] ) ); if( scheme != 16 && scheme != -16 && //pin master thread to a single core scheme != 18 && scheme != -18 ) { //but not when OpenMP is used (otherwise serialised computations) cpu_set_t mask; CPU_ZERO( &mask ); CPU_SET ( 0, &mask ); if( pthread_setaffinity_np( pthread_self(), sizeof( mask ), &mask ) != 0 ) { std::cerr << "Error setting main thread affinity!" << std::endl; exit( 1 ); } } else { omp_set_num_threads( MachineInfo::getInstance().cores() ); } #ifdef WITH_MKL if( scheme == 18 ) { mkl_set_num_threads( MachineInfo::getInstance().cores() ); } #endif std::cout << argv[0] << " called with matrix input file " << file << ", scheme number "; std::cout << scheme << " and x being " << (x_mode?"random":"the 1-vector") << "." << std::endl; std::cout << "Number of repititions of in-place zax is " << rep2 << std::endl; std::cout << "Number of repititions of the " << rep2 << " in-place zax(es) is " << rep1 << std::endl; Matrix< double >* checkm = new TS< double >( file ); clock_gettime( CLOCK_ID, &start); Matrix< double >* matrix = selectMatrix( scheme, ccs, file ); clock_gettime( CLOCK_ID, &stop); time = (stop.tv_sec-start.tv_sec)*1000; time += (stop.tv_nsec-start.tv_nsec)/1000000.0; if( matrix == NULL ) { std::cerr << "Error during sparse scheme loading, exiting." << std::endl; return EXIT_FAILURE; } std::cout << "Matrix dimensions: " << matrix->m() << " times " << matrix->n() << "." << std::endl; std::cout << "Datastructure loading time: " << time << " ms." 
<< std::endl << std::endl; srand( FIXED_SEED ); double* x = NULL; #ifdef INTERLEAVE_X if( scheme == 13 || scheme == 14 || scheme == 15 || scheme == 16 || scheme == 17 || scheme == 18 ) x = (double*) numa_alloc_interleaved( matrix->n() * sizeof( double ) ); else #endif x = (double*) _mm_malloc( matrix->n() * sizeof( double ), 64 ); //initialise input vector for( unsigned long int j=0; j<matrix->n(); j++ ) { x[ j ] = x_mode?(rand()/(double)RAND_MAX):1.0; } //do one trial run, also for verification double* c = checkm->mv( x ); clock_gettime( CLOCK_ID, &start ); double* z = matrix->mv( x ); clock_gettime( CLOCK_ID, &stop); time = (stop.tv_sec-start.tv_sec)*1000; time += (stop.tv_nsec-start.tv_nsec)/1000000.0; double checkMSE = 0; unsigned long int max_e_index = 0; double max_e = fabs( z[0] - c[0] ); for( unsigned long int j=0; j<matrix->m(); j++ ) { double curdiff = fabs( z[j] - c[j] ); if( curdiff > max_e ) { max_e = curdiff; max_e_index = j; } curdiff *= curdiff; curdiff /= (double)(matrix->m()); checkMSE += curdiff; } #ifdef OUTPUT_Z for( unsigned long int j=0; j<matrix->m(); j++ ) { std::cout << z[ j ] << std::endl; } #endif std::cout << "out-of-place z=Ax: mean= " << checksum( z, matrix->m() ) << ", "; std::cout << "MSE = " << checkMSE << ", "; std::cout << "max abs error = " << max_e << " while comparing y[ " << max_e_index << " ] = " << z[max_e_index] << " and c[ " << max_e_index << " ] = " << c[max_e_index] << ", "; std::cout << "time= " << time << " ms." << std::endl; #ifdef RDBH_NO_COLLECT if( scheme == 13 ) { std::cout << "WARNING: MSE and max abs error are not correct for the Row-distributed Beta Hilbert scheme; please see the RDBHilbert.hpp file, and look for the RDBH_NO_COLLECT flag." << std::endl; } #else if( scheme == 13 ) { std::cout << "WARNING: timings are pessimistic for the Row-distributed Beta Hilbert scheme; each spmv a (syncing) collect is executed to write local data to the global output vector as required by this library. 
To get the correct timings, turn this collect off via the RDBH_NO_COLLECT flag in the RDBHilbert.hpp file. Note that this causes the verification process to fail, since all data is kept in private local output subvectors." << std::endl; } #endif double *times = new double[ rep1 ]; //Run rep*rep instances for( unsigned long int run = 0; run < rep1; run++ ) { sleep( 1 ); time = 0.0; //"prefetch" matrix->zax( x, z ); matrix->zax( x, z, rep2, CLOCK_ID, &time ); time /= static_cast<double>( rep2 ); times[ run ] = time; } //calculate statistics double meantime, mintime, vartime; meantime = vartime = 0.0; mintime = times[ 0 ]; for( unsigned long int run = 0; run < rep1; run++ ) { if( times[ run ] < mintime ) mintime = times[ run ]; meantime += times[ run ] / static_cast< double >( rep1 ); } for( unsigned long int run = 0; run < rep1; run++ ) { vartime += ( times[ run ] - meantime ) * ( times[ run ] - meantime ) / static_cast< double >( rep1 - 1 ); } vartime = sqrt( vartime ); std::cout << "In-place:" << std::endl; std::cout << "Mean = " << checksum( z, matrix->m() ) << std::endl; std::cout << "Time = " << meantime << " (average), \t" << mintime << " (fastest), \t" << vartime << " (stddev) ms. " << std::endl; const double avgspeed = static_cast< double >( 2*matrix->nzs() ) / meantime / 1000000.0; const double minspeed = static_cast< double >( 2*matrix->nzs() ) / mintime / 1000000.0; const double varspeed = fabs( avgspeed - static_cast< double >( 2*matrix->nzs() ) / (meantime - vartime) / 1000000.0 ); std::cout << "Speed = " << avgspeed << " (average), \t" << minspeed << " (fastest), \t" << varspeed << " (variance) Gflop/s." 
<< std::endl; const size_t memuse1 = matrix->bytesUsed() + sizeof( double ) * 2 * matrix->nzs(); const double avgmem1 = static_cast< double >( 1000*memuse1 ) / meantime / 1073741824.0; const double minmem1 = static_cast< double >( 1000*memuse1 ) / mintime / 1073741824.0; const double varmem1 = fabs( avgmem1 - static_cast< double >( 1000*memuse1 ) / (meantime-vartime) / 1073741824.0 ); std::cout << " " << avgmem1 << " (average), \t" << minmem1 << " (fastest), \t" << varmem1 << " (variance) Gbyte/s (upper bound)." << std::endl; const size_t memuse2 = matrix->bytesUsed() + sizeof( double ) * ( matrix->m() + matrix->n() ); const double avgmem2 = static_cast< double >( 1000*memuse2 ) / meantime / 1073741824.0; const double minmem2 = static_cast< double >( 1000*memuse2 ) / mintime / 1073741824.0; const double varmem2 = fabs( avgmem2 - static_cast< double >( 1000*memuse2 ) / (meantime-vartime) / 1073741824.0 ); std::cout << " " << avgmem2 << " (average), \t" << minmem2 << " (fastest), \t" << varmem2 << " (variance) Gbyte/s (lower bound)." << std::endl; delete [] times; #ifdef INTERLEAVE_X if( scheme == 13 || scheme == 14 || scheme == 15 || scheme == 16 || scheme == 17 || scheme == 18 ) { numa_free( x, matrix->n() * sizeof( double ) ); } else #endif _mm_free( x ); if( scheme == 12 || scheme == 13 || scheme == 14 || scheme == 15 || scheme == 16 || scheme == 17 || scheme == 18 ) { #ifdef _NO_LIBNUMA _mm_free( z ); #else numa_free( z, matrix->m() * sizeof( double ) ); #endif } else { _mm_free( z ); } _mm_free( c ); delete matrix; delete checkm; return EXIT_SUCCESS; }
// Copy constructor: takes over the dimensions and element count of a and
// copies its storage into a freshly allocated array of _size doubles.
jcho::Matrix<T>::Matrix(const Matrix &a)
    : _storage(NULL),
      _m(a.m()),
      _n(a.n()),
      _size(a.size())
{
    // Allocate first, fill, then publish the buffer in the member.
    double *const fresh = new double[_size];
    memcpy(fresh, a._storage, sizeof(double) * _size);
    _storage = fresh;
}
// Copy constructor.
//
// Delegates to the (m, n) constructor so that this object's own members are
// set up before the element data is copied. The previous form,
// `Matrix(m.m(), m.n());` as a statement in the body, only built and
// destroyed an unrelated temporary and left _matrix, _m and _n uninitialised
// for the memcpy that followed.
// NOTE(review): this assumes the (m, n) constructor allocates _matrix with
// room for m*n floats and records the dimensions - confirm.
Matrix::Matrix( const Matrix& m )
    : Matrix( m.m(), m.n() )
{
    // The byte count is taken from the source's dimensions, so it does not
    // depend on how the delegated constructor stores _m and _n.
    memcpy( _matrix, m._matrix, sizeof(float) * m.m() * m.n() );
}
void L1Graph::L1Minimization(SpMatrix<float>& alpha,std::vector<int>& neighborhood) { //neighborhood contains cidx int p_num=index_->size(); int f_dim=(*features_)[0].size(); Matrix<float> X(f_dim, p_num - 1); //construct matrix X std::map<int,int> map_gl; //map between global and local point index for(int j = 0, k = 0; j < p_num; j++) { //cols int id=(*index_)[j]; if(id==cidx_) { continue; } Util::StdVector2MatrixCol((*features_)[id], X, k); map_gl.insert(make_pair<int,int>(k,id)); ++k; } //TODO:dont use I //construct I // Matrix<float> I(f_dim,f_dim); // I.eye(); // Util::Matrix2File(I,"I.txt"); // Matrix<float> B(f_dim, f_dim + p_num - 1); // X.merge(I, B); // Util::Matrix2File(B,"B.txt"); //clean // X.clear(); // I.clear(); //using lasso Matrix<float> x; //x=Ba Util::StdVector2Matrix((*features_)[cidx_], x); float lambda = 0.01; //lambda int L = (*features_)[0].size(); //max non-zero number // int L = 20; //max non-zero number SpMatrix<float> all; //TODO:debug in matlab ofstream fout("f.txt"); for (int i = 0; i < x.m(); i++) { for (int j = 0; j < x.n(); j++) { fout<<x(i,j)<<" "; } fout<<std::endl; } fout.close(); ofstream dout("d.txt"); for (int i = 0; i < X.m(); i++) { for (int j = 0; j < X.n(); j++) { dout<<X(i,j)<<" "; } dout<<std::endl; } dout.close(); lasso2<float>(x, X, all, L, lambda , 0 , PENALTY , true); //TODO:X->B Util::SubSpMatrix(all,alpha,p_num-1); // all.print("all"); // alpha.print("alpha"); // getchar(); Matrix<float> tmp; X.mult(alpha,tmp); x.add(tmp,-1); std::cout<<(0.5*x.normFsq())<<" "<<(lambda*alpha.asum())<<" "<<(0.5*x.normFsq())/(lambda*alpha.asum())<<std::endl; //save for vis std::vector<int> mk; std::vector<float> mv; for(int ii = 0; ii < alpha.n(); ++ii) { //TODO:calls for pB and pE are not consist for(int j = alpha.pB(ii); j < alpha.pE()[ii]; ++j) { //<i,j>->all.v(j) mk.push_back(map_gl[j]); mv.push_back(alpha.v(j)); neighborhood.push_back(map_gl[j]); } } std::string namev = "debug/" + boost::lexical_cast<std::string>(cidx_) + 
".xyznq"; ofstream ov(namev.c_str()); for(int ii = 0; ii < cloud_->size(); ++ii) { if(ii == cidx_) { ov << cloud_->points[ii].x << " " << cloud_->points[ii].y << " " << cloud_->points[ii].z << " 0 0 0 1" << std::endl; } else { std::vector<int>::iterator it = find(mk.begin(), mk.end(), ii); if(it != mk.end()) { int dis = std::distance(mk.begin(), it); ov << cloud_->points[ii].x << " " << cloud_->points[ii].y << " " << cloud_->points[ii].z << " 0 0 0 " << mv[dis] << std::endl; // ov << cloud_->points[ii].x <<" " << cloud_->points[ii].y << " " << cloud_->points[ii].z << " 0 0 0 0" << std::endl; } else { ov << cloud_->points[ii].x << " " << cloud_->points[ii].y << " " << cloud_->points[ii].z << " 0 0 0 -1" << std::endl; } } } ov.close(); return ; } /* ----- end of method L1Graph::L1Minimization ----- */
int main(){ header( "Basic assignment" );{ header( "Constructors" );{ Matrix a; test( a.m() == 0 && a.n() == 0, "Test the base constructor." ); Matrix b( 1, 1 ); test( b.m() == 1 && b.n() == 1, "Test the common constructor." ); Matrix c(b); test( c.m() == 1 && c.n() == 1, "Test the copy constructor." ); } footer(); header( "Input" );{ Matrix a( 4, 4 ); test( a.print_test() == "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", "Empty matrix." ); Matrix b; { Matrix c( 4, 4 ); c << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10 << 11 << 12 << 13 << 14 << 15 << 16; test( c.print_test() == "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", "Basic 4x4." ); b = c; } test( b.print_test() == "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", "Op = test" ); } footer(); header( "Resize" );{ Matrix a( 2, 2 ); a << 1 << 2 << 3 << 4; a.resize( 2, 2 ); test( a.print_test() == "1 2 3 4", "Same size resize." ); a.resize( 3, 3 ); test( a.print_test() == "1 2 0 3 4 0 0 0 0", "Scaled up resize." ); a.print(); a.resize( 1, 1 ); test( a.print_test() == "1", "Scaled down resize." 
); a.resize( 2, 2 ); } footer(); header( "Submatrix" );{ Matrix a( 3, 3 ); a << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9; Matrix b = a.submatrix(1,1,2,2); test( b.print_test() == "1 2 4 5", "Basic submatrix" ); Matrix c = a.submatrix(2,2,2,2); test( c.print_test() == "5 6 8 9", "Basic submatrix #2" ); test( a.SUBMATRIX(1,1,2,3).print_test() == "1 2 3 4 5 6", "Immediate submatrix" ); test( a.SUBMATRIX(1,1,2,1).print_test() == "1 4", "Immediate submatrix #2" ); } footer(); } footer(); header( "Mathematical functions" );{ header( "Addition" );{ Matrix a(2, 2); a << 1 << 2 << 3 << 4; Matrix b(2, 2); b << 4 << 3 << 2 << 1; Matrix c = b + a; test( c.print_test() == "5 5 5 5", "Basic addition" ); c = a + b; test( c.print_test() == "5 5 5 5", "Addition is transitive" ); b.resize(1, 1); c = b + a; test( c.error() == true, "Unable to add unlike matrices" ); b.resize(2, 2); c = a + b; test( c.print_test() == "5 2 3 4", "Basic addition after a resize" ); test( a.print_test() == "1 2 3 4", " A is unchanged by addition" ); a += b; test( a.print_test() == "5 2 3 4", "Immediate addition." ); } footer(); header( "Subtraction" );{ Matrix a( 2, 2 ); a << 1 << 2 << 3 << 4; Matrix b( 2, 2 ); b << 1 << 1 << 1 << 1; Matrix c( 1, 1 ); test( (a - b).print_test() == "0 1 2 3", "Basic subtraction" ); test( (b - a).print_test() != "0 1 2 3", "Subtraction is NOT transitive" ); c = b + c; test( c.error() == true, "Unable to subtract unlike matrices" ); a -= b; test( a.print_test() == "0 1 2 3", "Immediate subtraction." 
); } footer(); header( "Multiplication" );{ Matrix a( 2, 3 ); a << 2 << 4 << 3 << -2 << 3 << 1; Matrix b( 3, 4 ); b << 2 << 4 << 5 << 6 << -2 << 1 << -2 << 1 << -9 << -9 << -9 << 1; Matrix c( 2, 2 ); c << 1 << 2 << 4 << 1; Matrix d( 2, 2 ); d << 3 << 1 << 3 << 0; test( (a * b).print_test()=="-31 -15 -25 19 -19 -14 -25 -8","Basic multiplication"); test( (b*a).error() == true, "Can NOT multiply improper matrices" ); test( (c*d).print_test() != (d*c).print_test(), "Multiplication is NOT transitive"); test( (a * 2).print_test() == "4 8 6 -4 6 2", "Basic scalar multiplication" ); test( a.print_test() == "2 4 3 -2 3 1", " A is unchanged by multiplication" ); c *= a; test( c.print_test() == "-2 10 5 6 19 13","Immediate multiplication"); c *= 2; test( c.print_test() == "-4 20 10 12 38 26", "Immediate scalar multiplication" ); } footer(); header( "Division" );{ } footer(); header( "Exponent" );{ } footer(); header( "Transpose" );{ Matrix a( 2, 3 ); a << 4 << 3 << 2 << 1 << 5 << 6; Matrix b = +a; test( a.m() == b.n() && a.n() == b.m(), "Basic transpose" ); ++a; test( a == b, "Immediate transpose" ); } footer(); header( "Inverse" );{ Matrix a( 3,3 ); a << 0 << 1 << 2 << 1 << 0 << 3 << 4 << -3 << 8; Matrix b = ~a; test( b.print_test() == "-4.5 7 -1.5 -2 4 -1 1.5 -2 0.5", "Inverse" ); test( (!a).print_test() == "-4.5 7 -1.5 -2 4 -1 1.5 -2 0.5", "Immediate inverse" ); } footer(); header( "Determinant" );{ Matrix c(1,1); c << 1; test( c.det() == 1, "Determinant test 1x1" ); Matrix b(2,2); b << 3 << 4 << 1 << 2; test( b.det() == 2, "Determinant test 2x2" ); Matrix a(3,3); a << 1 << 2 << 0 << 4 << 5 << 6 << 7 << 8 << 9; test( a.det() == 9, "Determinant test 3x3" ); Matrix d(6,6); d << 0 << 9 << 3 << 4 << 1 << 2 << 0 << 2 << 1 << 5 << 5 << 2 << 3 << 4 << 1 << 1 << 7 << 3 << 9 << 4 << 2 << 4 << 7 << 6 << 6 << 1 << 8 << 2 << 7 << 3 << 3 << 7 << 2 << 7 << 3 << 4; test( d.det() == 330, "Determinant test 6x6" ); } footer(); } footer(); cout << endl << "Tests passed: " << passed << endl 
<< "Tests failed: " << failed << endl; return 0; }
double FastMatmul(Matrix<Scalar>& A, Matrix<Scalar>& B, Matrix<Scalar>& C, int num_steps, double x=1e-8, Scalar alpha=Scalar(1.0), Scalar beta=Scalar(0.0)) { MemoryManager<Scalar> mem_mngr; #ifdef _PARALLEL_ mem_mngr.Allocate(2, 2, 2, 8, num_steps, A.m(), A.n(), B.n()); #endif A.set_multiplier(alpha); int num_multiplies_per_step = 8; int total_multiplies = pow(num_multiplies_per_step, num_steps); // Set parameters needed for all types of parallelism. int num_threads = 0; #ifdef _PARALLEL_ # pragma omp parallel { if (omp_get_thread_num() == 0) { num_threads = omp_get_num_threads(); } } omp_set_nested(1); #endif #if defined(_PARALLEL_) && (_PARALLEL_ == _BFS_PAR_) # pragma omp parallel { mkl_set_num_threads_local(1); mkl_set_dynamic(0); } #endif #if defined(_PARALLEL_) && (_PARALLEL_ == _DFS_PAR_) mkl_set_dynamic(0); #endif #if defined(_PARALLEL_) && (_PARALLEL_ == _HYBRID_PAR_) if (num_threads > total_multiplies) { mkl_set_dynamic(0); } else { # pragma omp parallel { mkl_set_num_threads_local(1); mkl_set_dynamic(0); } } #endif LockAndCounter locker(total_multiplies - (total_multiplies % num_threads)); using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>; auto t1 = std::chrono::high_resolution_clock::now(); #ifdef _PARALLEL_ # pragma omp parallel { # pragma omp single #endif FastMatmulRecursive(locker, mem_mngr, A, B, C, num_steps, num_steps, 0, x, num_threads, beta); #ifdef _PARALLEL_ } #endif auto t2 = std::chrono::high_resolution_clock::now(); return FpMilliseconds(t2 - t1).count(); }
// Returns least_squares<dgels_>(a).
// Throws Exception when a.m() differs from this->m().
jcho::Matrix<double> jcho::Matrix<double>::linear_least_squares(const Matrix<double> &a) const
{
    if (a.m() == m())
    {
        return least_squares<dgels_>(a);
    }

    throw Exception("linear_least_squares(const Matrix<T> &): This operation can only be applied when a.m() == this->m().");
}
// Copy constructor: takes over the dimensions of m and whatever m.matrix()
// returns.
// NOTE(review): if matrix() returns a raw pointer, both objects end up
// referring to the same storage - confirm ownership against the class.
Matrix::Matrix( const Matrix& m )
{
    this->_matrix = m.matrix();
    this->_n = m.n();
    this->_m = m.m();
}
// Prints a rectangular window of a matrix to Info.
//
// matrix   : matrix to print.
// name     : label used in the heading line.
// r, c     : first row / column of the window; each must lie inside the
//            matrix, otherwise FatalError is raised.
// nRows,
// nColumns : window size; must not be negative. A value below 1, or one that
//            would run past the matrix, is replaced by "everything from r
//            (resp. c) to the last row (resp. column)".
void DMDModel<Type>::printMatrix(const Matrix<Form, Type2>& matrix, word name, label r, label c, label nRows, label nColumns)
{
    // Location text shared by all four argument checks.
    const char* const where =
        "printMatrix("
        "Matrix<Form, Type2>& matrix,"
        "word name, "
        "label r, "
        "label c, "
        "label rows, "
        "label columns ";

    if (r < 0 || r >= matrix.n())
    {
        FatalErrorIn(where)
            << "r is greater than the number of rows in matrix! "
            << "r must have a value between 0 and matrix.n()-1"
            << abort(FatalError);
    }

    if (c < 0 || c >= matrix.m())
    {
        FatalErrorIn(where)
            << "c is greater than the number of columns in matrix! "
            << "c must have a value between 0 and matrix.m()-1"
            << abort(FatalError);
    }

    if (nRows < 0)
    {
        FatalErrorIn(where)
            << "nRows must have a value greater or equal 0 "
            << abort(FatalError);
    }

    if (nColumns < 0)
    {
        FatalErrorIn(where)
            << "nColumns must have a value greater or equal 0 "
            << abort(FatalError);
    }

    Info << endl << endl;
    Info << "Matrix Coeffs of " << name << endl;
    Info << endl;

    // Clamp the window to the matrix; a size below 1 means "to the end".
    if (nRows < 1 || r+nRows > matrix.n())
    {
        nRows = matrix.n() - r;
    }

    if (nColumns < 1 || c+nColumns > matrix.m())
    {
        nColumns = matrix.m() - c;
    }

    const label rowEnd = r + nRows;
    const label colEnd = c + nColumns;

    for (label row = r; row < rowEnd; row++)
    {
        for (label col = c; col < colEnd; col++)
        {
            Info << setw(8) << matrix[row][col] << " ";
        }
        Info << endl;
    }

    Info << endl;
}