//--------------------------------------------------------------------------- // Multiply this matrix by the rArg and place result to vector NaVector& NaMatrix::multiply (NaVector& rArg, NaVector& rRes) const { if(dim_cols() != rArg.dim()) throw(na_size_mismatch); rRes.new_dim(dim_rows()); unsigned iR, iC; for(iR = 0; iR < dim_rows(); ++iR){ rRes[iR] = 0.0; for(iC = 0; iC < dim_cols(); ++iC) rRes[iR] += *(row_ptr(iR) + iC) * rArg[iC]; } #if 0 if(dim_rows() != rArg.dim()) throw(na_size_mismatch); rRes.new_dim(dim_cols()); unsigned iR, iC; for(iR = 0; iR < nDimRow; ++iR){ rRes[iR] = 0.0; for(iC = 0; iC < nDimCol; ++iC) rRes[iR] += *(row_ptr(iR) + iC) * rArg[iC]; } #endif return rRes; }
//--------------------------------------------------------------------------- // Print contents of the array via NaPrintLog() facility void NaMatrix::print_contents () const { unsigned iR, iC; NaPrintLog("Matrix(this=%p, rows=%u, cols=%u):\n", this, nDimRow, nDimCol); for(iR = 0; iR < nDimRow; ++iR){ for(iC = 0; (int)iC < (int)nDimCol - 1; ++iC) NaPrintLog("\t%g", *(row_ptr(iR) + iC)); NaPrintLog("\t%g\n", *(row_ptr(iR) + nDimCol - 1)); } }
void img::PixMat::verticalFlip() { int lastRow = height_ - 1; guint8* temp = new guint8[rowstride_]; for (int i = 0; i < height_ / 2; i++) { guint8* rowA = row_ptr(i); guint8* rowB = row_ptr(lastRow - i); memcpy(temp, rowA, rowstride_); memcpy(rowA, rowB, rowstride_); memcpy(rowB, temp, rowstride_); } delete[] temp; }
//--------------------------------------------------------------------------- // Initialization of the diagonal items by the given value and other ones by 0 void NaMatrix::init_diag (NaReal v) { unsigned i, j; for(i = 0; i < nDimRow; ++i){ for(j = 0; j < nDimCol; ++j){ if(i == j) row_ptr(i)[j] = v; else row_ptr(i)[j] = 0.0; } } }
//--------------------------------------------------------------------------- // Access to the value of the item NaReal NaMatrix::get (unsigned iR, unsigned iC) const { if(invalid(iR, iC)) throw(na_out_of_range); return *(row_ptr(iR) + iC); }
//--------------------------------------------------------------------------- // Access to the item NaReal* NaMatrix::operator[] (unsigned iR) { if(invalid(iR, 0)) throw(na_out_of_range); return row_ptr(iR); }
//--------------------------------------------------------------------------- // Access to the item NaReal& NaMatrix::fetch (unsigned iR, unsigned iC) { if(invalid(iR, iC)) throw(na_out_of_range); return *(row_ptr(iR) + iC); }
double LpEq(int m, int n, int nnz) { std::vector<T> val(nnz); std::vector<int> col_ind(nnz); std::vector<int> row_ptr(m + 2); std::vector<T> x(n); std::vector<T> y(m + 1); std::default_random_engine generator; std::uniform_real_distribution<T> u_dist(static_cast<T>(0), static_cast<T>(1)); // Enforce c == rand(n, 1) std::vector<std::tuple<int, int, T>> entries; entries.reserve(n); for (int i = 0; i < n; ++i) { entries.push_back(std::make_tuple(m, i, u_dist(generator))); } // Generate A and c according to: // A = 4 / n * rand(m, n) nnz = MatGenApprox(m + 1, n, nnz, val.data(), row_ptr.data(), col_ind.data(), static_cast<T>(0), static_cast<T>(4.0 / n), entries); pogs::MatrixSparse<T> A_('r', m + 1, n, nnz, val.data(), row_ptr.data(), col_ind.data()); pogs::PogsIndirect<T, pogs::MatrixSparse<T>> pogs_data(A_); std::vector<FunctionObj<T> > f; std::vector<FunctionObj<T> > g; // Generate b according to: // v = rand(n, 1) // b = A * v std::vector<T> v(n); for (unsigned int i = 0; i < n; ++i) v[i] = u_dist(generator); f.reserve(m + 1); for (unsigned int i = 0; i < m; ++i) { T b_i = static_cast<T>(0); for (unsigned int j = row_ptr[i]; j < row_ptr[i + 1]; ++j) b_i += val[j] * v[col_ind[j]]; f.emplace_back(kIndEq0, static_cast<T>(1), b_i); } f.emplace_back(kIdentity); g.reserve(n); for (unsigned int i = 0; i < n; ++i) g.emplace_back(kIndGe0); double t = timer<double>(); pogs_data.Solve(f, g); return timer<double>() - t; }
//--------------------------------------------------------------------------- // Add a matrix to this one NaMatrix& NaMatrix::add (const NaMatrix& rMatr) { if(dim_rows() != rMatr.dim_rows() || dim_cols() != rMatr.dim_cols()) throw(na_size_mismatch); unsigned iR, iC; for(iR = 0; iR < nDimRow; ++iR) for(iC = 0; iC < nDimCol; ++iC) *(row_ptr(iR) + iC) += *(rMatr.row_ptr(iR) + iC); return *this; }
// multigrid v-cycle void v_cycle( double* P, uint n_dof, cuint nx, cuint ny, cuint nz, cdouble hx, cdouble hy, cdouble hz, cdouble hx2i, cdouble hy2i, cdouble hz2i, cdouble tol, cuint max_iteration, cuint pre_smooth_iteration, cdouble lx, cdouble ly, cdouble lz, cuint level, cuint max_level, double* F, double& Er, double* Uss, double* Vss, double* Wss, cdouble bcs[][6], cdouble dt ) { cout<<"level: "<<level<<" n_dof: "<<n_dof<<endl; // initialize finite difference matrix (+1 for global constraint) // double** M = new double*[n_dof]; // for(int n = 0; n < (n_dof); n++) // M[n] = new double[n_dof]; // // initialize // #pragma omp parallel for shared(n_dof, M) // for(int i=0; i<n_dof; i++) // for(int j=0; j<n_dof; j++) // M[i][j] = 0; cout<<"fd_matrix_sparse"<<endl; vector<tuple <uint, uint, double> > M_sp; vector<double> val; vector<uint> col_ind; vector<uint> row_ptr(1,0); // create finite difference matrix cout<<"create finite difference matrix"<<endl; // build pressure matrix pressure_matrix( M_sp, val, col_ind, row_ptr, nx, ny, nz, hx2i, hy2i, hz2i, n_dof ); // construct load vector // load vector is created only at the level 0 if(level==0){ F = new double[n_dof]; cout<<"create load vector"<<endl; pressure_rhs(F, Uss, Vss, Wss, nx, ny, nz, bcs, hx, hy, hz, dt); // load_vector(F, n_dof, I,J,K ); } // cout<<"save matrix and vector"<<endl; // char matrix_file[100]; // char vector_file[100]; // sprintf(vector_file, "vector_%i.dat", level); // if(write_vector(n_dof,F,vector_file)) cout<<"write_vector fail"<<endl; // construct solution vector double* U; if(level==0) U=P; else U = new double[n_dof]; double* U_tmp = new double[n_dof]; // initial guess #pragma omp parallel for shared(U, U_tmp) num_threads(nt) for(int n=0; n<n_dof; n++){ U[n] = 0.0; U_tmp[n] = 0.0; } // residual and error double* R = new double[n_dof]; // perform pre-smoothing and compute residual cout<<"pre-smoothing "<<pre_smooth_iteration<<" times"<<endl; Er = tol*10; jacobi_sparse(tol, 
pre_smooth_iteration, n_dof, U, U_tmp, val, col_ind, row_ptr, F, Er, R); // restriction of residual on coarse grid double* F_coar; // Restrict the residual cuint nx_coar = (nx)/2; cuint ny_coar = (ny)/2; cuint nz_coar = (nz)/2; uint n_dof_coar = nx_coar*ny_coar*nz_coar; F_coar = new double[n_dof_coar]; // mesh size cdouble hx_coar = lx/(nx_coar); cdouble hy_coar = ly/(ny_coar); cdouble hz_coar = lz/(nz_coar); // inverse of square of mesh sizes cdouble hx2i_coar = 1.0/(hx_coar*hx_coar); cdouble hy2i_coar = 1.0/(hy_coar*hy_coar); cdouble hz2i_coar = 1.0/(hz_coar*hz_coar); // restric residual to the coarrse grid cout<<"restriction"<<endl; restriction( R, F_coar, nx, ny, nz, nx_coar, ny_coar, nz_coar); // construct solution vector on coarse grid double* U_coar = new double[n_dof_coar]; double* U_coar_tmp = new double[n_dof_coar]; // if the grid is coarsest if( level==max_level){ cout<<"level: "<<level+1<<" n_dof: "<<n_dof_coar<<endl; // initial guess #pragma omp parallel for shared(U_coar, U_coar_tmp) num_threads(nt) for(int n=0; n<n_dof_coar; n++){ U_coar[n] = 0.0; U_coar_tmp[n] = 0.0; } vector<tuple <uint, uint, double> > M_sp_coar; vector<double> val_coar; vector<uint> col_ind_coar; vector<uint> row_ptr_coar(1,0); // create finite difference matrix cout<<"create finite difference matrix"<<endl; // fd_matrix_sparse(M_sp_coar, val_coar, col_ind_coar, row_ptr_coar, // nx_coar,ny_coar,nz_coar, // hx2i_coar, hy2i_coar, hz2i_coar, n_dof_coar ); pressure_matrix( M_sp_coar, val_coar, col_ind_coar, row_ptr_coar, nx_coar, ny_coar, nz_coar, hx2i_coar, hy2i_coar, hz2i_coar, n_dof_coar ); // residual on coarse grid double* R_coar = new double[n_dof_coar]; // exact Jacobi method Er = tol*10; jacobi_sparse(tol, max_iteration, n_dof_coar, U_coar, U_coar_tmp, val_coar, col_ind_coar, row_ptr_coar, F_coar, Er, R_coar); // write_results( U_coar, // n_dof_coar, // I_coar, J_coar, K_coar, // dx_coar, dy_coar, dz_coar, level); delete[] R_coar; // cout<<"R"<<endl; // for(int i=0; i<n_dof; 
i++) // cout<<R[i]<<endl; } else{ // v_cycle on the coarse grid v_cycle( U_coar, n_dof_coar, nx_coar, ny_coar, nz_coar, hx_coar, hy_coar, hz_coar, hx2i_coar, hy2i_coar, hz2i_coar, tol, max_iteration, pre_smooth_iteration, lx, ly, lz, level+1, max_level, F_coar, Er, Uss, Vss, Wss, bcs, dt ); cdouble dx_coar = lx/(nx_coar); cdouble dy_coar = ly/(ny_coar); cdouble dz_coar = lz/(nz_coar); // // write partial results for test purpose // write_results( U_coar, // n_dof_coar, // I_coar, J_coar, K_coar, // dx_coar, dy_coar, dz_coar, level); } // interpolate to fine grid double* E = new double[n_dof]; interpolation(U_coar, E, nx_coar,ny_coar,nz_coar, nx, ny, nz); // correct the fine grid approximation #pragma omp parallel for shared(U,E) num_threads(nt) for(int i=0; i<n_dof; i++){ // cout<<i<<" "<<U[i]<<" "<<E[i]<<" "<<E[i]/U[i]<<endl; U[i] += E[i]; } // perform post-smoothing and compute residual uint post_smooth_iteration; // if(level==0) post_smooth_iteration=max_iteration; // else // post_smooth_iteration=( pre_smooth_iteration+1)*1000; cout<<"post-smoothing "<<post_smooth_iteration<<" times on level " <<level<<endl; // jacobi(tol, post_smooth_iteration, n_dof, U, U_tmp, M, F, Er, R); Er = tol*10; jacobi_sparse(tol, post_smooth_iteration, n_dof, U, U_tmp, val, col_ind, row_ptr, F, Er, R); // cleanup if (level==0) delete[] F; delete[] U_tmp; delete[] R, F_coar; delete[] E; delete[] U_coar, U_coar_tmp; }
void tImgLinear::insert_data( uint8_t const* data_, uint32_t size_, uint32_t offset_ ) { uint32_t sxp = pitch(); uint32_t sx = size().x(); if( _ImgFormat == PixelFormat::rgb() ) { sx *= PixelFormat::rgb().val(); } if( _ImgFormat == PixelFormat::rgba() ) { sx *= PixelFormat::rgba().val(); } if( sxp == sx ) { // sx == pitch, no pdading image int b = bytes(); if( size_ > 0 && size_ + offset_ <= bytes() && _is_allocated ) { uint8_t* pdata = _data + offset_; memcpy( pdata, data_, size_ ); } return; } if( size_ <= sx ) { // fits in one line uint8_t* pdata = _data + offset_; memcpy( pdata, data_, size_ ); return; } uint32_t pitchdiff = pitch() - sx; uint32_t line = ( offset_ / sx ); uint32_t offset_intern = offset_ + line * pitchdiff; uint32_t off_remainder = offset_intern - ( offset_intern / pitch() ) * pitch() ; // offset starts at pitch boundary if( off_remainder == 0 ) { vector< vector<uint8_t> > _vv; uint32_t i = 0; uint8_t const* data_input = data_; while( i < size_ && ( i <= ( size_ - sx ) ) ) { vector<uint8_t> v( data_input, data_input + sx ); _vv.push_back( v ); i += sx; data_input += sx; } uint32_t i_remainder = i - ( i / sx ) * sx ; if( i_remainder > 0 ) { vector<uint8_t> v1( data_input, data_input + i_remainder ); _vv.push_back( v1 ); } for( vector<uint8_t> v8 : _vv ) { uint8_t* row = row_ptr( line ); memcpy( row, &v8[0], v8.size() ); line++; } } }
// Describe row y as a row_data built from the first column index (0), the
// last column index (width - 1) and the pointer returned by row_ptr(y).
row_data row (int y) const
{
    int const last_x = safe_cast<int>(data_.width() - 1);
    return row_data(0, last_x, row_ptr(y));
}
// Three-argument overload of row_ptr(): only y is used, the first and the
// third argument are accepted and ignored, and the call is forwarded to
// row_ptr(y).
uint8_t const* row_ptr(int, int y, unsigned)
{
    uint8_t const* const row_start = row_ptr(y);
    return row_start;
}