// Consumes the next token and returns it as a (start offset, length) range
// into str_.
//
// skip() runs first; the token then extends from the resulting cur_ up to the
// next character contained in del_, or to the end of str_ when no delimiter
// follows. When ret_ is set and a delimiter sits exactly at the start
// position, that single delimiter character is consumed and returned as a
// one-character range instead of an empty one.
//
// NOTE(review): if cur_ is already string::npos after skip(), the returned
// length (str_.length() - start) wraps around. Callers presumably check for
// exhaustion before calling -- confirm.
CToken::range_type CToken::next_range()
{
    skip();

    string::size_type start = cur_;

    if ( cur_ != string::npos )
        cur_ = str_.find_first_of(del_,cur_);

    // No further delimiter: the token runs to the end of the string.
    if ( cur_ == string::npos )
        return range_type(start,str_.length()-start);

    // Delimiters are themselves returned as tokens when ret_ is set.
    if ( ret_ && start == cur_ && del_.find(str_[cur_]) != string::npos )
        ++cur_;

    return range_type(start,cur_-start);
}
/*
    Multiplication helper for case 3 ) -- other.col() is odd -- sub-case <2>,
    i.e. the branch taken when ( other.col() & 2 ) is not set:

        split the last column off other, giving two matrices
            [left_part  <- other]                  [C, OC - 1]
            [tail_col   <- other]                  [C, 1]
        multiply this by each piece
            [left_prod  <- this * left_part]       [R, OC - 1]
            [tail_prod  <- this * tail_col]        [R, 1]
        merge the two products, tail_prod becoming the last column
            [joined     <- left_prod | tail_prod]  [R, OC]

    The merged result is cloned back into this matrix, which is returned.
*/
zen_type& oc2( const zen_type& other )
{
    zen_type& self = static_cast<zen_type&>( *this );

    // All rows of other; every column but the last / only the last column.
    const zen_type left_part( other, range_type( 0, other.row() ), range_type( 0, other.col() - 1 ) );
    const zen_type tail_col( other, range_type( 0, other.row() ), range_type( other.col() - 1, other.col() ) );

    const zen_type& left_prod = self * left_part;
    const zen_type& tail_prod = self * tail_col;

    // operator|| appends tail_prod as the last column (see the note above).
    const zen_type& joined = left_prod || tail_prod;

    self.clone( joined, 0, self.row(), 0, other.col() );
    return self;
}
/*
    Multiplication helper for case 1 ) -- row() is odd -- sub-case <2>,
    i.e. the branch taken when ( row() & 2 ) is not set:

        split the last row off this matrix, giving two matrices
            [upper_part  <- this]                    [R - 1, C]
            [tail_row    <- this]                    [1, C]
        multiply each piece by other
            [upper_prod  <- upper_part * other]      [R - 1, OC]
            [tail_prod   <- tail_row * other]        [1, OC]
        merge the two products, tail_prod becoming the last row
            [stacked     <- upper_prod | tail_prod]  [R, OC]

    The merged result is cloned back into this matrix, which is returned.
*/
zen_type& rr2( const zen_type& other )
{
    zen_type& self = static_cast<zen_type&>( *this );

    // Every row but the last / only the last row; all columns in both cases.
    const zen_type upper_part( self, range_type( 0, self.row() - 1 ), range_type( 0, self.col() ) );
    const zen_type tail_row( self, range_type( self.row() - 1, self.row() ), range_type( 0, self.col() ) );

    const zen_type& upper_prod = upper_part * other;
    const zen_type& tail_prod = tail_row * other;

    // operator&& appends tail_prod as the last row (see the note above).
    const zen_type& stacked = upper_prod && tail_prod;

    self.clone( stacked, 0, self.row(), 0, other.col() );
    return self;
}
/*
 * For one range of the option, set the matching entries of to_print to 1.
 *
 * The range text is classified by range_type() and then read as "pos" or
 * "pos:counter" (pos is 1-based):
 *   type 0      - invalid: an error is printed to stderr and 0 is returned
 *   type 1      - the single entry at pos
 *   type 2      - every entry from pos up to the table limit
 *   otherwise   - counter entries starting at pos, clipped to the limit
 *
 * Returns 1 on success (including a pos at or beyond the limit, which is
 * silently ignored) and 0 when the range is invalid.
 *
 * Unlike the earlier version, a range whose numbers cannot be read, or whose
 * pos is below 1, is rejected instead of indexing to_print out of bounds or
 * using an uninitialised value.
 *
 * NOTE(review): to_print is assumed to hold at least 1024 entries - confirm
 * against the caller. A pos of exactly 1024 is ignored even though index
 * 1023 would fit such a table; that behaviour is kept as it was.
 */
int set_each_range(const char range[], int to_print[])
{
    enum { MAX_FIELDS = 1024 };
    int type;
    int pos = 0;
    int counter = 0;
    int parsed;
    int first;
    int last;
    int i;

    type = range_type(range);
    if (type == 0) {
        fprintf(stderr,"invalid option range %s\n", range);
        return 0;
    }

    parsed = sscanf(range, "%d:%d", &pos, &counter);
    if (parsed < 1 || pos < 1) {
        /* nothing usable was read, or the position would index before
         * the start of to_print */
        fprintf(stderr,"invalid option range %s\n", range);
        return 0;
    }

    if (pos >= MAX_FIELDS)
        return 1;

    first = pos - 1;

    if (type == 1) {
        to_print[first] = 1;
        return 1;
    }

    if (type == 2) {
        for (i = first; i < MAX_FIELDS; i++)
            to_print[i] = 1;
        return 1;
    }

    /* "pos:counter" form: the count must have been read as well */
    if (parsed < 2) {
        fprintf(stderr,"invalid option range %s\n", range);
        return 0;
    }

    /* clip before adding so that a huge counter cannot overflow int */
    if (counter >= MAX_FIELDS - first)
        last = MAX_FIELDS;
    else
        last = first + counter;

    for (i = first; i < last; i++)
        to_print[i] = 1;
    return 1;
}
// Removes every value covered by 'range' from the run: stored ranges that
// overlap it are split, clipped or erased as needed.
void range_run<Char>::clear(range_type const& range)
{
    BOOST_ASSERT(is_valid(range));

    // nothing stored, nothing to remove
    if (run.empty())
        return;

    // first stored range ordered after 'range' by range_compare
    typename storage_type::iterator erase_from =
        std::upper_bound(
            run.begin(), run.end(), range,
            range_compare<range_type>()
        );

    // there is a stored range that starts at or before 'range'
    if (erase_from != run.begin())
    {
        typename storage_type::iterator const prev = erase_from-1;

        if (!(prev->first < range.first))
        {
            // '*prev' shares its left bound with 'range': let the sweep
            // below remove or clip it
            erase_from = prev;
        }
        else if (prev->last > range.last)
        {
            // 'range' lies strictly inside '*prev': punch a hole, leaving
            // one piece on each side
            Char right_end = prev->last;
            prev->last = range.first-1;
            run.insert(erase_from, range_type(range.last+1, right_end));
            return;
        }
        else if (prev->last >= range.first)
        {
            // 'range' covers the right end of '*prev': clip that end
            prev->last = range.first-1;
        }
    }

    // sweep over the following ranges that overlap with 'range'
    typename storage_type::iterator erase_to = erase_from;

    // ranges that end inside 'range' are swallowed entirely
    while (erase_to != run.end() && erase_to->last <= range.last)
        ++erase_to;

    // the next one, if it still overlaps, only loses its left part
    if (erase_to != run.end() && erase_to->first <= range.last)
        erase_to->first = range.last+1;

    // drop everything that was swallowed
    run.erase(erase_from, erase_to);
}
/*
    Multiplication helper for case 2 ) -- col() is odd -- sub-case <2>,
    i.e. the branch taken when ( zen.col() & 2 ) is not set:

        cut the last column off this (right side)
            [left_this    <- this]                      [R, C - 1]
            [tail_col     <- this]                      [R, 1]
        cut the last row off other (bottom)
            [upper_other  <- other]                     [C - 1, OC]
            [tail_row     <- other]                     [1, OC]
        do the multiplications
            [main_prod    <- left_this * upper_other]   [R, OC]
            [outer_prod   <- tail_col * tail_row]       [R, OC]
        add them to generate the answer
            [total        <- main_prod + outer_prod]    [R, OC]

    The sum is cloned back into this matrix, which is returned.
*/
zen_type& cc2( const zen_type & other )
{
    zen_type& self = static_cast<zen_type&>( *this );

    // this, split by columns: [R, C-1] and [R, 1]
    const zen_type left_this( self, range_type( 0, self.row() ), range_type( 0, self.col() - 1 ) );
    const zen_type tail_col( self, range_type( 0, self.row() ), range_type( self.col() - 1, self.col() ) );

    // other, split by rows: [C-1, OC] and [1, OC]
    const zen_type upper_other( other, range_type( 0, other.row() - 1 ), range_type( 0, other.col() ) );
    const zen_type tail_row( other, range_type( other.row() - 1, other.row() ), range_type( 0, other.col() ) );

    const zen_type& main_prod = left_this * upper_other;
    const zen_type& outer_prod = tail_col * tail_row;
    const zen_type& total = main_prod + outer_prod;

    self.clone( total, 0, self.row(), 0, other.col() );
    return self;
}
// Returns a range_type constructed from a reference to the collections_
// member; the reference is produced by fcppt::make_ref.
typename sge::sprite::intrusive::ordered::collection< Choices, Order >::range_type
sge::sprite::intrusive::ordered::collection< Choices, Order >::range()
{
	return range_type( fcppt::make_ref( collections_ ) );
}
/*
 * Parse a bracketed class starting at iptr and store its encoded form
 * in the pattern p.
 *
 * If the first character is '[', the class ends at ']' and p->tp becomes
 * CPAT_CCLASS, or CPAT_NCLASS when the class opens with '!' or '^' that
 * is not immediately followed by ']'.  Any other first character selects
 * an equivalence class ending at '}' (p->tp = CPAT_EQUIV).
 *
 * The encoded class is written to p->u.str, allocated with zhalloc().
 *
 * Returns a pointer to the closing end character.  If the input ends
 * before one is found, returns a pointer to the terminating null byte
 * instead; in that case p->u.str has not been assigned.
 */
static char *
parse_class(Cpattern p, char *iptr)
{
    int endchar, firsttime = 1;
    char *optr, *nptr;

    if (*iptr++ == '[') {
	endchar = ']';
	/* TODO: surely [^]] is valid? */
	if ((*iptr == '!' || *iptr == '^') && iptr[1] != ']') {
	    p->tp = CPAT_NCLASS;
	    iptr++;
	} else
	    p->tp = CPAT_CCLASS;
    } else {
	endchar = '}';
	p->tp = CPAT_EQUIV;
    }

    /*
     * Find end of class.  End character can appear literally first:
     * the test `optr == iptr' keeps the very first character from
     * being taken as the terminator.
     */
    for (optr = iptr; optr == iptr || *optr != endchar; optr++)
	if (!*optr)
	    return optr;	/* unterminated: hand back the null byte */

    /*
     * We can always fit the parsed class within the same length
     * because of the tokenization (including a null byte).
     *
     * As the input string is metafied, but shouldn't contain shell
     * tokens, we can just add our own tokens willy nilly.
     */
    optr = p->u.str = zhalloc((optr-iptr) + 1);

    /*
     * firsttime lets the first character through even if it equals
     * endchar, mirroring the scan above.
     *
     * NOTE(review): this loop does not test for a null byte itself; it
     * relies on the scan above having found endchar.  Confirm that the
     * [:name:] branch cannot step past that terminator.
     */
    while (firsttime || *iptr != endchar) {
	int ch;

	if (*iptr == '[' && iptr[1] == ':' &&
	    (nptr = strchr((char *)iptr + 2, ':')) && nptr[1] == ']') {
	    /*
	     * Range type: a [:name:] element.  The name is classified
	     * by range_type(); a name it reports as PP_UNKWN produces
	     * no output at all.
	     */
	    iptr += 2;
	    ch = range_type((char *)iptr, nptr-iptr);
	    iptr = nptr + 2;
	    if (ch != PP_UNKWN)
		*optr++ = STOUC(Meta) + ch;
	} else {
	    /* characters stay metafied */
	    char *ptr1 = iptr;	/* start of the current character */
	    /* step over the character: two bytes if it begins with Meta */
	    if (*iptr == Meta)
		iptr++;
	    iptr++;
	    if (*iptr == '-' && iptr[1] && iptr[1] != endchar) {
		/* a run of characters */
		iptr++;
		/* range token */
		*optr++ = Meta + PP_RANGE;

		/*
		 * start of range character
		 *
		 * NOTE(review): the byte following Meta is stored XORed
		 * with 32 here, while the end-of-range character below
		 * is copied unchanged - confirm this is intended.
		 */
		if (*ptr1 == Meta) {
		    *optr++ = Meta;
		    *optr++ = ptr1[1] ^ 32;
		} else
		    *optr++ = *ptr1;

		/* end of range character, copied as it stands */
		if (*iptr == Meta) {
		    *optr++ = *iptr++;
		    *optr++ = *iptr++;
		} else
		    *optr++ = *iptr++;
	    } else {
		/* a single character, not part of a run */
		if (*ptr1 == Meta) {
		    *optr++ = Meta;
		    *optr++ = ptr1[1] ^ 32;
		} else
		    *optr++ = *ptr1;
	    }
	}
	firsttime = 0;
    }

    *optr = '\0';
    return iptr;
}
/*
    Case 4 ) -- the remaining branch: Strassen multiplication.

    Both operands are cut into 2 x 2 blocks at half their row / column
    counts, seven block products m1 .. m7 are formed, and the four blocks of
    the answer are assembled from them:

        c11 = m1 + m4 - m5 + m7        c12 = m3 + m5
        c21 = m2 + m4                  c22 = m1 - m2 + m3 + m6

    The assembled result is cloned back into this matrix, which is returned.
*/
zen_type& strassen_multiply( const zen_type& other )
{
    zen_type& self = static_cast<zen_type&>( *this );

    // Split points; other is cut along its rows at the same place this is
    // cut along its columns.
    const size_type half_rows = self.row() >> 1;
    const size_type half_cols = self.col() >> 1;
    const size_type half_other_rows = half_cols;
    const size_type half_other_cols = other.col() >> 1;

    // Blocks of this.
    const zen_type a11( self, range_type( 0, half_rows ), range_type( 0, half_cols ) );
    const zen_type a12( self, range_type( 0, half_rows ), range_type( half_cols, self.col() ) );
    const zen_type a21( self, range_type( half_rows, self.row() ), range_type( 0, half_cols ) );
    const zen_type a22( self, range_type( half_rows, self.row() ), range_type( half_cols, self.col() ) );

    // Blocks of other.
    const zen_type b11( other, range_type( 0, half_other_rows ), range_type( 0, half_other_cols ) );
    const zen_type b12( other, range_type( 0, half_other_rows ), range_type( half_other_cols, other.col() ) );
    const zen_type b21( other, range_type( half_other_rows, other.row() ), range_type( 0, half_other_cols ) );
    const zen_type b22( other, range_type( half_other_rows, other.row() ), range_type( half_other_cols, other.col() ) );

    // The seven block products.
    const zen_type& m1 = ( a11 + a22 ) * ( b11 + b22 );
    const zen_type& m2 = ( a21 + a22 ) * b11;
    const zen_type& m3 = a11 * ( b12 - b22 );
    const zen_type& m4 = a22 * ( -b11 + b21 );
    const zen_type& m5 = ( a11 + a12 ) * b22;
    const zen_type& m6 = ( -a11 + a21 ) * ( b11 + b12 );
    const zen_type& m7 = ( a12 - a22 ) * ( b21 + b22 );

    // Blocks of the answer.
    const zen_type& c11 = m1 + m4 - m5 + m7;
    const zen_type& c21 = m2 + m4;
    const zen_type& c12 = m3 + m5;
    const zen_type& c22 = m1 - m2 + m3 + m6;

    // Glue the blocks: || within each block row, && between the block rows.
    const zen_type& product = ( c11 || c12 ) && ( c21 || c22 );

    self.clone( product, 0, self.row(), 0, other.col() );
    return self;
}
int ComputeBasis_HGRAD_Vector(const ordinal_type nworkset, const ordinal_type C, const ordinal_type order, const bool verbose) { typedef Vector<VectorTagType> VectorType; typedef typename VectorTagType::value_type ValueType; constexpr int VectorLength = VectorTagType::length; Teuchos::RCP<std::ostream> verboseStream; Teuchos::oblackholestream bhs; // outputs nothing if (verbose) verboseStream = Teuchos::rcp(&std::cout, false); else verboseStream = Teuchos::rcp(&bhs, false); Teuchos::oblackholestream oldFormatState; oldFormatState.copyfmt(std::cout); typedef typename Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ; *verboseStream << "DeviceSpace:: "; DeviceSpaceType::print_configuration(*verboseStream, false); *verboseStream << "HostSpace:: "; HostSpaceType::print_configuration(*verboseStream, false); *verboseStream << "VectorLength:: " << (VectorLength) << "\n"; using BasisTypeHost = Basis_HGRAD_HEX_C1_FEM<HostSpaceType,ValueType,ValueType>; using ImplBasisType = Impl::Basis_HGRAD_HEX_C1_FEM; using range_type = Kokkos::pair<ordinal_type,ordinal_type>; constexpr size_t LLC_CAPACITY = 32*1024*1024; Intrepid2::Test::Flush<LLC_CAPACITY,DeviceSpaceType> flush; Kokkos::Impl::Timer timer; double t_vectorize = 0; int errorFlag = 0; BasisTypeHost hostBasis; const auto cellTopo = hostBasis.getBaseCellTopology(); auto cubature = DefaultCubatureFactory::create<DeviceSpaceType,ValueType,ValueType>(cellTopo, order); const ordinal_type numCells = C, numCellsAdjusted = C/VectorLength + (C%VectorLength > 0), numVerts = cellTopo.getVertexCount(), numDofs = hostBasis.getCardinality(), numPoints = cubature->getNumPoints(), spaceDim = cubature->getDimension(); Kokkos::DynRankView<ValueType,HostSpaceType> dofCoordsHost("dofCoordsHost", numDofs, spaceDim); hostBasis.getDofCoords(dofCoordsHost); const auto refNodesHost = Kokkos::subview(dofCoordsHost, range_type(0, numVerts), Kokkos::ALL()); // pertub nodes 
Kokkos::DynRankView<VectorType,HostSpaceType> worksetCellsHost("worksetCellsHost", numCellsAdjusted, numVerts, spaceDim); for (ordinal_type cell=0;cell<numCells;++cell) { for (ordinal_type i=0;i<numVerts;++i) for (ordinal_type j=0;j<spaceDim;++j) { ValueType val = (rand()/(RAND_MAX + 1.0))*0.2 -0.1; worksetCellsHost(cell/VectorLength, i, j)[cell%VectorLength] = refNodesHost(i, j) + val; } } auto worksetCells = Kokkos::create_mirror_view(typename DeviceSpaceType::memory_space(), worksetCellsHost); Kokkos::deep_copy(worksetCells, worksetCellsHost); Kokkos::DynRankView<ValueType,DeviceSpaceType> refPoints("refPoints", numPoints, spaceDim), refWeights("refWeights", numPoints); cubature->getCubature(refPoints, refWeights); std::cout << "===============================================================================\n" << " Performance Test evaluating ComputeBasis \n" << " # of workset = " << nworkset << "\n" << " Test Array Structure (C,F,P,D) = " << numCells << ", " << numDofs << ", " << numPoints << ", " << spaceDim << "\n" << "===============================================================================\n"; *verboseStream << "\n" << "===============================================================================\n" << "TEST 1: evaluateFields vector version\n" << "===============================================================================\n"; try { Kokkos::DynRankView<ValueType,DeviceSpaceType> refBasisValues("refBasisValues", numDofs, numPoints), refBasisGrads ("refBasisGrads", numDofs, numPoints, spaceDim); ImplBasisType::getValues<DeviceSpaceType>(refBasisValues, refPoints, OPERATOR_VALUE); ImplBasisType::getValues<DeviceSpaceType>(refBasisGrads, refPoints, OPERATOR_GRAD); const ordinal_type ibegin = -3; // testing vertical approach { Kokkos::DynRankView<VectorType,DeviceSpaceType> weightedBasisValues("weightedBasisValues", numCellsAdjusted, numDofs, numPoints), weightedBasisGrads ("weightedBasisGrads", numCellsAdjusted, numDofs, numPoints, spaceDim); typedef 
F_hgrad_eval<VectorType,ValueType,DeviceSpaceType> FunctorType; using range_policy_type = Kokkos::Experimental::MDRangePolicy < DeviceSpaceType, Kokkos::Experimental::Rank<2>, Kokkos::IndexType<ordinal_type> >; range_policy_type policy( { 0, 0 }, { numCellsAdjusted, numPoints } ); FunctorType functor(weightedBasisValues, weightedBasisGrads, refBasisGrads, worksetCells, refWeights, refBasisValues, refBasisGrads); for (ordinal_type iwork=ibegin;iwork<nworkset;++iwork) { flush.run(); DeviceSpaceType::fence(); timer.reset(); Kokkos::parallel_for(policy, functor); DeviceSpaceType::fence(); t_vectorize += (iwork >= 0)*timer.seconds(); } } } catch (std::exception err) { *verboseStream << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; *verboseStream << err.what() << '\n'; *verboseStream << "-------------------------------------------------------------------------------" << "\n\n"; errorFlag = -1000; } std::cout << "TEST HGRAD " << " t_vectorize = " << (t_vectorize/nworkset) << std::endl; if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << "End Result: TEST PASSED\n"; // reset format state of std::cout std::cout.copyfmt(oldFormatState); return errorFlag; }