// Kernel for one batch entry: takes the 2-D slices of A, B and C selected by
// the leading index `i` and updates C in place to
//
//     C(i,:,:) = beta * C(i,:,:) + alpha * A(i,:,:) * B(i,:,:)
//
// where the slice of A is indexed as (row, k), B as (k, col) and C as
// (row, col), with msize rows, ksize inner steps and nsize columns.
//
// @param i  Leading (batch) index passed to subview() for A, B and C.
//
// beta is applied exactly once per entry of C. The previous form evaluated
// `beta*subC(i,j) + ...` inside the k loop, which multiplied the running value
// by beta on every inner step (beta^ksize overall). For beta == 1 the result
// is unchanged. When ksize == 0 the slice of C is now still scaled by beta.
KOKKOS_INLINE_FUNCTION
void operator() (const int i) const {
  left_subtype subA = subview(A, i, Kokkos::ALL(), Kokkos::ALL());
  left_subtype subB = subview(B, i, Kokkos::ALL(), Kokkos::ALL());
  left_subtype subC = subview(C, i, Kokkos::ALL(), Kokkos::ALL());

  const std::size_t numRows  = static_cast<std::size_t>(msize);
  const std::size_t numInner = static_cast<std::size_t>(ksize);
  const std::size_t numCols  = static_cast<std::size_t>(nsize);

  //#pragma loop(ivdep)
  // Loop indices are named row/k/col so they no longer shadow the batch index `i`.
  for (std::size_t row = 0; row != numRows; ++row) {
    // Scale the existing row of C once, before any products are accumulated.
    for (std::size_t col = 0; col != numCols; ++col) {
      subC(row, col) = beta * subC(row, col);
    }

    // Accumulate alpha * A(row,k) * B(k,col) over the inner dimension.
    for (std::size_t k = 0; k != numInner; ++k) {
      const Scalar r = subA(row, k);
      for (std::size_t col = 0; col != numCols; ++col) {
        subC(row, col) = subC(row, col) + alpha * r * subB(k, col);
      }
    }
  }
}
// Kernel for one batch entry: takes the 2-D slices of A, B and C selected by
// the leading index `i` and updates C in place to
//
//     C(i,:,:) = beta * C(i,:,:) + alpha * A(i,:,:) * B(i,:,:)
//
// where the slice of A is indexed as (row, k), B as (k, col) and C as
// (row, col), with msize rows, ksize inner steps and nsize columns.
//
// @param i  Leading (batch) index passed to subview() for A, B and C.
//
// beta is applied exactly once per entry of C. The previous form evaluated
// `beta*subC(i,j) + ...` inside the k loop, which multiplied the running value
// by beta on every inner step (beta^ksize overall). For beta == 1 the result
// is unchanged. When ksize == 0 the slice of C is now still scaled by beta.
KOKKOS_INLINE_FUNCTION
void operator() (const int i) const {
  left_subtype subA = subview(A, i, Kokkos::ALL(), Kokkos::ALL());
  left_subtype subB = subview(B, i, Kokkos::ALL(), Kokkos::ALL());
  left_subtype subC = subview(C, i, Kokkos::ALL(), Kokkos::ALL());

  // Convert the extents once so the loop comparisons are size_t vs size_t
  // (the conversion is the one the original `i < msize` performed implicitly,
  // assuming the extents are no wider than size_t).
  const size_t numRows  = static_cast<size_t>(msize);
  const size_t numInner = static_cast<size_t>(ksize);
  const size_t numCols  = static_cast<size_t>(nsize);

  //#pragma loop(ivdep)
  // Loop indices are named row/k/col so they no longer shadow the batch index `i`.
  for (size_t row = 0; row < numRows; row++) {
    // Scale the existing row of C once, before any products are accumulated.
    for (size_t col = 0; col < numCols; col++) {
      subC(row, col) = beta * subC(row, col);
    }

    // Accumulate alpha * A(row,k) * B(k,col) over the inner dimension.
    for (size_t k = 0; k < numInner; k++) {
      const Scalar r = subA(row, k);
      for (size_t col = 0; col < numCols; col++) {
        subC(row, col) = subC(row, col) + alpha * r * subB(k, col);
      }
    }
  }
}
void Tree<ElementContainer>::TraversePacket(Context<size,flags> &c,const Selector<size> &selector) const {
    // Traverses the packet described by `c`, either as a whole (when it is
    // small enough, fully selected and sign-coherent) or by splitting it into
    // four sub-packets of size/4 and recursing on each of them.
    //
    // c         traversal context; ray directions are read through c.Dir(n)
    // selector  selection masks for the packet, queried four entries at a time

    // "Reflected" means the packet is neither flagged primary nor shadow.
    enum { reflected = !(flags & (isct::fPrimary | isct::fShadow)) };

    // Largest packet that may be traversed without splitting.
    const bool withinSizeLimit = size <= (reflected ? 4 : isComplex ? 64 : 16);

    // For a partially filled selector every group of four must be fully set.
    bool everyGroupSelected = withinSizeLimit;
    if(!Selector<size>::full) {
        for(int group = 0; group < size / 4; group++) {
            if(selector.Mask4(group) != 0x0f0f0f0f) {
                everyGroupSelected = false;
                break;
            }
        }
    }

    if((Selector<size>::full || everyGroupSelected) && withinSizeLimit) {
        const Vec3q &firstDir = c.Dir(0);

        // Low three bits: sign bits of x, y and z taken from lane 0 of the first direction.
        int signMask = _mm_movemask_ps(_mm_shuffle_ps(_mm_shuffle_ps(firstDir.x.m, firstDir.y.m, 0), firstDir.z.m, 0 + (2 << 2))) & 7;

        // With sign filtering on, all directions must report the same sign mask.
        bool sameSigns = true;
        if(filterSigns) {
            for(int n = 0; n < size; n++) {
                if(GetVecSign(c.Dir(n)) != signMask) {
                    sameSigns = false;
                    break;
                }
            }
        }

        if(sameSigns) {
            bool usePrimaryPath = (flags & (isct::fPrimary | isct::fShadow)) && gVals[1];

            if((flags & isct::fShadow) && !isComplex) {
                // Drop back to the generic path when any direction's dot
                // product with the first direction falls below 0.9998.
                floatq minDot = 1.0f;
                for(int q = 1; q < size; q++)
                    minDot = Min(minDot, c.Dir(0) | c.Dir(q));
                if(ForAny(minDot < 0.9998f))
                    usePrimaryPath = false;
            }

            if(separateFirstElement)
                elements[0].Collide(c, 0);

            if(usePrimaryPath) {
                TraversePrimary(c);
            }
            else {
                TraversePacket0(c);
            }
            // if(primary && (flags & isct::fShadow)) c.stats.Skip();

            // The packet was handled as a whole; no split is needed.
            return;
        }
    }

    // Fallback: traverse the four quarter-size sub-packets one after another,
    // threading the shadow cache through each of them for shadow packets.
    for(int quarter = 0; quarter < 4; quarter++) {
        Context<size/4,flags> subC(c.Split(quarter));
        if(flags & isct::fShadow)
            subC.shadowCache = c.shadowCache;
        TraversePacket(subC, selector.SubSelector(quarter));
        if(flags & isct::fShadow)
            c.shadowCache = subC.shadowCache;
    }
}
/// Recursive step of the hierarchical decomposition: builds in _T the subtree
/// that corresponds to the currently enabled part of subG and returns its root.
///
/// @param g          Digraph that subG is a view of; used for arc end points
///                   and for iterating over all nodes.
/// @param w          Arc weights on g.
/// @param subG       Sub-digraph view; arcs and nodes are disabled/enabled
///                   while recursing. Nodes are re-enabled before returning,
///                   arcs disabled here are not re-enabled by this function.
/// @param mapToOrgG  Maps nodes of g to nodes of the original graph
///                   (lemon::INVALID when there is none).
/// @param G2T        Maps nodes of g to their node in _T
///                   (lemon::INVALID when not created yet).
/// @param sortedArcs Arcs of subG; presumably sorted by non-decreasing weight
///                   (confirm against the caller). Its size must equal
///                   lemon::countArcs(subG).
/// @param i          Non-negative rank into sortedArcs; the recursion bottoms
///                   out once at most one arc lies beyond it.
/// @return Root node (in _T) of the subtree built for subG.
Node TarjanHD::hd(const Digraph& g,
                  const DoubleArcMap& w,
                  SubDigraph& subG,
                  NodeNodeMap& mapToOrgG,
                  NodeNodeMap& G2T,
                  const ArcList& sortedArcs,
                  int i)
{
  assert(i >= 0);

  const int m = static_cast<int>(sortedArcs.size());
  assert(m == lemon::countArcs(subG));

  const int r = m - i;
  if (r == 0 || r == 1)
  {
    // Base case: add to _T a subtree whose root is labeled with w(e_m) and
    // whose children are the tree nodes of the vertices of subG.
    // NOTE(review): sortedArcs.back() requires a non-empty arc list; the call
    // with m == 0 and i == 0 is not guarded here -- confirm callers never do it.
    Node root = _T.addNode();
    _label[root] = w[sortedArcs.back()];
    _T2OrgG[root] = lemon::INVALID;

    for (SubNodeIt v(subG); v != lemon::INVALID; ++v)
    {
      Node vv = G2T[v];
      if (vv == lemon::INVALID)
      {
        // No tree node yet: create one (label -1) and link it to the
        // original graph when such a node exists.
        vv = _T.addNode();
        _label[vv] = -1;
        if (mapToOrgG[v] != lemon::INVALID)
        {
          _orgG2T[mapToOrgG[v]] = vv;
          _T2OrgG[vv] = mapToOrgG[v];
        }
      }
      _T.addArc(vv, root);
    }

    return root;
  }

  // j = ceil((i + m) / 2)
  const int j = (i + m) % 2 == 0 ? (i + m) / 2 : (i + m) / 2 + 1;

  // remove arcs j+1 .. m; arcEndIt ends up pointing at arc j+1, i.e. it is the
  // past-the-end iterator of the retained prefix e_1 .. e_j
  ArcListIt arcEndIt, arcIt = sortedArcs.begin();
  for (int k = 1; k <= m; ++k)
  {
    if (k == j + 1)
    {
      arcEndIt = arcIt;
    }
    if (k > j)
    {
      subG.disable(*arcIt);
    }
    ++arcIt;
  }

  // compute SCCs of the reduced sub-digraph
  IntNodeMap comp(g, -1);
  const int numSCC = lemon::stronglyConnectedComponents(subG, comp);

  if (numSCC == 1)
  {
    // subG is still strongly connected: recurse on the first j arcs
    ArcList newSortedArcs(sortedArcs.begin(), arcEndIt);
    return hd(g, w, subG, mapToOrgG, G2T, newSortedArcs, i);
  }

  // determine the node sets of the strongly connected components
  NodeHasher<Digraph> hasher(g);
  NodeSetVector components(numSCC, NodeSet(42, hasher));
  for (SubNodeIt v(subG); v != lemon::INVALID; ++v)
  {
    components[comp[v]].insert(v);
  }

  NodeVector roots(numSCC, lemon::INVALID);
  const double w_i = i > 0 ? w[getArcByRank(sortedArcs, i)]
                           : -std::numeric_limits<double>::max();

  for (int k = 0; k < numSCC; ++k)
  {
    const NodeSet& component = components[k];
    if (component.size() > 1)
    {
      // construct new sorted arc list for component: O(m) time
      ArcList newSortedArcs;
      for (ArcListIt it = sortedArcs.begin(); it != arcEndIt; ++it)
      {
        const Node u = g.source(*it);
        const Node v = g.target(*it);

        const bool u_in_comp = component.find(u) != component.end();
        const bool v_in_comp = component.find(v) != component.end();

        if (u_in_comp && v_in_comp)
        {
          newSortedArcs.push_back(*it);
        }
      }

      // remove nodes not in component from the graph; this sets the status
      // of every node of g, so no reset is needed between components
      for (NodeIt v(g); v != lemon::INVALID; ++v)
      {
        subG.status(v, component.find(v) != component.end());
      }

      // find new_i, i.e. largest k such that w(e'_k) <= w(e_i)
      // if i == 0 or i > 0 but no such k exists => new_i := 0
      const int new_i = get_i(newSortedArcs, w, w_i);

      // recurse on strongly connected component
      roots[k] = hd(g, w, subG, mapToOrgG, G2T, newSortedArcs, new_i);
    }
  }

  // enable all nodes again, once all components have been processed
  // (previously this ran after every component and redeclared k/component)
  for (int k = 0; k < numSCC; ++k)
  {
    const NodeSet& component = components[k];
    for (NodeSetIt nodeIt = component.begin(); nodeIt != component.end(); ++nodeIt)
    {
      subG.enable(*nodeIt);
    }
  }

  // construct the condensed graph:
  // each strongly connected component is collapsed into a single node, and
  // from the resulting sets of multiple arcs retain only those with minimum weight
  Digraph c;
  DoubleArcMap ww(c);
  NodeNodeMap mapCToOrgG(c);
  NodeNodeMap C2T(c);
  ArcList newSortedArcs;

  const int new_i = constructCondensedGraph(g, w, mapToOrgG, G2T, sortedArcs,
                                            comp, components, roots, j,
                                            c, ww, mapCToOrgG, C2T, newSortedArcs);

  // recurse on the condensed graph with all of its nodes and arcs enabled
  BoolArcMap newArcFilter(c, true);
  BoolNodeMap newNodeFilter(c, true);
  SubDigraph subC(c, newNodeFilter, newArcFilter);

  return hd(c, ww, subC, mapCToOrgG, C2T, newSortedArcs, new_i);
}