Ejemplo n.º 1
0
// Remove the first element equal to `val` from `vec`, IF THERE IS ONE.
// When no element matches, `vec` is left untouched.
// Element order is not preserved: the matching slot is swapped with the
// last slot and the vector is then shortened by one.
template<class T, uint LEN> inline
void maybe_erase(ShortVec<T,LEN> &vec, T val)
{
    const uint count = vec.size();

    // linear search for the first matching slot
    uint pos = 0;
    while(pos < count && !(vec[pos] == val))
        ++pos;

    // value does not occur: nothing to erase
    if(pos == count)
        return;

    // move the match to the back, then drop the back
    std::swap(vec[pos], vec[count-1]);
    vec.resize(count-1);
}
Ejemplo n.º 2
0
// Erase the first occurrence of a value from a short vector,
// !!! KNOWING that it MUST BE PRESENT
//
// Element order is not preserved: the matching slot is swapped with the
// last slot and the vector is then shortened by one.
//
// Because presence is assumed, the last slot is never compared against
// `val`; if `val` is in fact absent from a non-empty vector, the last
// element is dropped anyway.  Use maybe_erase-style logic when presence
// is not guaranteed.
//
// An empty vector is left untouched (it cannot contain `val`).
template<class T, uint LEN> inline
void remove(ShortVec<T,LEN> &vec, T val)
{
    const uint count = vec.size();

    // Guard: with unsigned arithmetic `count-1` would wrap around to the
    // largest representable value for an empty vector, causing
    // out-of-bounds reads in the loop and a bogus resize below.
    if(count == 0)
        return;

    const uint last_i = count-1;
    for(uint i=0; i<last_i; i++) {
        if(vec[i] == val) {
            // park the match in the last slot so the resize discards it
            std::swap(vec[i], vec[last_i]);
            break;
        }
    }
    vec.resize(last_i);
}
Ejemplo n.º 3
0
    // Update one line of cells, C cells at a time, reading from hoodOld and
    // writing to hoodNew, until hoodNew.index() reaches indexEnd.
    // x and y are copied through unchanged; cReal/cImag are iterated
    // ITERATIONS times and then accumulate the weighted x/y values gathered
    // via hoodOld.weights(0).  The nanoStep argument is unused.
    inline static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
    {
        using Vec = LibFlatArray::short_vec<double, C>;

        while (hoodNew.index() < indexEnd) {
            // load the current chunk of each member
            Vec xVals = &hoodOld->x();
            Vec yVals = &hoodOld->y();
            Vec re = &hoodOld->cReal();
            Vec im = &hoodOld->cImag();

            // repeated update of (re, im); the previous real part is
            // needed for the imaginary part, so stash it first
            for (int iter = 0; iter < ITERATIONS; ++iter) {
                Vec rePrev = re;
                re = re * re - im * im;
                im = Vec(2.0) * im * rePrev;
            }

            // accumulate weighted contributions from the entries of weights(0)
            for (const auto& entry: hoodOld.weights(0)) {
                Vec w;
                Vec gatheredX;
                Vec gatheredY;
                w.load_aligned(entry.second());
                gatheredX.gather(&hoodOld->x(), entry.first());
                gatheredY.gather(&hoodOld->y(), entry.first());
                re += gatheredX * w;
                im += gatheredY * w;
            }

            // write the results into the new grid
            &hoodNew->x() << xVals;
            &hoodNew->y() << yVals;
            &hoodNew->cReal() << re;
            &hoodNew->cImag() << im;

            // advance both neighborhoods to the next chunk
            hoodNew += C;
            ++hoodOld;
        }
    }
Ejemplo n.º 4
0
 // For every index from hoodOld.index() up to (excluding) indexEnd:
 // load the chunk of hoodNew's sum() at offset i * C, add the products of
 // the gathered value() entries and their weights from hoodOld.weights(0),
 // and store the chunk back.  The nanoStep argument is unused.
 static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
 {
     int chunk = hoodOld.index();
     while (chunk < indexEnd) {
         // start from what is already stored in the output
         ShortVec acc;
         acc.load_aligned(&hoodNew->sum() + chunk * C);

         for (const auto& entry: hoodOld.weights(0)) {
             ShortVec w;
             ShortVec gathered;
             w.load_aligned(entry.second());
             gathered.gather(&hoodOld->value(), entry.first());
             acc += gathered * w;
         }

         acc.store_aligned(&hoodNew->sum() + chunk * C);

         ++chunk;
         ++hoodOld;
     }
 }
Ejemplo n.º 5
0
 // For every index from hoodOld.index() up to (excluding) indexEnd:
 // load the chunk of hoodNew's sum() at offset i * C, add the products of
 // the gathered value() entries and their weights from hoodOld.weights(0),
 // and store the chunk back.  The nanoStep argument is unused.
 static void updateLineX(HOOD_NEW& hoodNew, int indexEnd, HOOD_OLD& hoodOld, unsigned /* nanoStep */)
 {
     int chunk = hoodOld.index();
     while (chunk < indexEnd) {
         // start from what is already stored in the output
         ShortVec acc;
         acc.load_aligned(&hoodNew->sum() + chunk * C);

         for (const auto& entry: hoodOld.weights(0)) {
             ShortVec w;
             ShortVec gathered;
             w.load_aligned(entry.second());
             // FIXME: is this gather actually correct? Shouldn't we use offset 0 for the gather? See also hpxperformancetests/main.cpp
             gathered.gather(&hoodOld->value(), entry.first());
             acc += gathered * w;
         }

         acc.store_aligned(&hoodNew->sum() + chunk * C);

         ++chunk;
         ++hoodOld;
     }
 }
Ejemplo n.º 6
0
void Mesh<VertData,TriData>::edgeSplit(
    RemeshScratchpad &scratchpad,
    Eptr e_split
) {
    /*
        Split the edge e_split at a newly created vertex v_new.
        Every triangle incident to e_split is replaced by two new
        triangles that share a new "mid" edge running from v_new to the
        vertex opposite e_split.  Afterwards the old edge and triangles
        are freed and the affected edges are (re-)scored and enqueued.
        
        Parameters:
            scratchpad  -- supplies the vertex/triangle cache
                           (newVert / newTri / freeTri), the edge queue,
                           and is handed to allocateRemeshEdge /
                           deallocateRemeshEdge
            e_split     -- the edge to be split
            
     *                     *  vs_opp
     *                   /   \
     *                 /       \
     *       e0s     /           \     e1s  
     *             /               \             
     *           /      ts_orig      \           
     *         /                       \         
     *    v0 *---------------------------* v1  
     *                  e_split                    
     *                     _
     *                     |
     *                     v
     *
     *                     *   vs_opp
     *                   / | \
     *                 /   |   \
     *        e0s    /     | <----------- es_mid
     *             /       |       \
     *           /         |         \  
     *         /  t0s_new  |  t1s_new  \
     *    v0 *-------------*-------------* v1
     *           e0_new  v_new  e1_new
     *
        
        BEGIN
            new vertex
                -- invoke interpolation callback
        FOR EACH TRIANGLE SUB-PROBLEM:
            new next triangle (from split)
            new prev triangle (from split)
                -- invoke triangle split callback
            delete triangle
            FIXUP all of the Tri-Edges
    */
    
    /* Identify all the following components
     * and separate them according to type:
     *  -   e_split:        the edge to be split
     *  -   ts_orig:        the triangles to be split
     *  -   v0, v1:         the two endpoint vertices of the edge
     *  (the following is helper data; do not delete these vertices)
     *  -   vs_opp:         the vertices opposite e_split for each triangle
     */
    
    // copy the triangle list: e_split itself is deallocated further down
    ShortVec<Tptr, 2>       ts_orig             = e_split->tris;
    
    Vptr                    v0                  = e_split->verts[0];
    Vptr                    v1                  = e_split->verts[1];
    
    // The lookup below pairs edges[k] with verts[k]: the vertex stored at
    // the same index as e_split is taken to be the one opposite e_split.
    ShortVec<Vptr, 2>       vs_opp(ts_orig.size());
    for(uint i=0; i<ts_orig.size(); i++) {
        Tptr                t_orig              = ts_orig[i];
        for(uint k=0; k<3; k++) {
            if(t_orig->edges[k] == e_split) {
                            vs_opp[i]           = t_orig->verts[k];
                            break;
            }
        }
    }
    
    /* Next, we need to create the new geometry which will supplant
     * the just enumerated parts:
     *  -   v_new:          the vertex introduced by the split
     *  -   e0_new,
     *      e1_new:         the two pieces of e_split
     *  -   t0s_new,
     *      t1s_new:        the two pieces of each triangle in ts_orig
     *  -   es_mid:         the new edges splitting each triangle
     */
    Vptr                    v_new       = scratchpad.cache.newVert();
    Eptr                    e0_new      = allocateRemeshEdge(scratchpad);
    Eptr                    e1_new      = allocateRemeshEdge(scratchpad);
    ShortVec<Tptr, 2>       t0s_new(ts_orig.size());
    ShortVec<Tptr, 2>       t1s_new(ts_orig.size());
    ShortVec<Eptr, 2>       es_mid(ts_orig.size());
    
    // all three arrays are parallel to ts_orig
    for(uint i=0; i<ts_orig.size(); i++) {
                            t0s_new[i]  = scratchpad.cache.newTri();
                            t1s_new[i]  = scratchpad.cache.newTri();
                            es_mid[i]   = allocateRemeshEdge(scratchpad);
    }
    
    /* Now we want to connect up all of this new geometry to each other
     * and to the existing geometry,
     *  BUT!!!
     * we also want to be careful not to point any existing geometry
     * at the new pieces.  Before doing that, we want to be able to
     * compute data for the new geometry and confirm or deny that we
     * want to commit this operation. (e.g. check for unwanted collisions)
     *
     * We adopt the following strategy:
     *  -   first point the new triangles at edges and vertices
     *  -   next, point the new edges at vertices; and 
     *          point the new edges at new triangles.
     *  -   finally, point the new vertex at
     *  -       the new edges and new triangles
     */
    
    // hook up t0s_new and t1s_new
    // also go ahead and hook up es_mid
    for(uint i=0; i<ts_orig.size(); i++) {
        Tptr                t_orig              = ts_orig[i];
        Tptr                t0                  = t0s_new[i];
        Tptr                t1                  = t1s_new[i];
        Eptr                e_mid               = es_mid[i];
        
        // replace every edge and vertex appropriately for the two variants
        //  t0 keeps v0 and gets v_new in place of v1;
        //  t1 keeps v1 and gets v_new in place of v0.
        // Slot k of each new triangle mirrors slot k of t_orig.
        for(uint k=0; k<3; k++) {
            Vptr            v_orig              = t_orig->verts[k];
            Eptr            e_orig              = t_orig->edges[k];
            if(v_orig == v0) {
                            // slot of v0: t0 gets the mid edge here,
                            // t1 inherits the original edge of this slot
                            t0->verts[k]        = v_orig;
                            t0->edges[k]        = e_mid;
                            t1->verts[k]        = v_new;
                            t1->edges[k]        = e_orig;
            } else if(v_orig == v1) {
                            // slot of v1: mirror image of the case above
                            t0->verts[k]        = v_new;
                            t0->edges[k]        = e_orig;
                            t1->verts[k]        = v_orig;
                            t1->edges[k]        = e_mid;
            } else {
                            // slot of the opposite vertex (where e_split
                            // was): each half gets its piece of e_split
                            t0->verts[k]        = v_orig;
                            t0->edges[k]        = e0_new;
                            t1->verts[k]        = v_orig;
                            t1->edges[k]        = e1_new;
            }
        }
                            populateTriFromTopoTri(t0);
                            populateTriFromTopoTri(t1);
        // set up the mid edge from the split
                            e_mid->verts[0]     = v_new;
                            e_mid->verts[1]     = vs_opp[i];
                            e_mid->tris.resize(2);
                            e_mid->tris[0]      = t0;
                            e_mid->tris[1]      = t1;
    }
    // hook up e0_new and e1_new
                            e0_new->verts[0]    = v0;
                            e0_new->verts[1]    = v_new;
                            e1_new->verts[0]    = v_new;
                            e1_new->verts[1]    = v1;
    for(uint i=0; i<ts_orig.size(); i++) {
                            e0_new->tris.push_back(t0s_new[i]);
                            e1_new->tris.push_back(t1s_new[i]);
    }
    // hook up v_new
                            v_new->edges.push_back(e0_new);
                            v_new->edges.push_back(e1_new);
    for(uint i=0; i<ts_orig.size(); i++) {
                            v_new->edges.push_back(es_mid[i]);
                            v_new->tris.push_back(t0s_new[i]);
                            v_new->tris.push_back(t1s_new[i]);
    }
    
    
    // OK, here we get to finally compute data for all the new geometry
    // Once we've done that, we can also check to see whether we actually
    // want to commit this operation or not.
    
    // interpolate data onto the new vertex
    {
        VertData            &data_new           = verts[v_new->ref];
        const VertData      &data0              = verts[v0->ref];
        const VertData      &data1              = verts[v1->ref];
                            data_new.interpolate(data0, data1);
                            // flagged manifold exactly when the split edge
                            // had two incident triangles
                            data_new.manifold   = (ts_orig.size() == 2);
    }
    
    // split triangles' data
    for(uint i=0; i<ts_orig.size(); i++) {
        Tptr                t_orig              = ts_orig[i];
        
        Tptr                t0                  = t0s_new[i];
        Tptr                t1                  = t1s_new[i];
                            
                            split_tris(t0->ref, t1->ref, t_orig->ref);
    }
    
    // TODO: COMMIT OPTION will be left as a stub for now!
    // (the branch below is intentionally dead code: the operation is
    //  currently always committed)
    if(false) {
        // DESTROY THE GEOMETRY WE JUST CREATED AND RETURN
    }
    
    // record border edges for later priority updates
    // (every edge of the original triangles other than e_split; this must
    //  be collected before the original triangles are freed below)
    ShortVec<Eptr, 4>       borderEdges;
    for(Tptr t : ts_orig) { // TODO: evacuate all of this to the end...
        for(uint k=0; k<3; k++) {
            if(t->edges[k] == e_split)  continue;
                            borderEdges.push_back(t->edges[k]);
        }
    }
    
    /* Now that we've got the go ahead, let's hook in the new geometry to
     *  the existing geometry!
     * We can do this in the following order:
     *  -   take all the new edges, and add them to their existing endpoint's
     *      edge list.  (all new edges must have exactly one such endpoint)
     *  -   take all the new triangles, and add them to their two existing
     *      endpoints' and one existing edge's triangle lists.
     */
    
    // add new edges to v0 and v1
                            v0->edges.push_back(e0_new);
                            v1->edges.push_back(e1_new);
    
    // now, let's tackle the other edges and triangles in tandem...
    for(uint i=0; i<ts_orig.size(); i++) {
        Tptr                t_orig              = ts_orig[i];
        Vptr                v_opp               = vs_opp[i];
        
        // add mid edge and two tris to v_opp
                            v_opp->edges.push_back(es_mid[i]);
                            v_opp->tris.push_back(t0s_new[i]);
                            v_opp->tris.push_back(t1s_new[i]);
        // add resp. tris to v0 and v1
                            v0->tris.push_back(t0s_new[i]);
                            v1->tris.push_back(t1s_new[i]);
        // find the two non-split edges and add resp. tris
        // (the edge in v0's slot was handed to t1 above, and the edge in
        //  v1's slot was handed to t0, so register those tris with them)
        for(uint k=0; k<3; k++) {
            if(t_orig->verts[k] == v0) {
                Eptr        e1                  = t_orig->edges[k];
                            e1->tris.push_back(t1s_new[i]);
            } else if(t_orig->verts[k] == v1) {
                Eptr        e0                  = t_orig->edges[k];
                            e0->tris.push_back(t0s_new[i]);
            }
        }
    }
    
    /* Now, let's kill all the old geometry.  This consists of:
     *  -   e_split:        the edge to be split
     *  -   ts_orig:        the triangles to be split
     *
     * Luckily, in triangle splitting we know exactly which things
     * must be deleted.  A split cannot make any geometry newly singular.
     */
    
    // kill triangles
    for(Tptr t : ts_orig) {
        // First, unhook this triangle from its vertices and edges
        // (this includes e_split->tris; ts_orig is a separate copy, so
        //  the iteration here is not disturbed)
        for(uint k=0; k<3; k++) {
            Vptr            v               = t->verts[k];
                            remove(v->tris, t);
            
            Eptr            e               = t->edges[k];
                            remove(e->tris, t);
        }
        // now that we're disconnected, jettison the triangle
                            scratchpad.cache.freeTri(t);
    }
    
    // now, kill the edge that we split
                            remove(v0->edges, e_split);
                            remove(v1->edges, e_split);
                            deallocateRemeshEdge(scratchpad, e_split);
    
    
    // recompute edge scores for all edges whose scores might be affected
    // Don't need to dequeue newly created edges...
                            scoreAndEnqueue(scratchpad.queue, e0_new);
                            scoreAndEnqueue(scratchpad.queue, e1_new);
    for(Eptr e : es_mid) {
                            scoreAndEnqueue(scratchpad.queue, e);
    }
    // ...but pre-existing border edges are dequeued first, then re-scored
    for(Eptr e : borderEdges) {
                            dequeue(scratchpad.queue, e);
                            scoreAndEnqueue(scratchpad.queue, e);
    }
}
Ejemplo n.º 7
0
void Mesh<VertData, TriData>::edgeCollapse(
    RemeshScratchpad &scratchpad,
    Eptr e_collapse,
    bool collapsing_tetrahedra_disappear
) {
    /*
     *
     *  Two cases: both with and without a triangle filling the
     *      triangular arrangement (i.e. wedge) of edges
     *
     *                     *   
     *                   /   \
     *                 / # # # \
     *               /  # # # #  \  
     *             / # # # # # # # \
     *           /  # # # # # # # #  \
     *         / # # # # # # # # # # # \
     *  vid0 *---------------------------* vid1
     *                eid_collapse
     *
     *                     *   
     *                   /   \
     *                 /       \
     *               /           \  
     *             /               \
     *           /                   \
     *         /                       \
     *  vid0 *---------------------------* vid1
     *                eid_collapse
     *
     *
     *  Additionally, we need to worry about how tetrahedral structures
     *  can collapse.  If the collapse will identify the two
     *  non-collapsing faces, then we call this structure a triangle wedge
     *
     *
     *                     * 
     *                    /|\
     *                   / | \
     *                  /  |  \
     *                 / # | # \
     *                / #  |  # \
     *               /   # | # # \
     *              / # #  *  # # \
     *             /  #  /   \  #  \
     *            / #  /       \  # \
     *           /   /           \   \ 
     *          /  /               \  \
     *         / /                   \ \
     *        //                       \\
     *  vid0 *---------------------------* vid1
     *                 eid_collapse
     *
     *
     *
        In summary, there are three features we should be interested in:
        --  hollow edge wedges that will be collapsed
        --  filled edge wedges that will be collapsed
        --  triangle wedges that will be collapsed
        
        We can identify all edge wedges (hollow or filled) via:
            JOIN FILTER(EDGES(A,X), X!=B) WITH
                 FILTER(EDGES(B,Y), Y!=A)
                 WHERE X=Y
        We can identify all triangle wedges via:
            JOIN FILTER(TRIANGLES(A,X,Y), X!=B && Y!=B) WITH
                 FILTER(TRIANGLES(B,Z,W), Z!=A && W!=A)
                 WHERE (X,Y)=(Z,W) or (X,Y)=(W,Z)
     */
    
    /* Identify all the following components
     * and separate them according to type:
     *  -   e_collapse:     the collapsing edge
     *  -   tris_collapse:  the collapsing triangles
     *  -   v0, v1:         the merging vertices
     *  -   edge_wedges:    the merging edges
     *  -   tri_wedges:     the merging (or disappearing) triangles
     *  -   edges_moving:   the persisting, but moving edges
     *  -   tris_moving:    the persisting, but moving triangles
     */
    ShortVec<Tptr, 2>       tris_collapse   = e_collapse->tris;
    
    Vptr                    v0              = e_collapse->verts[0];
    Vptr                    v1              = e_collapse->verts[1];
    
    ShortVec<EdgeWedge, 2>  edge_wedges;
    ShortVec<Eptr, 10>      edges_moving;
    
    ShortVec<TriWedge, 2>   tri_wedges;
    ShortVec<Tptr, 14>      tris_moving;
    
    // BUILD all the edge wedges
    std::map<Vptr, EdgeWedge>   edge_w_map;
    for(Eptr e : v0->edges) {
        if(e == e_collapse) continue;
        Vptr                ev0                 = e->verts[0];
        Vptr                ev1                 = e->verts[1];
        Vptr                key                 = (ev0 != v0)? ev0 : ev1;
                            edge_w_map[key].e0  = e;
    }
    for(Eptr e : v1->edges) {
        if(e == e_collapse) continue;
        Vptr                ev0                 = e->verts[0];
        Vptr                ev1                 = e->verts[1];
        Vptr                key                 = (ev0 != v1)? ev0 : ev1;
                            edge_w_map[key].e1  = e;
    }
    for(const auto &pair : edge_w_map) {
        if(pair.second.full())
                            edge_wedges.push_back(pair.second);
        else
                            edges_moving.push_back(pair.second.one());
    }
    
    // BUILD all the triangle wedges
    std::map<Eptr, TriWedge>    tri_w_map;
    for(Tptr t : v0->tris) {
        if(t->edges[0] == e_collapse ||
           t->edges[1] == e_collapse ||
           t->edges[2] == e_collapse)   continue;
        
        Eptr                key                 = nullptr;
        for(uint k=0; k<3; k++)
            if(t->verts[k] == v0)
                            key                 = t->edges[k];
        ENSURE(key != NULL);
                            tri_w_map[key].t0   = t;
    }
    for(Tptr t : v1->tris) {
        if(t->edges[0] == e_collapse ||
           t->edges[1] == e_collapse ||
           t->edges[2] == e_collapse)   continue;
        
        Eptr                key                 = nullptr;
        for(uint k=0; k<3; k++)
            if(t->verts[k] == v1)
                            key                 = t->edges[k];
        ENSURE(key != NULL);
                            tri_w_map[key].t1   = t;
    }
    for(const auto &pair : tri_w_map) {
        if(pair.second.full())
                            tri_wedges.push_back(pair.second);
        else
                            tris_moving.push_back(pair.second.one());
    }
    
    /* Next, we need to create the new geometry which will supplant
     * the just enumerated parts:
     *  -   v_merged:       the merged vertex
     *  -   edges_merged:   the merged edges (parallel to edge_wedges)
     *  -   tris_merged:    the merged triangles (parallel to tri_wedges)
     *  -   edges_moved:    the moved edges (parallel to edges_moving)
     *  -   tris_moved:     the moved triangles (parallel to tris_moving)
     */
    /* As part of this, we will build a record of the remapping to
     * aid us when we start to connect all of this geometry
     */
    
    Vptr                    v_merged            = scratchpad.cache.newVert();
    ShortVec<Eptr, 2>       edges_merged(edge_wedges.size());
    ShortVec<Eptr, 10>      edges_moved(edges_moving.size());
    ShortVec<Tptr, 2>       tris_merged(tri_wedges.size());
    ShortVec<Tptr, 14>      tris_moved(tris_moving.size());
    
    VptrRemap               vptr_remap(v0, v1, v_merged);
    PtrRemap<TopoEdge>    eptr_remap;
    PtrRemap<TopoTri>     tptr_remap;
    
    // Create new edges and enter re-mappings
    eptr_remap.set(e_collapse, nullptr); // mark this edge as dying
    for(uint i=0; i<edge_wedges.size(); i++) {
                            edges_merged[i] = allocateRemeshEdge(scratchpad);
                            eptr_remap.set(edge_wedges[i].e0, edges_merged[i]);
                            eptr_remap.set(edge_wedges[i].e1, edges_merged[i]);
    }
    for(uint i=0; i<edges_moving.size(); i++) {
                            edges_moved[i] = allocateRemeshEdge(scratchpad);
                            eptr_remap.set(edges_moving[i], edges_moved[i]);
    }
    
    // Create new triangles and enter re-mappings
    for(Tptr t : tris_collapse)
                            // mark this triangle as dying
                            tptr_remap.set(t, nullptr); 
    for(uint i=0; i<tri_wedges.size(); i++) {
        if(collapsing_tetrahedra_disappear) {
                            tris_merged[i]      = nullptr;
        } else {
                            tris_merged[i]      = scratchpad.cache.newTri();
        }
                            tptr_remap.set(tri_wedges[i].t0, tris_merged[i]);
                            tptr_remap.set(tri_wedges[i].t1, tris_merged[i]);
    }
    for(uint i=0; i<tris_moving.size(); i++) {
                            tris_moved[i]       = scratchpad.cache.newTri();
                            tptr_remap.set(tris_moving[i], tris_moved[i]);
    }
    
    /* Now we want to connect up all of this new geometry to each other
     * and to the existing geometry,
     *  BUT!!!
     * we also want to be careful not to point any existing geometry
     * at the new pieces.  Before doing that, we want to be able to
     * compute data for the new geometry and confirm or deny that we
     * want to commit this operation. (e.g. check for unwanted collisions)
     *
     * We adopt the following strategy:
     *  -   first point the new triangles at edges and vertices
     *  -   next, point the new edges at vertices; and 
     *          point the new edges at new triangles.
     *  -   finally, point the new vertex at
     *  -       the new edges and new triangles
     *
     * If an edge cannot find any valid triangles it is incident to,
     *  then it must be marked for deletion, etc.
     * If the merged vertex cannot find any valid tris/edges it is incident to,
     *  then it must be marked for deletion too!
     */
    
    // First, the triangles
    if(!collapsing_tetrahedra_disappear) {
        for(uint i=0; i<tri_wedges.size(); i++) {
            Tptr            t0                  = tri_wedges[i].t0;
            //Tptr            t1                  = tri_wedges[i].t1;
            Tptr            t_new               = tris_merged[i];
            
            for(uint k=0; k<3; k++) {
                            t_new->verts[k]     = vptr_remap[t0->verts[k]];
                            t_new->edges[k]     = eptr_remap[t0->edges[k]];
            }
                            populateTriFromTopoTri(t_new);
        }
    }
    for(uint i=0; i<tris_moving.size(); i++) {
        Tptr                t_old               = tris_moving[i];
        Tptr                t_new               = tris_moved[i];
        
        for(uint k=0; k<3; k++) {
                            t_new->verts[k]     = vptr_remap[t_old->verts[k]];
                            t_new->edges[k]     = eptr_remap[t_old->edges[k]];
        }
                            populateTriFromTopoTri(t_new);
    }
    
    // Next, the edges
    for(uint i=0; i<edge_wedges.size(); i++) {
        Eptr                e0                  = edge_wedges[i].e0;
        Eptr                e1                  = edge_wedges[i].e1;
        Eptr                e_new               = edges_merged[i];
        
        // plug in all the valid triangles...
        for(Tptr t : e0->tris) {
                            t                   = tptr_remap[t];
            if(t)           e_new->tris.push_back(t);
        }
        for(Tptr t : e1->tris) {
                            t                   = tptr_remap[t];
            if(t)           e_new->tris.push_back(t);
        }
        
        if(e_new->tris.size() == 0) { // if there are no parent triangles left
            // then we need to kill this edge
                            eptr_remap.set(e0, nullptr);
                            eptr_remap.set(e1, nullptr);
                            deallocateRemeshEdge(scratchpad, e_new);
                            edges_merged[i]     = nullptr;
        }
        else { // otherwise, let's go ahead and finish hooking up this edge
            for(uint k=0; k<2; k++)
                            e_new->verts[k]     = vptr_remap[e0->verts[k]];
        }
    }
    for(uint i=0; i<edges_moving.size(); i++) {
        Eptr                e_old               = edges_moving[i];
        Eptr                e_new               = edges_moved[i];
        
        // note: should never have any dead/null triangles
        for(Tptr t : e_old->tris) {
                            t                   = tptr_remap[t];
            ENSURE(t);
                            e_new->tris.push_back(t);
        }
        for(uint k=0; k<2; k++)
                            e_new->verts[k]     = vptr_remap[e_old->verts[k]];
    }
    
    // Finally, the vertex
    {
        // Should do this directly, not via the remap translation.
        // Working via re-maps will lead to duplicates of merged geometry.
        // However, we can exploit the fact that this vertex is unique,
        //    and that we already have lists of all the incident geometry
        
        for(Tptr t : tris_merged)
            if(t)           v_merged->tris.push_back(t);
        for(Tptr t : tris_moved) // cannot be dead
                            v_merged->tris.push_back(t);
        if(v_merged->tris.size() == 0) {
                            scratchpad.cache.freeVert(v_merged);
                            v_merged            = nullptr;
        } else {
            for(Eptr e : edges_merged)
                if(e)           v_merged->edges.push_back(e);
            for(Eptr e : edges_moved) // cannot be dead
                                v_merged->edges.push_back(e);
            // it's impossible to have triangles incident w/o edges too.
            ENSURE(v_merged->edges.size() > 0);
        }
    }
    
    
    // OK, here we get to finally compute data for all the new geometry
    // Once we've done that, we can also check to see whether we actually
    // want to commit this operation or not.
    
    // merge vertices' data
    if(v_merged) { // REMEMBER: vertex could be deleted by now
        VertData            &data_new           = verts[v_merged->ref];
        const VertData      &data0              = verts[v0->ref];
        const VertData      &data1              = verts[v1->ref];
                            data_new.merge(data0, data1);
    }
    // merge triangles' data
    for(uint i=0; i<tri_wedges.size(); i++) {
        Tptr                t_new               = tris_merged[i];
        // REMEMBER: these triangles could be deleted
        if(!t_new)          continue;
        
        Tptr                t0                  = tri_wedges[i].t0;
        Tptr                t1                  = tri_wedges[i].t1;
        
                            merge_tris(t_new->ref, t0->ref, t1->ref);
    }
    // update moved triangles' data
    for(uint i=0; i<tris_moving.size(); i++) {
        // NOTE: moved triangles cannot be deleted
        Tptr                t_new               = tris_moved[i];
        Tri                 &tri_new            = tris[t_new->ref];
        
        Tptr                t_old               = tris_moving[i];
        Tri                 &tri_old            = tris[t_old->ref];
        
                            move_tri(tri_new, tri_old);
    }
    
    // TODO: COMMIT OPTION will be left as a stub for now!
    if(false) {
        // DESTROY THE GEOMETRY WE JUST CREATED
        // AND RETURN
    }
    
    // Find and Store all of the existing, unchanged edges
    // that border this operation.  We will need these references
    // when we account for changes to edge operation priorities later
    ShortVec<Eptr, 16>      borderEdges;
    for(const TriWedge &e_wedge : tri_wedges) {
        Tptr                t0                  = e_wedge.t0; 
        for(uint k=0; k<3; k++) {
            if(t0->verts[k] == v0) {
                            borderEdges.push_back(t0->edges[k]);
                            break;
            }
        }
    }
    for(Tptr t : tris_moved) {
        for(uint k=0; k<3; k++) {
            if(t->verts[k] == v_merged) {
                            borderEdges.push_back(t->edges[k]);
                            break;
            }
        }
    }
    
    /* Now that we've got the go ahead, let's hook in the new geometry to
     *  the existing geometry!
     * We can do this in the following order:
     *  -   take all the new edges, and add them to their existing endpoint's
     *      edge list.  (all new edges must have exactly one such endpoint)
     *  -   take all the new triangles, and add them to their two existing
     *      endpoints' and one existing edge's triangle lists.
     */
    // first, consolidate arrays
    ShortVec<Eptr, 12>      new_edges;
    ShortVec<Tptr, 16>      new_tris;
    {
        for(Eptr e : edges_merged)
            if(e)           new_edges.push_back(e);
        for(Eptr e : edges_moved) // cannot be deleted
                            new_edges.push_back(e);
        for(Tptr t : tris_merged)
            if(t)           new_tris.push_back(t);
        for(Tptr t : tris_moved) // cannot be deleted
                            new_tris.push_back(t);
    }
    // hook up edges to existing geometry
    for(Eptr edge : new_edges) {
        Vptr                ev0                 = edge->verts[0];
        Vptr                ev1                 = edge->verts[1];
        Vptr                v_old               = (ev0 != v_merged)? ev0 : ev1;
                            v_old->edges.push_back(edge);
    }
    // hook up triangles to existing geometry
    for(Tptr tri : new_tris) {
        for(uint k=0; k<3; k++) {
            if(tri->verts[k] != v_merged)       continue;
            Eptr            e                   = tri->edges[k];
            Vptr            tv0                 = e->verts[0];
            Vptr            tv1                 = e->verts[1];
                            e->tris.push_back(tri);
                            tv0->tris.push_back(tri);
                            tv1->tris.push_back(tri);
                            break;
        }
    }
    
    /* Now, let's kill all the old geometry.  We can use the checklist
     * we built at the beginning:
     *  -   e_collapse:     the collapsing edge
     *  -   tris_collapse:  the collapsing triangles
     *  -   v0, v1:         the merging vertices
     *  -   edge_wedges:    the merging edges
     *  -   tri_wedges:     the merging (or disappearing) triangles
     *  -   edges_moving:   the persisting, but moving edges
     *  -   tris_moving:    the persisting, but moving triangles
     *
     * We need to be careful to free this geometry in top down order;
     * starting with the triangles and moving towards the vertices.
     * If we furthermore guarantee that any singular edges or vertices
     * created by a triangle deletion are also deleted, then we can
     * focus all of our attention on just deleting triangles 
     */
    ShortVec<Tptr, 16>      dead_tris;
    ShortVec<Eptr, 16>      dead_edges;
    ShortVec<Vptr, 2>       dead_verts;
    
    // assemble the list of triangles to kill
    for(Tptr t : tris_collapse)
                            dead_tris.push_back(t);
    for(const auto &t_wedge : tri_wedges) {
                            dead_tris.push_back(t_wedge.t0);
                            dead_tris.push_back(t_wedge.t1);
    }
    for(Tptr t : tris_moving)
                            dead_tris.push_back(t);
    
    // process the list of triangles
    for(Tptr tri : dead_tris) {
        // Let's unhook this triangle from its faces first
        for(uint k=0; k<3; k++) {
            Vptr            v                   = tri->verts[k];
                            remove(v->tris, tri);
            if(v->tris.size() == 0)
                            dead_verts.push_back(v);
            
            Eptr            e                   = tri->edges[k];
                            remove(e->tris, tri);
            if(e->tris.size() == 0)
                            dead_edges.push_back(e);
        }
        // now that we're disconnected, go ahead and jettison the triangle
                            scratchpad.cache.freeTri(tri);
    }
    
    // now, we can process the list of edges
    for(Eptr edge : dead_edges) {
        // Let's unhook this edge from its vertices
        for(uint k=0; k<2; k++) {
            Vptr            v                   = edge->verts[k];
                            remove(v->edges, edge);
            // the triangle removal was enough to
            // determine which vertices should die.
            // re-adding them here would lead to duplicates
        }
        // and then jettison the edge
                            deallocateRemeshEdge(scratchpad, edge);
        // If this edge is in the border edge list,
        // then we need to remove it right away!
                            maybe_erase(borderEdges, edge);
    }
    
    // Finally, polish off by getting rid of any vertices that talked too much
    for(Vptr vert : dead_verts) {
                            scratchpad.cache.freeVert(vert);
                            if(vert == v_merged)    v_merged = nullptr;
    }
    
    // We pause a moment here to update the manifoldness of any
    // vertices for which it might have changed
    // ONLY do if the merged vertex is still alive...
    if(v_merged) {
        verts[v_merged->ref].manifold = true;
        for(Eptr e : v_merged->edges) {
            if(e->tris.size() != 2)
                verts[v_merged->ref].manifold = false;
            
            // process neighboring point
            Vptr v = e->verts[0];
            if(v == v_merged)   v = e->verts[1];
            verts[v->ref].manifold = true;
            for(Eptr ee : v->edges) {
                if(ee->tris.size() != 2) {
                    verts[v->ref].manifold = false;
                    break;
                }
            }
        }
    }
    
    // Before we're completely done, we will go through and
    // adjust priorities for edges which might have been affected by this op.
    // Only explicitly dequeue pre-existing edges we did not delete!
    for(Eptr e : edges_merged) { if(e) { // might be deleted
                            scoreAndEnqueue(scratchpad.queue, e);
    }}
    for(Eptr e : edges_moved) { // def. not deleted
                            scoreAndEnqueue(scratchpad.queue, e);
    }
    for(Eptr e : borderEdges) {
            // border edges could have been deleted...
                            dequeue(scratchpad.queue, e);
                            scoreAndEnqueue(scratchpad.queue, e);
    }
    
    // that should more or less complete an edge collapse
}
Ejemplo n.º 8
0
// Exercises short_vec<CARGO, ARITY> with integer-valued data: default
// construction, pointer load / operator<< store, the arithmetic operators
// (+, +=, -, -=, *, *=, /, /=), sqrt(), stream output, gather (from an index
// array and, when LIBFLATARRAY_WITH_CPP14 is defined, from an initializer
// list), scatter, and non-temporal / aligned stores and aligned loads.
//
// NOTE(review): CARGO and ARITY are not declared inside this function;
// presumably they are template parameters of an enclosing declaration that
// is not visible here -- confirm.
//
// The arithmetic sections are order-dependent: unless a section re-fills
// vec1/vec2 itself, it relies on the values the preceding section left in
// vec2. Do not reorder sections without recomputing the expected values.
void testImplementationInt()
{
    typedef short_vec<CARGO, ARITY> ShortVec;
    // ten vectors' worth of elements, i.e. an exact multiple of ARITY
    const int numElements = ShortVec::ARITY * 10;

    std::vector<CARGO> vec1(numElements);
    std::vector<CARGO> vec2(numElements, 4711);

    // init vec1:
    for (int i = 0; i < numElements; ++i) {
        vec1[i] = i;
    }

    // test default c-tor:
    // sanity check: vec2 still carries its 4711 fill value
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST(4711 == vec2[i]);
    }
    // Chunked loop pattern used throughout: the bound stops before any chunk
    // that would not fit entirely; since numElements is a multiple of ARITY,
    // every element gets visited.
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v;
        &vec2[i] << v;
    }
    // storing default-constructed vectors is expected to have zeroed vec2
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST(0 == vec2[i]);
    }

    // tests vector load/store:
    // round trip: load a chunk from vec1, store it to the same offset in vec2
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ(i, vec2[i]);
    }

    // tests scalar load, vector add:
    // w is constructed from the single scalar vec1[1] (== 1); the check below
    // expects that 1 to have been added to every element
    ShortVec w = vec1[1];

    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        &vec2[i] << (v + w);
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ((i + 1), vec2[i]);
    }

    // tests +=
    // vec2 holds i + 1 from the previous section: i + (i + 1) == 2 * i + 1
    // (the loop-local w shadows the scalar-initialized w declared above)
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v += w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ((2 * i + 1), vec2[i]);
    }

    // test -
    // vec2 holds 2 * i + 1: i - (2 * i + 1) == -i - 1
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << (v - w);
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ((-i - 1), vec2[i]);
    }

    // test -=
    // vec2 holds -i - 1: i - (-i - 1) == 2 * i + 1
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v -= w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ((2 * i + 1), vec2[i]);
    }

    // test *
    // vec2 holds 2 * i + 1: product is i * (2 * i + 1)
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << (v * w);
    }
    for (int i = 0; i < numElements; ++i) {
        int reference = (i * (2 * i + 1));
        BOOST_TEST_EQ(reference, vec2[i]);
    }

    // test *=
    // vec2 is re-filled here, so this section does not depend on the previous one
    for (int i = 0; i < numElements; ++i) {
        vec2[i] = i + 2;
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v *= w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ(i * (i + 2), vec2[i]);
    }

    // test /
    // operands are chosen so that every quotient is exactly 4 and no divisor is 0
    for (int i = 0; i < numElements; ++i) {
        vec1[i] = 4 * (i + 1);
        vec2[i] = (i + 1);
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << (v / w);
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ(4, vec2[i]);
    }

    // test /=
    // same operands as above; vec2 must be re-filled because the previous
    // section overwrote it with the quotients
    for (int i = 0; i < numElements; ++i) {
        vec1[i] = 4 * (i + 1);
        vec2[i] = (i + 1);
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v /= w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ(4, vec2[i]);
    }

    // test sqrt()
    // inputs are perfect squares, so the expected root is exactly i
    for (int i = 0; i < numElements; ++i) {
        vec1[i] = i * i;
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        &vec2[i] << sqrt(v);
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ(i, vec2[i]);
    }

    // test "/ sqrt()"
    // 2 * (i + 1) / sqrt((i + 1)^2) == 2 for every element
    for (int i = 0; i < numElements; ++i) {
        vec1[i] = (i + 1) * (i + 1);
        vec2[i] = (i + 1) * 2;
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << w / sqrt(v);
    }
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST_EQ(2, vec2[i]);
    }

    // test string conversion
    // only the first ARITY entries of vec1 are rewritten; they feed the one
    // vector that gets printed
    for (int i = 0; i < ShortVec::ARITY; ++i) {
        vec1[i] = i + 5;
    }
    ShortVec v(&vec1[0]);
    std::ostringstream buf1;
    buf1 << v;

    // build the reference text by hand: "[5, 6, ..., <ARITY - 1 + 5>]"
    std::ostringstream buf2;
    buf2 << "[";
    for (int i = 0; i < (ShortVec::ARITY - 1); ++i) {
        buf2 << (i + 5) << ", ";
    }
    buf2 << (ShortVec::ARITY - 1 + 5) << "]";

    BOOST_TEST(buf1.str() == buf2.str());

    // test gather
    {
        CARGO array[ARITY * 10];
        std::vector<int, aligned_allocator<int, 64> > indices(ARITY);
        CARGO actual[ARITY];
        CARGO expected[ARITY];
        std::memset(array, '\0', sizeof(CARGO) * ARITY * 10);

        // only every 10th slot is non-zero; it holds (its index + 5)
        for (int i = 0; i < ARITY * 10; ++i) {
            if (i % 10 == 0) {
                array[i] = i + 5;
            }
        }

        // the indices address exactly those non-zero slots
        for (int i = 0; i < ARITY; ++i) {
            indices[i] = i * 10;
            expected[i] = (i * 10) + 5;
        }

        ShortVec vec;
        vec.gather(array, &indices[0]);
        actual << vec;

        for (int i = 0; i < ARITY; ++i) {
            BOOST_TEST_EQ(actual[i], expected[i]);
        }
    }

#ifdef LIBFLATARRAY_WITH_CPP14
    // test gather via initializer_list
    {
        CARGO actual1[ARITY];
        CARGO actual2[ARITY];
        CARGO expected[ARITY];
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = (i * 10) + 5;
        }

        // max: 32
        // Both lists contain 32 values following the pattern 10 * k + 5; only
        // the first ARITY results are checked below.
        // NOTE(review): presumably surplus list entries are ignored when
        // ARITY < 32 -- confirm against short_vec.
        // vec1 exercises construction from a list, vec2 assignment from a list
        // (both names shadow the std::vector objects of the enclosing scope).
        ShortVec vec1 = { 5, 15, 25, 35, 45, 55, 65, 75,
                          85, 95, 105, 115, 125, 135, 145, 155,
                          165, 175, 185, 195, 205, 215, 225, 235,
                          245, 255, 265, 275, 285, 295, 305, 315 };
        ShortVec vec2;
        vec2 = { 5, 15, 25, 35, 45, 55, 65, 75,
                 85, 95, 105, 115, 125, 135, 145, 155,
                 165, 175, 185, 195, 205, 215, 225, 235,
                 245, 255, 265, 275, 285, 295, 305, 315 };
        actual1 << vec1;
        actual2 << vec2;
        for (int i = 0; i < ARITY; ++i) {
            BOOST_TEST_EQ(actual1[i], expected[i]);
            BOOST_TEST_EQ(actual2[i], expected[i]);
        }
    }
#endif

    // test scatter
    {
        ShortVec vec;
        CARGO array[ARITY * 10];
        CARGO expected[ARITY * 10];
        std::vector<int, aligned_allocator<int, 64> > indices(ARITY);
        std::memset(array,    '\0', sizeof(CARGO) * ARITY * 10);
        std::memset(expected, '\0', sizeof(CARGO) * ARITY * 10);
        for (int i = 0; i < ARITY * 10; ++i) {
            if (i % 10 == 0) {
                expected[i] = i + 5;
            }
        }
        for (int i = 0; i < ARITY; ++i) {
            indices[i] = i * 10;
        }

        // gather the non-zero entries out of expected, then scatter them into
        // the zeroed array at the same indices; both arrays must then agree in
        // every slot, including the ones that were never written
        vec.gather(expected, &indices[0]);
        vec.scatter(array, &indices[0]);
        for (int i = 0; i < ARITY * 10; ++i) {
            BOOST_TEST_EQ(array[i], expected[i]);
        }
    }

    // test non temporal stores
    {
        // NOTE(review): the buffers use aligned_allocator<CARGO, 64>, presumably
        // to satisfy the alignment needs of store_nt -- confirm.
        std::vector<CARGO, aligned_allocator<CARGO, 64> > array(ARITY);
        std::vector<CARGO, aligned_allocator<CARGO, 64> > expected(ARITY);

        // case 1: vector built from a scalar
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = 5;
        }
        ShortVec v1 = 5;
        v1.store_nt(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            BOOST_TEST_EQ(array[i], expected[i]);
        }

        // case 2: vector loaded from memory with distinct per-element values
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = i;
        }
        ShortVec v2 = &expected[0];
        v2.store_nt(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            BOOST_TEST_EQ(array[i], expected[i]);
        }
    }

    // test aligned stores
    {
        std::vector<CARGO, aligned_allocator<CARGO, 64> > array(ARITY);
        std::vector<CARGO, aligned_allocator<CARGO, 64> > expected(ARITY);

        // case 1: vector built from a scalar
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = 5;
        }
        ShortVec v1 = 5;
        v1.store_aligned(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            BOOST_TEST_EQ(array[i], expected[i]);
        }

        // case 2: vector loaded from memory with distinct per-element values
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = i;
        }
        ShortVec v2 = &expected[0];
        v2.store_aligned(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            BOOST_TEST_EQ(array[i], expected[i]);
        }
    }

    // test aligned loads
    {
        std::vector<CARGO, aligned_allocator<CARGO, 64> > array(ARITY);
        std::vector<CARGO, aligned_allocator<CARGO, 64> > expected(ARITY);

        // expected starts zeroed so that a load/store which does nothing
        // would be detected (except for element 0, which is 0 either way)
        for (int i = 0; i < ARITY; ++i) {
            array[i]    = i;
            expected[i] = 0;
        }
        ShortVec v1;
        v1.load_aligned(&array[0]);
        v1.store(&expected[0]);
        for (int i = 0; i < ARITY; ++i) {
            BOOST_TEST_EQ(array[i], expected[i]);
        }
    }
}
Ejemplo n.º 9
0
// Exercises short_vec<CARGO, ARITY> with fractional (real-valued) data:
// default construction, pointer load / operator<< store, the arithmetic
// operators (+, +=, -, -=, *, *=, /, /=), sqrt(), stream output, gather
// (from an index array and, when LIBFLATARRAY_WITH_CPP14 is defined, from an
// initializer list), scatter, and non-temporal / aligned stores and aligned
// loads.
//
// NOTE(review): CARGO and ARITY are not declared inside this function;
// presumably they are template parameters of an enclosing declaration that
// is not visible here -- confirm. TEST_REAL and TEST_REAL_ACCURACY are
// defined elsewhere; the third argument of TEST_REAL_ACCURACY is presumably
// an error tolerance -- confirm.
//
// The arithmetic sections are order-dependent: unless a section re-fills
// vec2 itself, it relies on the values the preceding section left there.
// vec1 keeps the value i + 0.1 throughout all arithmetic sections.
void testImplementationReal()
{
    typedef short_vec<CARGO, ARITY> ShortVec;
    // ten vectors' worth of elements, i.e. an exact multiple of ARITY
    int numElements = ShortVec::ARITY * 10;

    std::vector<CARGO> vec1(numElements);
    std::vector<CARGO> vec2(numElements, 4711);

    // init vec1:
    for (int i = 0; i < numElements; ++i) {
        vec1[i] = i + 0.1;
    }

    // test default c-tor:
    // sanity check: vec2 still carries its 4711 fill value
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST(4711 == vec2[i]);
    }
    // Chunked loop pattern used throughout: the bound stops before any chunk
    // that would not fit entirely; since numElements is a multiple of ARITY,
    // every element gets visited.
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v;
        &vec2[i] << v;
    }
    // storing default-constructed vectors is expected to have zeroed vec2
    for (int i = 0; i < numElements; ++i) {
        BOOST_TEST(0 == vec2[i]);
    }

    // tests vector load/store:
    // round trip: load a chunk from vec1, store it to the same offset in vec2
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        TEST_REAL((i + 0.1), vec2[i]);
    }

    // tests scalar load, vector add:
    // w is constructed from the single scalar vec1[0] (== 0.1); the check
    // below expects that 0.1 to have been added to every element
    ShortVec w = vec1[0];

    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        &vec2[i] << (v + w);
    }
    for (int i = 0; i < numElements; ++i) {
        TEST_REAL((i + 0.2), vec2[i]);
    }

    // tests +=
    // vec2 holds i + 0.2 from the previous section: (i + 0.1) + (i + 0.2) == 2 * i + 0.3
    // (the loop-local w shadows the scalar-initialized w declared above)
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v += w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        TEST_REAL((2 * i + 0.3), vec2[i]);
    }

    // test -
    // vec2 holds 2 * i + 0.3: (i + 0.1) - (2 * i + 0.3) == -i - 0.2
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << (v - w);
    }
    for (int i = 0; i < numElements; ++i) {
        TEST_REAL((-i - 0.2), vec2[i]);
    }

    // test -=
    // vec2 holds -i - 0.2: (i + 0.1) - (-i - 0.2) == 2 * i + 0.3
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v -= w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        TEST_REAL((2 * i + 0.3), vec2[i]);
    }

    // test *
    // vec2 holds 2 * i + 0.3: product is (i + 0.1) * (2 * i + 0.3)
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << (v * w);
    }
    for (int i = 0; i < numElements; ++i) {
        double reference = ((i + 0.1) * (2 * i + 0.3));
        TEST_REAL(reference, vec2[i]);
    }

    // test *=
    // vec2 is re-filled here, so this section does not depend on the previous one
    for (int i = 0; i < numElements; ++i) {
        vec2[i] = i + 0.2;
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v *= w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        TEST_REAL((i + 0.1) * (i + 0.2), vec2[i]);
    }

    // test /
    // divisor i + 0.2 is never zero; vec1 still holds i + 0.1
    for (int i = 0; i < numElements; ++i) {
        vec2[i] = i + 0.2;
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << (v / w);
    }
    for (int i = 0; i < numElements; ++i) {
        // accept lower accuracy for estimated division, really low
        // accuracy accepted because of results from ARM NEON:
        TEST_REAL_ACCURACY((i + 0.1) / (i + 0.2), vec2[i], 0.0025);
    }

    // test /=
    // vec2 must be re-filled because the previous section overwrote it with
    // the quotients
    for (int i = 0; i < numElements; ++i) {
        vec2[i] = i + 0.2;
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        v /= w;
        &vec2[i] << v;
    }
    for (int i = 0; i < numElements; ++i) {
        // here, too, lower accuracy is acceptable. As with divisions,
        // ARM NEON costs us an order of magnitude here compared to X86.
        TEST_REAL_ACCURACY((i + 0.1) / (i + 0.2), vec2[i], 0.0025);
    }

    // test sqrt()
    // no re-initialization needed: vec1 still holds i + 0.1, and vec2 is
    // only written to in this section
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        &vec2[i] << sqrt(v);
    }
    for (int i = 0; i < numElements; ++i) {
        // lower accuracy, mainly for ARM NEON
        TEST_REAL_ACCURACY(std::sqrt(double(i + 0.1)), vec2[i], 0.0025);
    }

    // test "/ sqrt()"
    // vec2 is re-filled with the numerators i + 0.2; vec1 (i + 0.1) goes under the root
    for (int i = 0; i < numElements; ++i) {
        vec2[i] = i + 0.2;
    }
    for (int i = 0; i < (numElements - ShortVec::ARITY + 1); i += ShortVec::ARITY) {
        ShortVec v = &vec1[i];
        ShortVec w = &vec2[i];
        &vec2[i] << w / sqrt(v);
    }
    for (int i = 0; i < numElements; ++i) {
        // the expression "foo / sqrt(bar)" will again result in an
        // estimated result for single precision floats, so lower accuracy is acceptable:
        TEST_REAL_ACCURACY((i + 0.2) / std::sqrt(double(i + 0.1)), vec2[i], 0.0035);
    }

    // test string conversion
    // only the first ARITY entries of vec1 are (re)written; they feed the one
    // vector that gets printed
    for (int i = 0; i < ShortVec::ARITY; ++i) {
        vec1[i] = i + 0.1;
    }
    ShortVec v(&vec1[0]);
    std::ostringstream buf1;
    buf1 << v;

    // build the reference text by hand: "[0.1, 1.1, ..., <ARITY - 1 + 0.1>]";
    // the comparison is on the exact text, not on numeric closeness
    std::ostringstream buf2;
    buf2 << "[";
    for (int i = 0; i < (ShortVec::ARITY - 1); ++i) {
        buf2 << (i + 0.1) << ", ";
    }
    buf2 << (ShortVec::ARITY - 1 + 0.1) << "]";

    BOOST_TEST(buf1.str() == buf2.str());

    // test gather
    {
        CARGO array[ARITY * 10];
        std::vector<int, aligned_allocator<int, 64> > indices(ARITY);
        CARGO actual[ARITY];
        CARGO expected[ARITY];
        std::memset(array, '\0', sizeof(CARGO) * ARITY * 10);

        // only every 10th slot is written; it holds (its index * 0.75)
        for (int i = 0; i < ARITY * 10; ++i) {
            if (i % 10 == 0) {
                array[i] = i * 0.75;
            }
        }

        // the indices address exactly those slots
        for (int i = 0; i < ARITY; ++i) {
            indices[i] = i * 10;
            expected[i] = (i * 10) * 0.75;
        }

        ShortVec vec;
        vec.gather(array, &indices[0]);
        actual << vec;

        for (int i = 0; i < ARITY; ++i) {
            TEST_REAL_ACCURACY(actual[i], expected[i], 0.001);
        }
    }

#ifdef LIBFLATARRAY_WITH_CPP14
    // test gather via initializer_list
    {
        CARGO actual1[ARITY];
        CARGO actual2[ARITY];
        CARGO expected[ARITY];
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = (i * 10) * 0.75;
        }

        // max: 32
        // Both lists contain 32 values following the pattern 7.5 * k; only the
        // first ARITY results are checked below.
        // NOTE(review): presumably surplus list entries are ignored when
        // ARITY < 32 -- confirm against short_vec.
        // vec1 exercises construction from a list, vec2 assignment from a list
        // (both names shadow the std::vector objects of the enclosing scope).
        ShortVec vec1 = { 0.0, 7.5, 15.0, 22.50, 30.0, 37.5, 45.0, 52.5,
                          60.0, 67.5, 75.0, 82.5, 90.0, 97.5, 105.0, 112.5,
                          120.0, 127.5, 135.0, 142.5, 150.0, 157.5, 165.0, 172.5,
                          180.0, 187.5, 195.0, 202.5, 210.0, 217.5, 225.0, 232.5 };
        ShortVec vec2;
        vec2 = { 0.0, 7.5, 15.0, 22.50, 30.0, 37.5, 45.0, 52.5,
                 60.0, 67.5, 75.0, 82.5, 90.0, 97.5, 105.0, 112.5,
                 120.0, 127.5, 135.0, 142.5, 150.0, 157.5, 165.0, 172.5,
                 180.0, 187.5, 195.0, 202.5, 210.0, 217.5, 225.0, 232.5 };
        actual1 << vec1;
        actual2 << vec2;
        for (int i = 0; i < ARITY; ++i) {
            TEST_REAL_ACCURACY(actual1[i], expected[i], 0.001);
            TEST_REAL_ACCURACY(actual2[i], expected[i], 0.001);
        }
    }
#endif

    // test scatter
    {
        ShortVec vec;
        CARGO array[ARITY * 10];
        CARGO expected[ARITY * 10];
        std::vector<int, aligned_allocator<int, 64> > indices(ARITY);
        std::memset(array,    '\0', sizeof(CARGO) * ARITY * 10);
        std::memset(expected, '\0', sizeof(CARGO) * ARITY * 10);
        for (int i = 0; i < ARITY * 10; ++i) {
            if (i % 10 == 0) {
                expected[i] = i * 0.75;
            }
        }
        for (int i = 0; i < ARITY; ++i) {
            indices[i] = i * 10;
        }

        // gather every 10th entry out of expected, then scatter them into the
        // zeroed array at the same indices; both arrays are then compared in
        // every slot, including the ones that were never written
        vec.gather(expected, &indices[0]);
        vec.scatter(array, &indices[0]);
        for (int i = 0; i < ARITY * 10; ++i) {
            TEST_REAL_ACCURACY(array[i], expected[i], 0.001);
        }
    }

    // test non temporal stores
    {
        // NOTE(review): the buffers use aligned_allocator<CARGO, 64>, presumably
        // to satisfy the alignment needs of store_nt -- confirm.
        std::vector<CARGO, aligned_allocator<CARGO, 64> > array(ARITY);
        std::vector<CARGO, aligned_allocator<CARGO, 64> > expected(ARITY);

        // case 1: vector built from a scalar
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = 5.0;
        }
        ShortVec v1 = 5.0;
        v1.store_nt(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            TEST_REAL_ACCURACY(array[i], expected[i], 0.001);
        }

        // case 2: vector loaded from memory with distinct per-element values
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = i + 0.1;
        }
        ShortVec v2 = &expected[0];
        v2.store_nt(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            TEST_REAL_ACCURACY(array[i], expected[i], 0.001);
        }
    }

    // test aligned stores
    {
        std::vector<CARGO, aligned_allocator<CARGO, 64> > array(ARITY);
        std::vector<CARGO, aligned_allocator<CARGO, 64> > expected(ARITY);

        // case 1: vector built from a scalar
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = 5.0;
        }
        ShortVec v1 = 5.0;
        v1.store_aligned(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            TEST_REAL_ACCURACY(array[i], expected[i], 0.001);
        }

        // case 2: vector loaded from memory with distinct per-element values
        for (int i = 0; i < ARITY; ++i) {
            expected[i] = i + 0.1;
        }
        ShortVec v2 = &expected[0];
        v2.store_aligned(&array[0]);
        for (int i = 0; i < ARITY; ++i) {
            TEST_REAL_ACCURACY(array[i], expected[i], 0.001);
        }
    }

    // test aligned loads
    {
        std::vector<CARGO, aligned_allocator<CARGO, 64> > array(ARITY);
        std::vector<CARGO, aligned_allocator<CARGO, 64> > expected(ARITY);

        // expected starts zeroed so that a load/store which does nothing
        // would be detected
        for (int i = 0; i < ARITY; ++i) {
            array[i]    = i + 0.1;
            expected[i] = 0;
        }
        ShortVec v1;
        v1.load_aligned(&array[0]);
        v1.store(&expected[0]);
        for (int i = 0; i < ARITY; ++i) {
            TEST_REAL_ACCURACY(array[i], expected[i], 0.001);
        }
    }
}