/*
=============
AllocTris

Reserves vertCount vertices and indexCount indexes at the end of the gui
model's buffers and appends tempIndexes, each offset by the position of the
first reserved vertex.

Returns a pointer to the first reserved vertex so the caller can fill in the
vertex data. Returns NULL, reserving nothing, when material is NULL or when
the request would push the totals past MAX_INDEXES or MAX_VERTS.
=============
*/
idDrawVert * idGuiModel::AllocTris( int vertCount, const triIndex_t * tempIndexes, int indexCount, const idMaterial * material, const uint64 glState, const stereoDepthType_t stereoType ) {
	// triangles without a material are dropped
	if ( material == NULL ) {
		return NULL;
	}

	// Reject anything that would run past the index limit. The warning is only
	// printed when tr.frameCount differs from the frame it was last printed on.
	if ( numIndexes + indexCount > MAX_INDEXES ) {
		static int lastIndexWarningFrame = 0;
		if ( lastIndexWarningFrame != tr.frameCount ) {
			lastIndexWarningFrame = tr.frameCount;
			idLib::Warning( "idGuiModel::AllocTris: MAX_INDEXES exceeded" );
		}
		return NULL;
	}

	// Same treatment for the vertex limit, tracked with its own frame stamp.
	if ( numVerts + vertCount > MAX_VERTS ) {
		static int lastVertWarningFrame = 0;
		if ( lastVertWarningFrame != tr.frameCount ) {
			lastVertWarningFrame = tr.frameCount;
			idLib::Warning( "idGuiModel::AllocTris: MAX_VERTS exceeded" );
		}
		return NULL;
	}

	// Triangles can only be appended to the current surface when material,
	// GL state and stereo type all match it.
	const bool sameState = ( material == surf->material )
		&& ( glState == surf->glState )
		&& ( stereoType == surf->stereoType );
	if ( !sameState ) {
		// only move on if the current surface already holds indexes;
		// an empty one is simply re-tagged with the new state
		if ( surf->numIndexes != 0 ) {
			AdvanceSurf();
		}
		surf->material = material;
		surf->glState = glState;
		surf->stereoType = stereoType;
	}

	// remember where this allocation begins before bumping the totals
	const int firstVert = numVerts;
	const int firstIndex = numIndexes;

	numVerts += vertCount;
	numIndexes += indexCount;
	surf->numIndexes += indexCount;

	// The pair writer is only used when both the start position and the count are even.
	const bool pairAligned = ( ( firstIndex & 1 ) == 0 ) && ( ( indexCount & 1 ) == 0 );
	if ( pairAligned ) {
		for ( int n = 0; n < indexCount; n += 2 ) {
			WriteIndexPair( indexPointer + firstIndex + n, firstVert + tempIndexes[n], firstVert + tempIndexes[n + 1] );
		}
	} else {
		// slow for write combined memory!
		// this should be very rare, since quads are always an even index count
		for ( int n = 0; n < indexCount; n++ ) {
			indexPointer[firstIndex + n] = firstVert + tempIndexes[n];
		}
	}

	return vertexPointer + firstVert;
}
/* ==================== R_CopyOverlaySurface ==================== */ static void R_CopyOverlaySurface( idDrawVert* verts, int numVerts, triIndex_t* indexes, int numIndexes, const overlay_t* overlay, const idDrawVert* sourceVerts ) { assert_16_byte_aligned( &verts[numVerts] ); assert_16_byte_aligned( &indexes[numIndexes] ); assert_16_byte_aligned( overlay->verts ); assert_16_byte_aligned( overlay->indexes ); assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 ); assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 ); #if defined(USE_INTRINSICS) const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 ); const __m128i vector_int_num_verts = _mm_shuffle_epi32( _mm_cvtsi32_si128( numVerts ), 0 ); const __m128i vector_short_num_verts = _mm_packs_epi32( vector_int_num_verts, vector_int_num_verts ); // copy vertices for( int i = 0; i < overlay->numVerts; i++ ) { const overlayVertex_t& overlayVert = overlay->verts[i]; const idDrawVert& srcVert = sourceVerts[overlayVert.vertexNum]; idDrawVert& dstVert = verts[numVerts + i]; __m128i v0 = _mm_load_si128( ( const __m128i* )( ( byte* )&srcVert + 0 ) ); __m128i v1 = _mm_load_si128( ( const __m128i* )( ( byte* )&srcVert + 16 ) ); __m128i st = _mm_cvtsi32_si128( *( unsigned int* )overlayVert.st ); st = _mm_shuffle_epi32( st, _MM_SHUFFLE( 0, 1, 2, 3 ) ); v0 = _mm_and_si128( v0, vector_int_clear_last ); v0 = _mm_or_si128( v0, st ); _mm_stream_si128( ( __m128i* )( ( byte* )&dstVert + 0 ), v0 ); _mm_stream_si128( ( __m128i* )( ( byte* )&dstVert + 16 ), v1 ); } // copy indexes assert( ( overlay->numIndexes & 7 ) == 0 ); assert( sizeof( triIndex_t ) == 2 ); for( int i = 0; i < overlay->numIndexes; i += 8 ) { __m128i vi = _mm_load_si128( ( const __m128i* )&overlay->indexes[i] ); vi = _mm_add_epi16( vi, vector_short_num_verts ); _mm_stream_si128( ( __m128i* )&indexes[numIndexes + i], vi ); } _mm_sfence(); #else // copy vertices for( int i = 0; i < overlay->numVerts; i++ ) { const overlayVertex_t& 
overlayVert = overlay->verts[i]; // NOTE: bad out-of-order write-combined write, SIMD code does the right thing verts[numVerts + i] = sourceVerts[overlayVert.vertexNum]; // RB begin verts[numVerts + i].SetTexCoordS( overlayVert.st[0] ); verts[numVerts + i].SetTexCoordT( overlayVert.st[1] ); // RB end } // copy indexes for( int i = 0; i < overlay->numIndexes; i += 2 ) { assert( overlay->indexes[i + 0] < overlay->numVerts && overlay->indexes[i + 1] < overlay->numVerts ); WriteIndexPair( &indexes[numIndexes + i], numVerts + overlay->indexes[i + 0], numVerts + overlay->indexes[i + 1] ); } #endif }