Example #1
0
/*
=============
AllocTris

Reserves vertCount vertices and indexCount indexes at the end of the model's
buffers, writes tempIndexes into the index buffer with each value offset by
the first reserved vertex number, and returns a pointer to the first reserved
vertex. No vertex data is written here.

Returns NULL, without reserving anything, when material is NULL or when the
request would push the totals past MAX_INDEXES or MAX_VERTS.
=============
*/
idDrawVert * idGuiModel::AllocTris( int vertCount, const triIndex_t * tempIndexes, int indexCount, const idMaterial * material, const uint64 glState, const stereoDepthType_t stereoType ) {
	if ( material == NULL ) {
		return NULL;
	}

	// index budget exhausted: reject the request, warning once per tr.frameCount value
	if ( numIndexes + indexCount > MAX_INDEXES ) {
		static int lastIndexWarnFrame = 0;
		if ( lastIndexWarnFrame != tr.frameCount ) {
			lastIndexWarnFrame = tr.frameCount;
			idLib::Warning( "idGuiModel::AllocTris: MAX_INDEXES exceeded" );
		}
		return NULL;
	}

	// vertex budget exhausted: same policy as above, with its own warning throttle
	if ( numVerts + vertCount > MAX_VERTS ) {
		static int lastVertWarnFrame = 0;
		if ( lastVertWarnFrame != tr.frameCount ) {
			lastVertWarnFrame = tr.frameCount;
			idLib::Warning( "idGuiModel::AllocTris: MAX_VERTS exceeded" );
		}
		return NULL;
	}

	// the current surface can only keep growing while material, GL state and
	// stereo type all match what it was set up with
	const bool surfMatches = ( surf->material == material ) && ( surf->glState == glState ) && ( surf->stereoType == stereoType );
	if ( !surfMatches ) {
		// a surface that has no indexes yet is simply re-targeted instead of advanced
		if ( surf->numIndexes != 0 ) {
			AdvanceSurf();
		}
		surf->stereoType = stereoType;
		surf->glState = glState;
		surf->material = material;
	}

	// remember where this allocation begins before bumping the running totals
	const int firstVert = numVerts;
	const int firstIndex = numIndexes;

	surf->numIndexes += indexCount;
	numIndexes += indexCount;
	numVerts += vertCount;

	// pairs can only be written when both the start offset and the count are even
	const bool canWritePairs = ( ( firstIndex | indexCount ) & 1 ) == 0;
	if ( canWritePairs ) {
		for ( int pair = 0; pair < indexCount; pair += 2 ) {
			WriteIndexPair( indexPointer + firstIndex + pair, firstVert + tempIndexes[pair], firstVert + tempIndexes[pair + 1] );
		}
	} else {
		// slow for write combined memory!
		// this should be very rare, since quads are always an even index count
		for ( int n = 0; n < indexCount; n++ ) {
			indexPointer[firstIndex + n] = firstVert + tempIndexes[n];
		}
	}

	return vertexPointer + firstVert;
}
/*
====================
R_CopyOverlaySurface

Appends one overlay to an output vertex / index buffer pair.

verts, numVerts      output vertex buffer and the number of vertices already in it;
                     new vertices are written starting at verts[numVerts]
indexes, numIndexes  output index buffer and the number of indexes already in it;
                     new indexes are written starting at indexes[numIndexes]
overlay              supplies numVerts / verts (source vertex number plus st) and
                     numIndexes / indexes (relative to the overlay's own vertices)
sourceVerts          vertex array that overlay->verts[].vertexNum indexes into

Every output vertex is a copy of the referenced source vertex with its texture
coordinates replaced by the overlay's st. Every output index is the overlay
index plus numVerts, truncated to triIndex_t.
====================
*/
static void R_CopyOverlaySurface( idDrawVert* verts, int numVerts, triIndex_t* indexes, int numIndexes, const overlay_t* overlay, const idDrawVert* sourceVerts )
{
	// alignment and size preconditions; the intrinsics path below uses aligned
	// 16 byte loads and streaming stores on these buffers
	assert_16_byte_aligned( &verts[numVerts] );
	assert_16_byte_aligned( &indexes[numIndexes] );
	assert_16_byte_aligned( overlay->verts );
	assert_16_byte_aligned( overlay->indexes );
	assert( ( ( overlay->numVerts * sizeof( idDrawVert ) ) & 15 ) == 0 );
	assert( ( ( overlay->numIndexes * sizeof( triIndex_t ) ) & 15 ) == 0 );
	
#if defined(USE_INTRINSICS)
	
	// mask that keeps the low three 32 bit lanes and zeroes the highest one
	const __m128i vector_int_clear_last = _mm_set_epi32( 0, -1, -1, -1 );
	// Broadcast the low 16 bits of numVerts to all eight 16 bit lanes.
	// This must be a plain truncation: packing with _mm_packs_epi32 saturates to
	// 32767, which produces wrong indexes once numVerts exceeds 32767 and disagrees
	// with the non-intrinsics path, where the sum is truncated to triIndex_t.
	const __m128i vector_short_num_verts = _mm_set1_epi16( ( short )numVerts );
	
	// copy vertices
	for( int i = 0; i < overlay->numVerts; i++ )
	{
		const overlayVertex_t& overlayVert = overlay->verts[i];
		const idDrawVert& srcVert = sourceVerts[overlayVert.vertexNum];
		idDrawVert& dstVert = verts[numVerts + i];
		
		// load 32 bytes of the source vertex and the 4 bytes at overlayVert.st
		__m128i v0 = _mm_load_si128( ( const __m128i* )( ( byte* )&srcVert +  0 ) );
		__m128i v1 = _mm_load_si128( ( const __m128i* )( ( byte* )&srcVert + 16 ) );
		__m128i st = _mm_cvtsi32_si128( *( unsigned int* )overlayVert.st );
		
		// move st into the highest 32 bit lane and substitute it for the last
		// 4 bytes of the first 16 byte half of the vertex
		// NOTE(review): presumably that is where idDrawVert keeps its texture
		// coordinates - confirm against the idDrawVert layout
		st = _mm_shuffle_epi32( st, _MM_SHUFFLE( 0, 1, 2, 3 ) );
		v0 = _mm_and_si128( v0, vector_int_clear_last );
		v0 = _mm_or_si128( v0, st );
		
		// streaming stores, written in ascending address order
		_mm_stream_si128( ( __m128i* )( ( byte* )&dstVert +  0 ), v0 );
		_mm_stream_si128( ( __m128i* )( ( byte* )&dstVert + 16 ), v1 );
	}
	
	// copy indexes, eight 16 bit indexes per iteration
	assert( ( overlay->numIndexes & 7 ) == 0 );
	assert( sizeof( triIndex_t ) == 2 );
	for( int i = 0; i < overlay->numIndexes; i += 8 )
	{
		__m128i vi = _mm_load_si128( ( const __m128i* )&overlay->indexes[i] );
		
		// rebase onto the output vertex range; the 16 bit add wraps on overflow
		vi = _mm_add_epi16( vi, vector_short_num_verts );
		
		_mm_stream_si128( ( __m128i* )&indexes[numIndexes + i], vi );
	}
	
	// order the streaming stores above before any stores that follow
	_mm_sfence();
	
#else
	
	// copy vertices
	for( int i = 0; i < overlay->numVerts; i++ )
	{
		const overlayVertex_t& overlayVert = overlay->verts[i];
	
		// NOTE: bad out-of-order write-combined write, SIMD code does the right thing
		verts[numVerts + i] = sourceVerts[overlayVert.vertexNum];
	
		// RB begin
		verts[numVerts + i].SetTexCoordS( overlayVert.st[0] );
		verts[numVerts + i].SetTexCoordT( overlayVert.st[1] );
		// RB end
	}
	
	// copy indexes two at a time, rebased onto the output vertex range
	for( int i = 0; i < overlay->numIndexes; i += 2 )
	{
		assert( overlay->indexes[i + 0] < overlay->numVerts && overlay->indexes[i + 1] < overlay->numVerts );
		WriteIndexPair( &indexes[numIndexes + i], numVerts + overlay->indexes[i + 0], numVerts + overlay->indexes[i + 1] );
	}
	
#endif
}