Esempio n. 1
0
/**
 * Rebuilds the vertex buffers used to draw the WindLight sky dome, then
 * refreshes the star colors and star geometry.
 *
 * With DOME_SLICES enabled the dome is built as a fan buffer (mFanVerts) plus
 * a list of triangle-strip buffers (mStripsVerts), each sized so that it stays
 * within the "RenderMaxVBOSize" setting (read in kilobytes).  Otherwise the
 * dome is derived from an icosahedron that is subdivided 8 times, projected
 * onto a sphere of the configured dome radius, passed through chop() and
 * copied into a single buffer (mStripsVerts).
 *
 * @param drawable  forwarded unchanged to updateStarGeometry().
 * @return TRUE once the geometry has been rebuilt; FALSE (DOME_SLICES only)
 *         when the vertex budget is too small to hold a single stack.
 */
BOOL LLVOWLSky::updateGeometry(LLDrawable * drawable)
{
	LLFastTimer ftm(FTM_GEO_SKY);
	LLStrider<LLVector3>	vertices;
	LLStrider<LLVector2>	texCoords;
	LLStrider<U16>			indices;

#if DOME_SLICES
	{
		// the fan buffer is allocated in one piece
		mFanVerts = new LLVertexBuffer(LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB);
		mFanVerts->allocateBuffer(getFanNumVerts(), getFanNumIndices(), TRUE);

		BOOL success = mFanVerts->getVertexStrider(vertices)
			&& mFanVerts->getTexCoord0Strider(texCoords)
			&& mFanVerts->getIndexStrider(indices);

		if(!success) 
		{
			llerrs << "Failed updating WindLight sky geometry." << llendl;
		}

		buildFanBuffer(vertices, texCoords, indices);

		mFanVerts->flush();
	}

	{
		// RenderMaxVBOSize is expressed in kilobytes
		const U32 max_buffer_bytes = gSavedSettings.getS32("RenderMaxVBOSize")*1024;
		const U32 data_mask = LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK;
		const U32 max_verts = max_buffer_bytes / LLVertexBuffer::calcVertexSize(data_mask);

		const U32 total_stacks = getNumStacks();

		const U32 verts_per_stack = getNumSlices();

		// each seg has to have one more row of verts than it has stacks
		// then round down
		// NOTE(review): when max_verts < verts_per_stack this U32 subtraction
		// wraps to a very large value; that pre-existing behavior is left as is.
		const U32 stacks_per_seg = (verts_per_stack != 0)
			? (max_verts - verts_per_stack) / verts_per_stack
			: 0;

		if (stacks_per_seg == 0)
		{
			// stacks_per_seg is used below as a divisor and as a modulus, so
			// bail out instead of dividing by zero when the vertex budget
			// cannot hold even one stack (two rows of verts).
			llwarns << "Failed updating WindLight sky geometry: vertex budget too small for a single stack." << llendl;
			return FALSE;
		}

		// round up to a whole number of segments
		const U32 strips_segments = (total_stacks+stacks_per_seg-1) / stacks_per_seg;

		llinfos << "WL Skydome strips in " << strips_segments << " batches." << llendl;

		mStripsVerts.resize(strips_segments, NULL);

		LLTimer timer;
		timer.start();

		for (U32 i = 0; i < strips_segments ;++i)
		{
			LLVertexBuffer * segment = new LLVertexBuffer(LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB);
			mStripsVerts[i] = segment;

			U32 num_stacks_this_seg = stacks_per_seg;
			if ((i == strips_segments - 1) && (total_stacks % stacks_per_seg) != 0)
			{
				// for the last buffer only allocate what we'll use
				num_stacks_this_seg = total_stacks % stacks_per_seg;
			}

			// figure out what range of the sky we're filling
			const U32 begin_stack = i * stacks_per_seg;
			const U32 end_stack = begin_stack + num_stacks_this_seg;
			llassert(end_stack <= total_stacks);

			const U32 num_verts_this_seg = verts_per_stack * (num_stacks_this_seg+1);
			llassert(num_verts_this_seg <= max_verts);

			const U32 num_indices_this_seg = 1+num_stacks_this_seg*(2+2*verts_per_stack);
			llassert(num_indices_this_seg * sizeof(U16) <= max_buffer_bytes);

			segment->allocateBuffer(num_verts_this_seg, num_indices_this_seg, TRUE);

			// lock the buffer
			BOOL success = segment->getVertexStrider(vertices)
				&& segment->getTexCoord0Strider(texCoords)
				&& segment->getIndexStrider(indices);

			if(!success) 
			{
				llerrs << "Failed updating WindLight sky geometry." << llendl;
			}

			// fill it
			buildStripsBuffer(begin_stack, end_stack,  vertices, texCoords, indices);

			// and unlock the buffer
			segment->flush();
		}
	
		llinfos << "completed in " << llformat("%.2f", timer.getElapsedTimeF32()) << "seconds" << llendl;
	}
#else
	mStripsVerts = new LLVertexBuffer(LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB);
	
	const F32 RADIUS = LLWLParamManager::sParamMgr->getDomeRadius();

	// seed mesh: the 12 vertices / 60 indices of icosahedron_vert / icosahedron_ind
	LLPointer<LLVertexBuffer> temp = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, 0);
	temp->allocateBuffer(12, 60, TRUE);

	BOOL success = temp->getVertexStrider(vertices)
		&& temp->getIndexStrider(indices);

	if (success)
	{
		for (U32 i = 0; i < 12; i++)
		{
			*vertices++ = icosahedron_vert[i];
		}

		for (U32 i = 0; i < 60; i++)
		{
			*indices++ = icosahedron_ind[i];
		}
	}


	LLPointer<LLVertexBuffer> temp2;
	
	// run 8 subdivision passes, each one writing into a fresh buffer
	for (U32 i = 0; i < 8; i++)
	{
		temp2 = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, 0);
		subdivide(*temp, temp2);
		temp = temp2;
	}
	
	// push every subdivided vertex out to the dome radius
	temp->getVertexStrider(vertices);
	for (S32 i = 0; i < temp->getNumVerts(); i++)
	{
		LLVector3 v = vertices[i];
		v.normVec();
		vertices[i] = v*RADIUS;
	}

	temp2 = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, 0);
	chop(*temp, temp2);

	mStripsVerts->allocateBuffer(temp2->getNumVerts(), temp2->getNumIndices(), TRUE);
	
	success = mStripsVerts->getVertexStrider(vertices)
		&& mStripsVerts->getTexCoordStrider(texCoords)
		&& mStripsVerts->getIndexStrider(indices);

	LLStrider<LLVector3> v;
	temp2->getVertexStrider(v);
	LLStrider<U16> ind;
	temp2->getIndexStrider(ind);

	if (success)
	{
		for (S32 i = 0; i < temp2->getNumVerts(); ++i)
		{
			LLVector3 vert = *v++;
			vert.normVec();
			F32 z0 = vert.mV[2];
			F32 x0 = vert.mV[0];
			
			vert *= RADIUS;
			
			*vertices++ = vert;
			// map the unit-vector z and x components from [-1, 1] to [0, 1]
			*texCoords++ = LLVector2((-z0 + 1.f) / 2.f, (-x0 + 1.f) / 2.f);
		}

		for (S32 i = 0; i < temp2->getNumIndices(); ++i)
		{
			*indices++ = *ind++;
		}
	}

	mStripsVerts->flush();
#endif

	updateStarColors();
	updateStarGeometry(drawable);

	LLPipeline::sCompiles++;

	return TRUE;
}
Esempio n. 2
0
/**
 * Brings the vertex buffer of a rigged (skinned) mesh face up to date.
 *
 * Does nothing when the volume face carries no skin weights.  The buffer is
 * (re)allocated or resized and its geometry regenerated when it is missing,
 * has the wrong type mask, has a different vertex/index count than the volume
 * face, or the drawable is flagged REBUILD_ALL.  When sShaderLevel <= 0 and
 * the face's last skin time is older than the avatar's, the positions (and
 * normals, if the buffer has them) are skinned on the CPU.
 *
 * @param avatar    avatar supplying the joints and the last skin time.
 * @param face      face whose vertex buffer is updated.
 * @param skin      skin info: bind shape matrix, joint names, inverse bind matrices.
 * @param volume    volume the geometry is generated from.
 * @param vol_face  volume face supplying weights, positions, normals and counts.
 * @param vobj      volume object; only used to fetch the deformed volume when
 *                  the "MeshEnableDeformer" setting is on.
 */
void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLVOAvatar* avatar, LLFace* face, 
													const LLMeshSkinInfo* skin, LLVolume* volume, 
													const LLVolumeFace& vol_face, LLVOVolume* vobj)
{
	LLVector4a* weight = vol_face.mWeights;
	if (!weight)
	{
		return;
	}

	LLPointer<LLVertexBuffer> buffer = face->getVertexBuffer();
	LLDrawable* drawable = face->getDrawable();

	U32 data_mask = face->getRiggedVertexBufferDataMask();

	if (buffer.isNull() || buffer->getTypeMask() != data_mask ||
		buffer->getRequestedVerts() != vol_face.mNumVertices ||
		buffer->getRequestedIndices() != vol_face.mNumIndices ||
		(drawable && drawable->isState(LLDrawable::REBUILD_ALL)))
	{
		face->setGeomIndex(0);
		face->setIndicesIndex(0);

		if (buffer.isNull() || buffer->getTypeMask() != data_mask)
		{	//make a new buffer
			if (sShaderLevel > 0)
			{
				buffer = new LLVertexBuffer(data_mask, GL_DYNAMIC_DRAW_ARB);
			}
			else
			{
				buffer = new LLVertexBuffer(data_mask, GL_STREAM_DRAW_ARB);
			}
			buffer->allocateBuffer(vol_face.mNumVertices,
								   vol_face.mNumIndices, true);
		}
		else
		{	//resize existing buffer
			buffer->resizeBuffer(vol_face.mNumVertices, vol_face.mNumIndices);
		}

		face->setSize(vol_face.mNumVertices, vol_face.mNumIndices);
		face->setVertexBuffer(buffer);

		U16 offset = 0;

		//normals are transformed by the inverse transpose of the bind shape
		//matrix; only its upper-left 3x3 is kept
		LLMatrix4 mat_vert = skin->mBindShapeMatrix;
		glh::matrix4f m((F32*) mat_vert.mMatrix);
		m = m.inverse().transpose();

		F32 mat3[] = {	m.m[0], m.m[1], m.m[2],
						m.m[4], m.m[5], m.m[6],
						m.m[8], m.m[9], m.m[10] };

		LLMatrix3 mat_normal(mat3);

		static LLCachedControl<bool> mesh_enable_deformer(gSavedSettings, "MeshEnableDeformer");
		if (mesh_enable_deformer)
		{
			//generate the geometry from the deformed copy of the volume
			LLDeformedVolume* deformed_volume = vobj->getDeformedVolume();
			deformed_volume->deform(volume, avatar, skin, face->getTEOffset());
			face->getGeometryVolume(*deformed_volume, face->getTEOffset(), mat_vert,
									mat_normal, offset, true);
		}
		else
		{
			face->getGeometryVolume(*volume, face->getTEOffset(), mat_vert,
 									mat_normal, offset, true);
		}
	}

	if (sShaderLevel <= 0 && face->mLastSkinTime < avatar->getLastSkinTime())
	{ //perform software vertex skinning for this face
		LLStrider<LLVector3> position;
		LLStrider<LLVector3> normal;

		bool has_normal = buffer->hasDataType(LLVertexBuffer::TYPE_NORMAL);
		buffer->getVertexStrider(position);

		if (has_normal)
		{
			buffer->getNormalStrider(normal);
		}
		LLVector4a* pos = (LLVector4a*) position.get();

		LLVector4a* norm = has_normal ? (LLVector4a*) normal.get() : NULL;

		//build matrix palette
		const U32 MAX_PALETTE_JOINTS = 64;
		LLMatrix4a mp[MAX_PALETTE_JOINTS];
		LLMatrix4* mat = (LLMatrix4*) mp;

		//never write past the end of the palette, however many joints the
		//skin names
		U32 joint_count = (U32) skin->mJointNames.size();
		if (joint_count > MAX_PALETTE_JOINTS)
		{
			joint_count = MAX_PALETTE_JOINTS;
		}

		for (U32 j = 0; j < joint_count; ++j)
		{
			LLJoint* joint = avatar->getJoint(skin->mJointNames[j]);
			if (joint)
			{
				mat[j] = skin->mInvBindMatrix[j];
				mat[j] *= joint->getWorldMatrix();
			}
		}

		LLMatrix4a bind_shape_matrix;
		bind_shape_matrix.loadu(skin->mBindShapeMatrix);

		for (U32 j = 0; j < buffer->getRequestedVerts(); ++j)
		{
			LLMatrix4a final_mat;
			final_mat.clear();

			S32 idx[4];

			LLVector4 wght;

			//each weight component packs a joint index in its integer part
			//and that joint's influence in its fractional part
			F32 scale = 0.f;
			for (U32 k = 0; k < 4; k++)
			{
				F32 w = weight[j][k];

				idx[k] = llclamp((S32) floorf(w), 0, (S32) (MAX_PALETTE_JOINTS - 1));
				wght[k] = w - floorf(w);
				scale += wght[k];
			}

			if (scale > 0.f)
			{
				//normalize so the four influences sum to one
				wght *= 1.f/scale;
			}
			else
			{
				//all influences are zero (or the sum is not a number):
				//dividing would yield NaN positions, so give the first
				//influence full weight instead
				wght[0] = 1.f;
				wght[1] = 0.f;
				wght[2] = 0.f;
				wght[3] = 0.f;
			}

			//blend the palette matrices of the four influences
			for (U32 k = 0; k < 4; k++)
			{
				F32 w = wght[k];

				LLMatrix4a src;
				src.setMul(mp[idx[k]], w);

				final_mat.add(src);
			}

			LLVector4a& v = vol_face.mPositions[j];
			LLVector4a t;
			LLVector4a dst;
			bind_shape_matrix.affineTransform(v, t);
			final_mat.affineTransform(t, dst);
			pos[j] = dst;

			if (norm)
			{
				LLVector4a& n = vol_face.mNormals[j];
				bind_shape_matrix.rotate(n, t);
				final_mat.rotate(t, dst);
				norm[j] = dst;
			}
		}
	}

	//REBUILD_ALL is only cleared once the drawable's last face has been handled
	if (drawable && face->getTEOffset() == drawable->getNumFaces() - 1)
	{
		drawable->clearState(LLDrawable::REBUILD_ALL);
	}
}
Esempio n. 3
0
/**
 * Estimates GPU memory bandwidth by repeatedly drawing a set of textures
 * into a set of render targets and timing the work.
 *
 * Two figures are logged: one from CPU timers (median of the per-batch
 * samples) and one from the elapsed time accumulated on gBenchmarkProgram
 * by the shader profiler (ARB_timer_query).  The latter is the value returned.
 *
 * @return estimated bandwidth in GB/sec, or -1.f when the benchmark cannot be
 *         run (no shader objects / timer queries, shader creation failed) or
 *         its result is not trustworthy (improbably high CPU-timer figure on
 *         LL_DARWIN, or no elapsed time reported by the timer query).
 */
F32 gpu_benchmark()
{
    if (!gGLManager.mHasShaderObjects || !gGLManager.mHasTimerQuery)
    {   // don't bother benchmarking the fixed function
        // or venerable drivers which don't support accurate timing anyway
        // and are likely to be correctly identified by the GPU table already.
        return -1.f;
    }

    if (gBenchmarkProgram.mProgramObject == 0)
    {
        LLViewerShaderMgr::instance()->initAttribsAndUniforms();

        gBenchmarkProgram.mName = "Benchmark Shader";
        gBenchmarkProgram.mFeatures.attachNothing = true;
        gBenchmarkProgram.mShaderFiles.clear();
        gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkV.glsl", GL_VERTEX_SHADER_ARB));
        gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkF.glsl", GL_FRAGMENT_SHADER_ARB));
        gBenchmarkProgram.mShaderLevel = 1;
        if (!gBenchmarkProgram.createShader(NULL, NULL))
        {
            return -1.f;
        }
    }

    LLGLDisable blend(GL_BLEND);

    //measure memory bandwidth by:
    // - allocating a batch of textures and render targets
    // - rendering those textures to those render targets
    // - recording time taken
    // - taking the median time for a given number of samples

    //resolution of textures/render targets
    const U32 res = 1024;

    //number of textures
    const U32 count = 32;

    //number of samples to take
    const S32 samples = 64;

    if (gGLManager.mHasTimerQuery)
    {
        LLGLSLShader::initProfile();
    }

    LLRenderTarget dest[count];
    U32 source[count];
    LLImageGL::generateTextures(count, source);
    std::vector<F32> results;

    //build a random texture
    U8* pixels = new U8[res*res*4];

    for (U32 i = 0; i < res*res*4; ++i)
    {
        pixels[i] = (U8) ll_rand(255);
    }


    gGL.setColorMask(true, true);
    LLGLDepthTest depth(GL_FALSE);

    for (U32 i = 0; i < count; ++i)
    {   //allocate render targets and textures
        dest[i].allocate(res,res,GL_RGBA,false, false, LLTexUnit::TT_TEXTURE, true);
        dest[i].bindTarget();
        dest[i].clear();
        dest[i].flush();

        gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, source[i]);
        LLImageGL::setManualImage(GL_TEXTURE_2D, 0, GL_RGBA, res,res,GL_RGBA, GL_UNSIGNED_BYTE, pixels);
    }

    delete [] pixels;

    //make a dummy triangle to draw with
    LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0, GL_STATIC_DRAW_ARB);
    buff->allocateBuffer(3, 0, true);

    LLStrider<LLVector3> v;

    buff->getVertexStrider(v);

    //a single triangle whose corners extend past the [-1, 1] square
    v[0].set(-1,1,0);
    v[1].set(-1,-3,0);
    v[2].set(3,1,0);

    buff->flush();

    gBenchmarkProgram.bind();

    bool busted_finish = false;

    buff->setBuffer(LLVertexBuffer::MAP_VERTEX);
    glFinish();

    //c starts at -1 so that one extra warm-up batch is run and not recorded
    for (S32 c = -1; c < samples; ++c)
    {
        LLTimer timer;
        timer.start();

        for (U32 i = 0; i < count; ++i)
        {
            dest[i].bindTarget();
            gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, source[i]);
            buff->drawArrays(LLRender::TRIANGLES, 0, 3);
            dest[i].flush();
        }

        //wait for current batch of copies to finish
        if (busted_finish)
        {
            //read a pixel off the last target since some drivers seem to ignore glFinish
            dest[count-1].bindTarget();
            U32 pixel = 0;
            glReadPixels(0,0,1,1,GL_RGBA, GL_UNSIGNED_BYTE, &pixel);
            dest[count-1].flush();
        }
        else
        {
            glFinish();
        }

        F32 time = timer.getElapsedTimeF32();

        if (c >= 0) // <-- ignore the first sample as it tends to be artificially slow
        {
            //store result in gigabytes per second
            F32 gb = (F32) ((F64) (res*res*8*count))/(1000000000);

            F32 gbps = gb/time;

            if (!gGLManager.mHasTimerQuery && !busted_finish && gbps > 128.f)
            {   //unrealistically high bandwidth for a card without timer queries, glFinish is probably ignored
                busted_finish = true;
                LL_WARNS() << "GPU Benchmark detected GL driver with broken glFinish implementation." << LL_ENDL;
            }
            else
            {
                results.push_back(gbps);
            }
        }
    }

    gBenchmarkProgram.unbind();

    if (gGLManager.mHasTimerQuery)
    {
        LLGLSLShader::finishProfile(false);
    }

    LLImageGL::deleteTextures(count, source);

    std::sort(results.begin(), results.end());

    //median of the recorded samples
    F32 gbps = results[results.size()/2];

    LL_INFOS() << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to CPU timers" << LL_ENDL;

#if LL_DARWIN
    if (gbps > 512.f)
    {
        LL_WARNS() << "Memory bandwidth is improbably high and likely incorrect; discarding result." << LL_ENDL;
        //OSX is probably lying, discard result
        //return right away: merely resetting gbps would be undone by the
        //timer-query figure that is assigned to it below
        return -1.f;
    }
#endif

    // NOTE(review): the divisors imply mTimeElapsed is in nanoseconds -- confirm
    F32 ms = gBenchmarkProgram.mTimeElapsed/1000000.f;
    F32 seconds = ms/1000.f;

    if (!(seconds > 0.f))
    {   //no elapsed time was recorded; dividing by it would report a
        //meaningless (infinite) bandwidth
        LL_WARNS() << "GPU Benchmark recorded no elapsed time from ARB_timer_query; discarding result." << LL_ENDL;
        return -1.f;
    }

    //multiply in F64: the product is 2^31 and has no headroom in 32-bit integer math
    F64 samples_drawn = (F64) res * (F64) res * (F64) count * (F64) samples;
    F32 samples_sec = (samples_drawn/1000000000.0)/seconds;
    gbps = samples_sec*8;

    LL_INFOS() << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to ARB_timer_query" << LL_ENDL;

    return gbps;
}