BOOL LLVOWLSky::updateGeometry(LLDrawable * drawable) { LLFastTimer ftm(FTM_GEO_SKY); LLStrider<LLVector3> vertices; LLStrider<LLVector2> texCoords; LLStrider<U16> indices; #if DOME_SLICES { mFanVerts = new LLVertexBuffer(LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB); mFanVerts->allocateBuffer(getFanNumVerts(), getFanNumIndices(), TRUE); BOOL success = mFanVerts->getVertexStrider(vertices) && mFanVerts->getTexCoord0Strider(texCoords) && mFanVerts->getIndexStrider(indices); if(!success) { llerrs << "Failed updating WindLight sky geometry." << llendl; } buildFanBuffer(vertices, texCoords, indices); mFanVerts->flush(); } { const U32 max_buffer_bytes = gSavedSettings.getS32("RenderMaxVBOSize")*1024; const U32 data_mask = LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK; const U32 max_verts = max_buffer_bytes / LLVertexBuffer::calcVertexSize(data_mask); const U32 total_stacks = getNumStacks(); const U32 verts_per_stack = getNumSlices(); // each seg has to have one more row of verts than it has stacks // then round down const U32 stacks_per_seg = (max_verts - verts_per_stack) / verts_per_stack; // round up to a whole number of segments const U32 strips_segments = (total_stacks+stacks_per_seg-1) / stacks_per_seg; llinfos << "WL Skydome strips in " << strips_segments << " batches." << llendl; mStripsVerts.resize(strips_segments, NULL); LLTimer timer; timer.start(); for (U32 i = 0; i < strips_segments ;++i) { LLVertexBuffer * segment = new LLVertexBuffer(LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB); mStripsVerts[i] = segment; U32 num_stacks_this_seg = stacks_per_seg; if ((i == strips_segments - 1) && (total_stacks % stacks_per_seg) != 0) { // for the last buffer only allocate what we'll use num_stacks_this_seg = total_stacks % stacks_per_seg; } // figure out what range of the sky we're filling const U32 begin_stack = i * stacks_per_seg; const U32 end_stack = begin_stack + num_stacks_this_seg; llassert(end_stack <= total_stacks); const U32 num_verts_this_seg = verts_per_stack * (num_stacks_this_seg+1); llassert(num_verts_this_seg <= max_verts); const U32 num_indices_this_seg = 1+num_stacks_this_seg*(2+2*verts_per_stack); llassert(num_indices_this_seg * sizeof(U16) <= max_buffer_bytes); segment->allocateBuffer(num_verts_this_seg, num_indices_this_seg, TRUE); // lock the buffer BOOL success = segment->getVertexStrider(vertices) && segment->getTexCoord0Strider(texCoords) && segment->getIndexStrider(indices); if(!success) { llerrs << "Failed updating WindLight sky geometry." << llendl; } // fill it buildStripsBuffer(begin_stack, end_stack, vertices, texCoords, indices); // and unlock the buffer segment->flush(); } llinfos << "completed in " << llformat("%.2f", timer.getElapsedTimeF32()) << "seconds" << llendl; } #else mStripsVerts = new LLVertexBuffer(LLDrawPoolWLSky::SKY_VERTEX_DATA_MASK, GL_STATIC_DRAW_ARB); const F32 RADIUS = LLWLParamManager::sParamMgr->getDomeRadius(); LLPointer<LLVertexBuffer> temp = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, 0); temp->allocateBuffer(12, 60, TRUE); BOOL success = temp->getVertexStrider(vertices) && temp->getIndexStrider(indices); if (success) { for (U32 i = 0; i < 12; i++) { *vertices++ = icosahedron_vert[i]; } for (U32 i = 0; i < 60; i++) { *indices++ = icosahedron_ind[i]; } } LLPointer<LLVertexBuffer> temp2; for (U32 i = 0; i < 8; i++) { temp2 = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, 0); subdivide(*temp, temp2); temp = temp2; } temp->getVertexStrider(vertices); for (S32 i = 0; i < temp->getNumVerts(); i++) { LLVector3 v = vertices[i]; v.normVec(); vertices[i] = v*RADIUS; } temp2 = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX, 0); chop(*temp, temp2); mStripsVerts->allocateBuffer(temp2->getNumVerts(), temp2->getNumIndices(), TRUE); success = mStripsVerts->getVertexStrider(vertices) && mStripsVerts->getTexCoordStrider(texCoords) && mStripsVerts->getIndexStrider(indices); LLStrider<LLVector3> v; temp2->getVertexStrider(v); LLStrider<U16> ind; temp2->getIndexStrider(ind); if (success) { for (S32 i = 0; i < temp2->getNumVerts(); ++i) { LLVector3 vert = *v++; vert.normVec(); F32 z0 = vert.mV[2]; F32 x0 = vert.mV[0]; vert *= RADIUS; *vertices++ = vert; *texCoords++ = LLVector2((-z0 + 1.f) / 2.f, (-x0 + 1.f) / 2.f); } for (S32 i = 0; i < temp2->getNumIndices(); ++i) { *indices++ = *ind++; } } mStripsVerts->flush(); #endif updateStarColors(); updateStarGeometry(drawable); LLPipeline::sCompiles++; return TRUE; }
void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLVOAvatar* avatar, LLFace* face, const LLMeshSkinInfo* skin, LLVolume* volume, const LLVolumeFace& vol_face, LLVOVolume* vobj) { LLVector4a* weight = vol_face.mWeights; if (!weight) { return; } LLPointer<LLVertexBuffer> buffer = face->getVertexBuffer(); LLDrawable* drawable = face->getDrawable(); U32 data_mask = face->getRiggedVertexBufferDataMask(); if (buffer.isNull() || buffer->getTypeMask() != data_mask || buffer->getRequestedVerts() != vol_face.mNumVertices || buffer->getRequestedIndices() != vol_face.mNumIndices || (drawable && drawable->isState(LLDrawable::REBUILD_ALL))) { face->setGeomIndex(0); face->setIndicesIndex(0); if (buffer.isNull() || buffer->getTypeMask() != data_mask) { //make a new buffer if (sShaderLevel > 0) { buffer = new LLVertexBuffer(data_mask, GL_DYNAMIC_DRAW_ARB); } else { buffer = new LLVertexBuffer(data_mask, GL_STREAM_DRAW_ARB); } buffer->allocateBuffer(vol_face.mNumVertices, vol_face.mNumIndices, true); } else { //resize existing buffer buffer->resizeBuffer(vol_face.mNumVertices, vol_face.mNumIndices); } face->setSize(vol_face.mNumVertices, vol_face.mNumIndices); face->setVertexBuffer(buffer); U16 offset = 0; LLMatrix4 mat_vert = skin->mBindShapeMatrix; glh::matrix4f m((F32*) mat_vert.mMatrix); m = m.inverse().transpose(); F32 mat3[] = { m.m[0], m.m[1], m.m[2], m.m[4], m.m[5], m.m[6], m.m[8], m.m[9], m.m[10] }; LLMatrix3 mat_normal(mat3); static LLCachedControl<bool> mesh_enable_deformer(gSavedSettings, "MeshEnableDeformer"); if (mesh_enable_deformer) { LLDeformedVolume* deformed_volume = vobj->getDeformedVolume(); deformed_volume->deform(volume, avatar, skin, face->getTEOffset()); face->getGeometryVolume(*deformed_volume, face->getTEOffset(), mat_vert, mat_normal, offset, true); } else { face->getGeometryVolume(*volume, face->getTEOffset(), mat_vert, mat_normal, offset, true); } } if (sShaderLevel <= 0 && face->mLastSkinTime < avatar->getLastSkinTime()) { //perform software vertex skinning for this face LLStrider<LLVector3> position; LLStrider<LLVector3> normal; bool has_normal = buffer->hasDataType(LLVertexBuffer::TYPE_NORMAL); buffer->getVertexStrider(position); if (has_normal) { buffer->getNormalStrider(normal); } LLVector4a* pos = (LLVector4a*) position.get(); LLVector4a* norm = has_normal ? (LLVector4a*) normal.get() : NULL; //build matrix palette LLMatrix4a mp[64]; LLMatrix4* mat = (LLMatrix4*) mp; for (U32 j = 0; j < skin->mJointNames.size(); ++j) { LLJoint* joint = avatar->getJoint(skin->mJointNames[j]); if (joint) { mat[j] = skin->mInvBindMatrix[j]; mat[j] *= joint->getWorldMatrix(); } } LLMatrix4a bind_shape_matrix; bind_shape_matrix.loadu(skin->mBindShapeMatrix); for (U32 j = 0; j < buffer->getRequestedVerts(); ++j) { LLMatrix4a final_mat; final_mat.clear(); S32 idx[4]; LLVector4 wght; F32 scale = 0.f; for (U32 k = 0; k < 4; k++) { F32 w = weight[j][k]; idx[k] = llclamp((S32) floorf(w), 0, 63); wght[k] = w - floorf(w); scale += wght[k]; } wght *= 1.f/scale; for (U32 k = 0; k < 4; k++) { F32 w = wght[k]; LLMatrix4a src; src.setMul(mp[idx[k]], w); final_mat.add(src); } LLVector4a& v = vol_face.mPositions[j]; LLVector4a t; LLVector4a dst; bind_shape_matrix.affineTransform(v, t); final_mat.affineTransform(t, dst); pos[j] = dst; if (norm) { LLVector4a& n = vol_face.mNormals[j]; bind_shape_matrix.rotate(n, t); final_mat.rotate(t, dst); norm[j] = dst; } } } if (drawable && face->getTEOffset() == drawable->getNumFaces() - 1) { drawable->clearState(LLDrawable::REBUILD_ALL); } }
F32 gpu_benchmark() { if (!gGLManager.mHasShaderObjects || !gGLManager.mHasTimerQuery) { // don't bother benchmarking the fixed function // or venerable drivers which don't support accurate timing anyway // and are likely to be correctly identified by the GPU table already. return -1.f; } if (gBenchmarkProgram.mProgramObject == 0) { LLViewerShaderMgr::instance()->initAttribsAndUniforms(); gBenchmarkProgram.mName = "Benchmark Shader"; gBenchmarkProgram.mFeatures.attachNothing = true; gBenchmarkProgram.mShaderFiles.clear(); gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkV.glsl", GL_VERTEX_SHADER_ARB)); gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkF.glsl", GL_FRAGMENT_SHADER_ARB)); gBenchmarkProgram.mShaderLevel = 1; if (!gBenchmarkProgram.createShader(NULL, NULL)) { return -1.f; } } LLGLDisable blend(GL_BLEND); //measure memory bandwidth by: // - allocating a batch of textures and render targets // - rendering those textures to those render targets // - recording time taken // - taking the median time for a given number of samples //resolution of textures/render targets const U32 res = 1024; //number of textures const U32 count = 32; //number of samples to take const S32 samples = 64; if (gGLManager.mHasTimerQuery) { LLGLSLShader::initProfile(); } LLRenderTarget dest[count]; U32 source[count]; LLImageGL::generateTextures(count, source); std::vector<F32> results; //build a random texture U8* pixels = new U8[res*res*4]; for (U32 i = 0; i < res*res*4; ++i) { pixels[i] = (U8) ll_rand(255); } gGL.setColorMask(true, true); LLGLDepthTest depth(GL_FALSE); for (U32 i = 0; i < count; ++i) { //allocate render targets and textures dest[i].allocate(res,res,GL_RGBA,false, false, LLTexUnit::TT_TEXTURE, true); dest[i].bindTarget(); dest[i].clear(); dest[i].flush(); gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, source[i]); LLImageGL::setManualImage(GL_TEXTURE_2D, 0, GL_RGBA, res,res,GL_RGBA, GL_UNSIGNED_BYTE, pixels); } delete [] pixels; //make a dummy triangle to draw with LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_TEXCOORD0, GL_STATIC_DRAW_ARB); buff->allocateBuffer(3, 0, true); LLStrider<LLVector3> v; LLStrider<LLVector2> tc; buff->getVertexStrider(v); v[0].set(-1,1,0); v[1].set(-1,-3,0); v[2].set(3,1,0); buff->flush(); gBenchmarkProgram.bind(); bool busted_finish = false; buff->setBuffer(LLVertexBuffer::MAP_VERTEX); glFinish(); for (S32 c = -1; c < samples; ++c) { LLTimer timer; timer.start(); for (U32 i = 0; i < count; ++i) { dest[i].bindTarget(); gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, source[i]); buff->drawArrays(LLRender::TRIANGLES, 0, 3); dest[i].flush(); } //wait for current batch of copies to finish if (busted_finish) { //read a pixel off the last target since some drivers seem to ignore glFinish dest[count-1].bindTarget(); U32 pixel = 0; glReadPixels(0,0,1,1,GL_RGBA, GL_UNSIGNED_BYTE, &pixel); dest[count-1].flush(); } else { glFinish(); } F32 time = timer.getElapsedTimeF32(); if (c >= 0) // <-- ignore the first sample as it tends to be artificially slow { //store result in gigabytes per second F32 gb = (F32) ((F64) (res*res*8*count))/(1000000000); F32 gbps = gb/time; if (!gGLManager.mHasTimerQuery && !busted_finish && gbps > 128.f) { //unrealistically high bandwidth for a card without timer queries, glFinish is probably ignored busted_finish = true; LL_WARNS() << "GPU Benchmark detected GL driver with broken glFinish implementation." << LL_ENDL; } else { results.push_back(gbps); } } } gBenchmarkProgram.unbind(); if (gGLManager.mHasTimerQuery) { LLGLSLShader::finishProfile(false); } LLImageGL::deleteTextures(count, source); std::sort(results.begin(), results.end()); F32 gbps = results[results.size()/2]; LL_INFOS() << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to CPU timers" << LL_ENDL; #if LL_DARWIN if (gbps > 512.f) { LL_WARNS() << "Memory bandwidth is improbably high and likely incorrect; discarding result." << LL_ENDL; //OSX is probably lying, discard result gbps = -1.f; } #endif F32 ms = gBenchmarkProgram.mTimeElapsed/1000000.f; F32 seconds = ms/1000.f; F64 samples_drawn = res*res*count*samples; F32 samples_sec = (samples_drawn/1000000000.0)/seconds; gbps = samples_sec*8; LL_INFOS() << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to ARB_timer_query" << LL_ENDL; return gbps; }