void WorkerThread::IncrementDispatchCounter() { if (!mozilla::StaticPrefs::dom_performance_enable_scheduler_timing()) { return; } MutexAutoLock lock(mLock); if (mWorkerPrivate) { PerformanceCounter* performanceCounter = mWorkerPrivate->GetPerformanceCounter(); if (performanceCounter) { performanceCounter->IncrementDispatchCounter(DispatchCategory::Worker); } } }
/**
 * Book-keeps timeout execution time at the boundary between two callbacks.
 *
 * @param aRunningTimeout  The timeout whose callback has been running up to
 *                         now, or null if none was running.
 * @param aTimeout         The timeout whose callback is about to start, or
 *                         null if recording should stop.
 *
 * Nothing is recorded when the scheduler-timing pref is off and the window is
 * a chrome window.
 */
void TimeoutManager::RecordExecution(Timeout* aRunningTimeout,
                                     Timeout* aTimeout) {
  const bool schedulerTimingEnabled =
      StaticPrefs::dom_performance_enable_scheduler_timing();
  if (!schedulerTimingEnabled && mWindow.IsChromeWindow()) {
    return;
  }

  TimeoutBudgetManager& budget = TimeoutBudgetManager::Get();
  TimeStamp now = TimeStamp::Now();

  if (aRunningTimeout) {
    // A callback was in flight: account for everything it has run until now.
    TimeDuration elapsed = budget.RecordExecution(
        now, aRunningTimeout, mWindow.IsBackgroundInternal());
    budget.MaybeCollectTelemetry(now);

    UpdateBudget(now, elapsed);

    // Ad-hoc reuse of the performance counters for timers; meant to be
    // removed at some point. See Bug 1482834.
    if (PerformanceCounter* counter = GetPerformanceCounter()) {
      counter->IncrementExecutionDuration(elapsed.ToMicroseconds());
    }
  }

  if (!aTimeout) {
    // No new callback is starting: stop by clearing the start timestamp.
    budget.StopRecording();
    return;
  }

  // A new timeout callback is starting: begin recording from `now`.
  budget.StartRecording(now);
  if (PerformanceCounter* counter = GetPerformanceCounter()) {
    counter->IncrementDispatchCounter(DispatchCategory(TaskCategory::Timer));
  }
}
void ScanPerformanceThread::run() { m_bStopThread = false; bool bRet = false; PerformanceCounter perfCounter; MemoryPerformance memoryPerf = {0}; ProcessorPerformance processorPerf = {0}; DiskPerformance diskPerf = {0}; PerformanceHouse perfHouse; int refreshCount = 0; while (!m_bStopThread) { refreshCount++; perfCounter.GetMemoryPerformance(memoryPerf); perfHouse.SetMemoryPerformance(memoryPerf); perfCounter.GetProcessorPerformance(processorPerf); perfHouse.SetProcessorPerformance(processorPerf); perfCounter.GetDiskPerformance(diskPerf); perfHouse.SetDiskPerformance(diskPerf); this->msleep(500); // 每刷新30次写一次LOG if (refreshCount%30 != 0) continue; PrintLogW(L"Cpu Usage: %u%%", processorPerf.LoadPercentage); PrintLogW(L"Memory Total Size: %u", memoryPerf.TotalSize); PrintLogW(L"Memory Available Size: %u", memoryPerf.AvailableSize); PrintLogW(L""); } }
void Processor::K10PerformanceCounters::perfCounterGetInfo (class Processor *p) { PerformanceCounter *performanceCounter; DWORD node, core, slot; printf ("Caption:\n"); printf ("Evt:\tperformance counter event\n"); printf ("En:\tperformance counter is enabled\n"); printf ("U:\tperformance counter will count usermode instructions\n"); printf ("OS:\tperformance counter will counter Os/kernel instructions\n"); printf ("cMsk:\tperformance counter mask (see processor manual reference)\n"); printf ("ED:\tcounting on edge detect, else counting on level detect\n"); printf ("APIC:\tif set, an APIC interrupt will be issued on counter overflow\n"); printf ("icMsk:\tif set, mask is inversed (see processor manual reference)\n"); printf ("uMsk:\tunit mask (see processor manual reference)\n\n"); for (node = 0; node < p->getProcessorNodes(); node++) { printf ("--- Node %d\n", node); p->setNode(node); p->setCore(ALL_CORES); for (slot = 0; slot < p->getMaxSlots(); slot++) { performanceCounter = new PerformanceCounter(p->getMask(), slot, p->getMaxSlots()); for (core = 0; core < p->getProcessorCores(); core++) { if (!performanceCounter->fetch (core)) { printf ("K10PerformanceCounters.cpp::perfCounterGetInfo - unable to read performance counter register\n"); free (performanceCounter); return; } printf ("Slot %d core %d - evt:0x%x En:%d U:%d OS:%d cMsk:%x ED:%d APIC:%d icMsk:%x uMsk:%x\n", slot, core, performanceCounter->getEventSelect(), performanceCounter->getEnabled(), performanceCounter->getCountUserMode(), performanceCounter->getCountOsMode(), performanceCounter->getCounterMask(), performanceCounter->getEdgeDetect(), performanceCounter->getEnableAPICInterrupt(), performanceCounter->getInvertCntMask(), performanceCounter->getUnitMask() ); } free (performanceCounter); } } }
void Processor::K10PerformanceCounters::perfMonitorDCMA(class Processor *p) { PerformanceCounter *perfCounter; DWORD cpuIndex, nodeId, coreId; PROCESSORMASK cpuMask; unsigned int perfCounterSlot; uint64_t misses; // This pointers will refer an array containing previous performance counter values uint64_t *prevPerfCounters; try { p->setNode(p->ALL_NODES); p->setCore(p->ALL_CORES); cpuMask = p->getMask(); /* We do this to do some "caching" of the mask, instead of calculating each time we need to retrieve the time stamp counter */ // Allocating space for previous values of counters. prevPerfCounters = (uint64_t *) calloc( p->getProcessorCores() * p->getProcessorNodes(), sizeof(uint64_t)); //Creates a new performance counter, for now we set slot 0, but we will //use the findAvailable slot method to find an available method to be used perfCounter = new PerformanceCounter(cpuMask, 0, p->getMaxSlots()); //Event 0x76 is Idle Counter perfCounter->setEventSelect(0x47); perfCounter->setCountOsMode(true); perfCounter->setCountUserMode(true); perfCounter->setCounterMask(0); perfCounter->setEdgeDetect(false); perfCounter->setEnableAPICInterrupt(false); perfCounter->setInvertCntMask(false); perfCounter->setUnitMask(0); //Finds an available slot for our purpose perfCounterSlot = perfCounter->findAvailableSlot(); //findAvailableSlot() returns -2 in case of error if (perfCounterSlot == 0xfffffffe) throw "unable to access performance counter slots"; //findAvailableSlot() returns -1 in case there aren't available slots if (perfCounterSlot == 0xffffffff) throw "unable to find an available performance counter slot"; printf("Performance counter will use slot #%d\n", perfCounterSlot); //In case there are no errors, we program the object with the slot itself has found perfCounter->setSlot(perfCounterSlot); // Program the counter slot if (!perfCounter->program()) throw "unable to program performance counter parameters"; // Enable the counter slot if (!perfCounter->enable()) throw "unable to 
enable performance counters"; /* Here we take a snapshot of the performance counter and a snapshot of the time * stamp counter to initialize the arrays to let them not show erratic huge numbers * on first step */ if (!perfCounter->takeSnapshot()) throw "unable to retrieve performance counter data"; cpuIndex = 0; for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++) { for (coreId = 0x0; coreId < p->getProcessorCores(); coreId++) { prevPerfCounters[cpuIndex] = perfCounter->getCounter(cpuIndex); cpuIndex++; } } Signal::activateSignalHandler(SIGINT); while (!Signal::getSignalStatus()) { if (!perfCounter->takeSnapshot()) throw "unable to retrieve performance counter data"; cpuIndex = 0; for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++) { printf("Node %d -", nodeId); for (coreId = 0x0; coreId < p->getProcessorCores(); coreId++) { misses = perfCounter->getCounter(cpuIndex) - prevPerfCounters[cpuIndex]; printf(" c%u:%0.3fk", coreId, (float) (misses/1000.0f)); prevPerfCounters[cpuIndex] = perfCounter->getCounter(cpuIndex); cpuIndex++; } printf("\n"); } Sleep(1000); } perfCounter->disable(); printf ("CTRL-C executed. Cleaning on exit...\n"); } catch (char const *str) { if (perfCounter->getEnabled()) perfCounter->disable(); printf("K10PerformanceCounters.cpp::perfMonitorCPUUsage - %s\n", str); } free(perfCounter); free(prevPerfCounters); return; }
void Processor::K10PerformanceCounters::perfMonitorCPUUsage(class Processor *p) { PerformanceCounter *perfCounter; MSRObject *tscCounter; //We need the timestamp counter too to determine the cpu usage in percentage DWORD cpuIndex, nodeId, coreId; PROCESSORMASK cpuMask; unsigned int perfCounterSlot; uint64_t usage; // These two pointers will refer to two arrays containing previous performance counter values // and previous Time Stamp counters. We need these to obtain instantaneous CPU usage information uint64_t *prevPerfCounters; uint64_t *prevTSCCounters; try { p->setNode(p->ALL_NODES); p->setCore(p->ALL_CORES); cpuMask = p->getMask(); /* We do this to do some "caching" of the mask, instead of calculating each time we need to retrieve the time stamp counter */ // Allocating space for previous values of counters. prevPerfCounters = (uint64_t *) calloc(p->getProcessorCores() * p->getProcessorNodes(), sizeof(uint64_t)); prevTSCCounters = (uint64_t *) calloc(p->getProcessorCores() * p->getProcessorNodes(), sizeof(uint64_t)); // MSR Object to retrieve the time stamp counter for all the nodes and all the processors tscCounter = new MSRObject(); //Creates a new performance counter, for now we set slot 0, but we will //use the findAvailable slot method to find an available method to be used perfCounter = new PerformanceCounter(cpuMask, 0, p->getMaxSlots()); //Event 0x76 is Idle Counter perfCounter->setEventSelect(0x76); perfCounter->setCountOsMode(true); perfCounter->setCountUserMode(true); perfCounter->setCounterMask(0); perfCounter->setEdgeDetect(false); perfCounter->setEnableAPICInterrupt(false); perfCounter->setInvertCntMask(false); perfCounter->setUnitMask(0); perfCounter->setMaxSlots(p->getMaxSlots()); //Finds an available slot for our purpose perfCounterSlot = perfCounter->findAvailableSlot(); //findAvailableSlot() returns -2 in case of error if (perfCounterSlot == 0xfffffffe) throw "unable to access performance counter slots"; //findAvailableSlot() returns -1 in 
case there aren't available slots if (perfCounterSlot == 0xffffffff) throw "unable to find an available performance counter slot"; printf("Performance counter will use slot #%d\n", perfCounterSlot); //In case there are no errors, we program the object with the slot itself has found perfCounter->setSlot(perfCounterSlot); // Program the counter slot if (!perfCounter->program()) throw "unable to program performance counter parameters"; // Enable the counter slot if (!perfCounter->enable()) throw "unable to enable performance counters"; /* Here we take a snapshot of the performance counter and a snapshot of the time * stamp counter to initialize the arrays to let them not show erratic huge numbers * on first step */ if (!perfCounter->takeSnapshot()) { throw "unable to retrieve performance counter data"; return; } if (!tscCounter->readMSR(TIME_STAMP_COUNTER_REG, cpuMask)) { throw "unable to retrieve time stamp counter"; return; } cpuIndex = 0; for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++) { for (coreId = 0; coreId < p->getProcessorCores(); coreId++) { prevPerfCounters[cpuIndex] = perfCounter->getCounter(cpuIndex); prevTSCCounters[cpuIndex] = tscCounter->getBits(cpuIndex, 0, 64); cpuIndex++; } } Signal::activateSignalHandler(SIGINT); printf("Values >100%% can be expected if the CPU is in a Boosted State\n"); while (!Signal::getSignalStatus()) { if (!perfCounter->takeSnapshot()) { throw "unable to retrieve performance counter data"; return; } if (!tscCounter->readMSR(TIME_STAMP_COUNTER_REG, cpuMask)) { throw "unable to retrieve time stamp counter"; return; } cpuIndex = 0; for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++) { printf("\nNode %d -", nodeId); for (coreId = 0x0; coreId < p->getProcessorCores(); coreId++) { usage = ((perfCounter->getCounter(cpuIndex)) - prevPerfCounters[cpuIndex]) * 100; usage /= tscCounter->getBits(cpuIndex, 0, 64) - prevTSCCounters[cpuIndex]; printf(" c%d:%d%%", coreId, (unsigned int) usage); prevPerfCounters[cpuIndex] = 
perfCounter->getCounter(cpuIndex); prevTSCCounters[cpuIndex] = tscCounter->getBits(cpuIndex, 0, 64); cpuIndex++; } } Sleep(1000); } perfCounter->disable(); printf ("CTRL-C executed. Cleaning on exit...\n"); } catch (char const *str) { if (perfCounter->getEnabled()) perfCounter->disable(); printf("K10PerformanceCounters.cpp::perfMonitorCPUUsage - %s\n", str); } free(perfCounter); free(tscCounter); free(prevPerfCounters); free(prevTSCCounters); return; }
//-------------------------------------------------------------------------------------- // Render a frame //-------------------------------------------------------------------------------------- void Render() { // Update our time static float t = 0.0f; float delta_t = 0.0f; { static DWORD dwTimeStart = 0; DWORD dwTimeCur = GetTickCount(); if( dwTimeStart == 0 ) dwTimeStart = dwTimeCur; float old_t = t; t = ( dwTimeCur - dwTimeStart ) / 1000.0f; delta_t = t-old_t; } { sphParticle particles[32]; for(size_t i=0; i<_countof(particles); ++i) { particles[i].position = ist::simdvec4_set(GenRand()*0.5f, GenRand()*0.5f, GenRand()*0.5f-7.5f, 1.0f); particles[i].velocity = _mm_set1_ps(0.0f); } g_sphgrid.addParticles(particles, _countof(particles)); } { static PerformanceCounter s_timer; static float s_prev = 0.0f; PerformanceCounter timer; g_sphgrid.update(1.0f); g_pImmediateContext->UpdateSubresource( g_pCubeInstanceBuffer, 0, NULL, &g_sphgrid.particles, 0, 0 ); if(s_timer.getElapsedMillisecond() - s_prev > 1000.0f) { char buf[128]; _snprintf(buf, _countof(buf), " SPH update: %d particles %.3fms\n", g_sphgrid.num_active_particles, timer.getElapsedMillisecond()); OutputDebugStringA(buf); ::SetWindowTextA(g_hWnd, buf); s_prev = s_timer.getElapsedMillisecond(); } } { CBChangesEveryFrame cb; XMVECTOR eye = g_camera.getEye(); { XMMATRIX rot = XMMatrixRotationZ(XMConvertToRadians(0.1f)); eye = XMVector4Transform(eye, rot); } g_camera.setEye(eye); g_camera.updateMatrix(); XMMATRIX vp = g_camera.getViewProjectionMatrix(); cb.ViewProjection = XMMatrixTranspose( vp ); cb.CameraPos = (FLOAT*)&eye; cb.LightPos = XMFLOAT4(10.0f, 10.0f, -10.0f, 1.0f); cb.LightColor = XMFLOAT4(0.9f, 0.9f, 0.9f, 1.0f); cb.MeshShininess = 200.0f; g_pImmediateContext->UpdateSubresource( g_pCBChangesEveryFrame, 0, NULL, &cb, 0, 0 ); } float ClearColor[4] = { 0.0f, 0.125f, 0.3f, 1.0f }; // red, green, blue, alpha g_pImmediateContext->ClearRenderTargetView( g_pRenderTargetView, ClearColor ); 
g_pImmediateContext->ClearDepthStencilView( g_pDepthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0 ); { ID3D11Buffer *buffers[] = {g_pCubeVertexBuffer, g_pCubeInstanceBuffer}; UINT strides[] = {sizeof(SimpleVertex), sizeof(sphParticle), }; UINT offsets[] = {0, 0}; g_pImmediateContext->IASetVertexBuffers( 0, ARRAYSIZE(buffers), buffers, strides, offsets ); } g_pImmediateContext->IASetInputLayout( g_pCubeVertexLayout ); g_pImmediateContext->IASetIndexBuffer( g_pCubeIndexBuffer, DXGI_FORMAT_R16_UINT, 0 ); g_pImmediateContext->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); // Render the cube g_pImmediateContext->VSSetShader( g_pCubeVertexShader, NULL, 0 ); g_pImmediateContext->VSSetConstantBuffers( 0, 1, &g_pCBChangesEveryFrame ); g_pImmediateContext->PSSetShader( g_pCubePixelShader, NULL, 0 ); g_pImmediateContext->PSSetConstantBuffers( 0, 1, &g_pCBChangesEveryFrame ); g_pImmediateContext->DrawIndexedInstanced( 36, (UINT)g_sphgrid.num_active_particles, 0, 0, 0 ); // Present our back buffer to our front buffer g_pSwapChain->Present( 1, 0 ); // vsync on //g_pSwapChain->Present( 0, 0 ); // vsync off }
int ImplicitNewmarkSparse::DoTimestep() { int numIter = 0; double error0 = 0; // error after the first step double errorQuotient; // store current amplitudes and set initial guesses for qaccel, qvel for(int i=0; i<r; i++) { q_1[i] = q[i]; qvel_1[i] = qvel[i]; qaccel_1[i] = qaccel[i]; qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i]; qvel[i] = alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i]; } do { int i; /* printf("q:\n"); for(int i=0; i<r; i++) printf("%G ", q[i]); printf("\n"); printf("Internal forces:\n"); for(int i=0; i<r; i++) printf("%G ", internalForces[i]); printf("\n"); */ PerformanceCounter counterForceAssemblyTime; forceModel->GetForceAndMatrix(q, internalForces, tangentStiffnessMatrix); counterForceAssemblyTime.StopCounter(); forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime(); //tangentStiffnessMatrix->Print(); //tangentStiffnessMatrix->Save("K"); // scale internal forces for(i=0; i<r; i++) internalForces[i] *= internalForceScalingFactor; *tangentStiffnessMatrix *= internalForceScalingFactor; memset(qresidual, 0, sizeof(double) * r); if (useStaticSolver) { // no operation } else { // build effective stiffness: add mass matrix and damping matrix to tangentStiffnessMatrix tangentStiffnessMatrix->ScalarMultiply(dampingStiffnessCoef, rayleighDampingMatrix); rayleighDampingMatrix->AddSubMatrix(dampingMassCoef, *massMatrix); rayleighDampingMatrix->ScalarMultiplyAdd(alpha4, tangentStiffnessMatrix); //*tangentStiffnessMatrix += alpha4 * *rayleighDampingMatrix; tangentStiffnessMatrix->AddSubMatrix(alpha4, *dampingMatrix, 1); tangentStiffnessMatrix->AddSubMatrix(alpha1, *massMatrix); // compute force residual, store it into aux variable qresidual // qresidual = M * qaccel + C * qvel - externalForces + internalForces massMatrix->MultiplyVector(qaccel, qresidual); rayleighDampingMatrix->MultiplyVectorAdd(qvel, qresidual); dampingMatrix->MultiplyVectorAdd(qvel, qresidual); } // add externalForces, 
internalForces for(i=0; i<r; i++) { qresidual[i] += internalForces[i] - externalForces[i]; qresidual[i] *= -1; qdelta[i] = qresidual[i]; } /* printf("internal forces:\n"); for(int i=0; i<r; i++) printf("%G ", internalForces[i]); printf("\n"); printf("external forces:\n"); for(int i=0; i<r; i++) printf("%G ", externalForces[i]); printf("\n"); printf("residual:\n"); for(int i=0; i<r; i++) printf("%G ", -qresidual[i]); printf("\n"); */ double error = 0; for(i=0; i<r; i++) error += qresidual[i] * qresidual[i]; // on the first iteration, compute initial error if (numIter == 0) { error0 = error; errorQuotient = 1.0; } else { // error divided by the initial error, before performing this iteration errorQuotient = error / error0; } if (errorQuotient < epsilon * epsilon) { break; } //tangentStiffnessMatrix->Save("Keff"); RemoveRows(r, bufferConstrained, qdelta, numConstrainedDOFs, constrainedDOFs); systemMatrix->AssignSuperMatrix(tangentStiffnessMatrix); // solve: systemMatrix * buffer = bufferConstrained PerformanceCounter counterSystemSolveTime; memset(buffer, 0, sizeof(double) * r); #ifdef SPOOLES SPOOLESSolver solver(systemMatrix); int info = solver.SolveLinearSystem(buffer, bufferConstrained); char solverString[16] = "SPOOLES"; #endif #ifdef PARDISO int info = pardisoSolver->ComputeCholeskyDecomposition(systemMatrix); if (info == 0) info = pardisoSolver->SolveLinearSystem(buffer, bufferConstrained); char solverString[16] = "PARDISO"; #endif #ifdef PCG int info = jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner(buffer, bufferConstrained, 1e-6, 10000); if (info > 0) info = 0; char solverString[16] = "PCG"; #endif if (info != 0) { printf("Error: %s sparse solver returned non-zero exit status %d.\n", solverString, (int)info); return 1; } counterSystemSolveTime.StopCounter(); systemSolveTime = counterSystemSolveTime.GetElapsedTime(); InsertRows(r, buffer, qdelta, numConstrainedDOFs, constrainedDOFs); /* printf("qdelta:\n"); for(int i=0; i<r; i++) 
printf("%G ", qdelta[i]); printf("\n"); exit(1); */ // update state for(i=0; i<r; i++) { q[i] += qdelta[i]; qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i]; qvel[i] = alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i]; } for(int i=0; i<numConstrainedDOFs; i++) q[constrainedDOFs[i]] = qvel[constrainedDOFs[i]] = qaccel[constrainedDOFs[i]] = 0.0; numIter++; } while (numIter < maxIterations); /* printf("qvel:\n"); for(int i=0; i<r; i++) printf("%G ", qvel[i]); printf("\n"); printf("qaccel:\n"); for(int i=0; i<r; i++) printf("%G ", qaccel[i]); printf("\n"); */ //printf("Num iterations performed: %d\n",numIter); //if ((numIter >= maxIterations) && (maxIterations > 1)) //{ //printf("Warning: method did not converge in max number of iterations.\n"); //} return 0; }
int ImplicitNewmarkDense::DoTimestep() { int numIter = 0; double error0 = 0; // error after the first step double errorQuotient; // store current amplitudes and set initial guesses for qaccel, qvel // note: these guesses will later be overriden; they are only used to construct the right-hand-side vector (multiplication with M and C) for(int i=0; i<r; i++) { q_1[i] = q[i]; qvel_1[i] = qvel[i]; qaccel_1[i] = qaccel[i]; qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i]; qvel[i] = alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i]; } do { int i; PerformanceCounter counterForceAssemblyTime; reducedForceModel->GetForceAndMatrix(q, internalForces, tangentStiffnessMatrix); counterForceAssemblyTime.StopCounter(); forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime(); // scale internal forces for(i=0; i<r; i++) internalForces[i] *= internalForceScalingFactor; /* printf("internalForceScalingFactor = %G\n", internalForceScalingFactor); printf("q:\n"); for(int i=0; i<r; i++) printf("%G ", q[i]); printf("\n"); printf("Internal forces:\n"); for(int i=0; i<r; i++) printf("%G ", internalForces[i]); printf("\n"); */ for(i=0; i<r2; i++) tangentStiffnessMatrix[i] *= internalForceScalingFactor; for(i=0; i<r2; i++) tangentStiffnessMatrix[i] += tangentStiffnessMatrixOffset[i]; /* printf("Tangent stiffness matrix:\n"); for(int i=0; i<r; i++) { for(int j=0; j<r; j++) printf("%.15f ", tangentStiffnessMatrix[r * j + i]); printf("\n"); } printf("Tangent stiffness matrix offset:\n"); for(int i=0; i<r; i++) { for(int j=0; j<r; j++) printf("%.15f ", tangentStiffnessMatrixOffset[r * j + i]); printf("\n"); } printf("----\n"); */ //WriteMatrixToDisk_("Kr", r, r, tangentStiffnessMatrix); //WriteMatrixToDisk_("Mr", r, r, massMatrix); //exit(1); memset(qresidual, 0, sizeof(double) * r); if (useStaticSolver) { // no operation } else { // build effective stiffness: add mass matrix and damping matrix to tangentStiffnessMatrix for(i=0; i<r2; i++) { 
dampingMatrix[i] = dampingMassCoef * massMatrix[i] + dampingStiffnessCoef * tangentStiffnessMatrix[i]; tangentStiffnessMatrix[i] += alpha4 * dampingMatrix[i]; //tangentStiffnessMatrix[i] += alpha3 * massMatrix[i] + gamma * alpha1 * dampingMatrix[i]; // static Rayleigh damping // add mass matrix to the effective stiffness matrix tangentStiffnessMatrix[i] += alpha1 * massMatrix[i]; } // compute force residual, store it into aux variable qresidual // qresidual = M * qaccel + C * qvel - externalForces + internalForces // M * qaccel cblas_dgemv(CblasColMajor,CblasNoTrans, r,r,1.0,massMatrix,r,qaccel,1,0.0,qresidual,1); // += C * qvel cblas_dgemv(CblasColMajor,CblasNoTrans, r,r,1.0,dampingMatrix,r,qvel,1,1.0,qresidual,1); } // add externalForces, internalForces for(i=0; i<r; i++) { qresidual[i] += internalForces[i] - externalForces[i]; qresidual[i] *= -1; qdelta[i] = qresidual[i]; } /* printf("internalForceScalingFactor = %G\n", internalForceScalingFactor); printf("internal forces:\n"); for(int i=0; i<r; i++) printf("%G ", internalForces[i]); printf("\n"); printf("external forces:\n"); for(int i=0; i<r; i++) printf("%G ", externalForces[i]); printf("\n"); printf("mass matrix:\n"); for(int i=0; i<r*r; i++) printf("%G ", massMatrix[i]); printf("\n"); printf("damping matrix:\n"); for(int i=0; i<r*r; i++) printf("%G ", dampingMatrix[i]); printf("\n"); printf("effective stiffness matrix:\n"); for(int i=0; i<r*r; i++) printf("%G ", tangentStiffnessMatrix[i]); printf("\n"); printf("matrix rhs:\n"); for(int i=0; i<r; i++) printf("%G ", qdelta[i]); printf("\n"); */ double error = 0; for(i=0; i<r; i++) error += qresidual[i] * qresidual[i]; // on the first iteration, compute initial error if (numIter == 0) { error0 = error; errorQuotient = 1.0; } else { // rel error wrt to initial error before performing this iteration errorQuotient = error / error0; } if ((errorQuotient < epsilon * epsilon) || (error == 0)) { break; } // solve (effective stiffness) * qdelta = qresidual 
PerformanceCounter counterSystemSolveTime; //counterSystemSolveTime.StartCounter(); // it starts automatically in constructor switch (solver) { case generalMatrixSolver: { INTEGER N = r; INTEGER NRHS = 1; double * A = tangentStiffnessMatrix; INTEGER LDA = r; double * B = qdelta; INTEGER LDB = r; INTEGER INFO; #ifdef __APPLE__ #define DGESV dgesv_ #else #define DGESV dgesv #endif DGESV ( &N, &NRHS, A, &LDA, IPIV->GetBuf(), B, &LDB, &INFO ); if (INFO != 0) { printf("Error: Gaussian elimination solver returned non-zero exit status %d.\n",(int)INFO); return 1; } } break; case symmetricMatrixSolver: { // call dsysv ( uplo, n, nrhs, a, lda, ipiv, b, ldb, work, lwork, info) #ifdef __APPLE__ #define DSYSV dsysv_ #else #define DSYSV dsysv #endif char uplo = 'U'; INTEGER nrhs = 1; INTEGER info; INTEGER R = r; INTEGER symmetricSolver_lworkI = symmetricSolver_lwork; DSYSV ( &uplo, &R, &nrhs, tangentStiffnessMatrix, &R, IPIV->GetBuf(), qdelta, &R, symmetricSolver_work, &symmetricSolver_lworkI, &info); if (info != 0) { printf("Error: Symmetric indefinite solver returned non-zero exit status %d.\n",(int)info); return 1; } } break; case positiveDefiniteMatrixSolver: { // call dposv ( uplo, n, nrhs, a, lda, b, ldb, info) #ifdef __APPLE__ #define DPOSV dposv_ #else #define DPOSV dposv #endif char uplo = 'U'; INTEGER nrhs = 1; INTEGER info = 0; INTEGER R = r; DPOSV ( &uplo, &R, &nrhs, tangentStiffnessMatrix, &R, qdelta, &R, &info); if (info != 0) { printf("Error: Positive-definite Cholesky solver returned non-zero exit status %d.\n",(int)info); return 1; } } break; default: printf("Error: reduced integration solver not specified.\n"); return 1; break; } counterSystemSolveTime.StopCounter(); systemSolveTime = counterSystemSolveTime.GetElapsedTime(); /* printf("qdelta:\n"); for(int i=0; i<r; i++) printf("%G ", qdelta[i]); printf("\n"); */ // update state for(i=0; i<r; i++) { q[i] += qdelta[i]; qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i]; qvel[i] = 
alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i]; } numIter++; } while (numIter < maxIterations); /* printf("Num iterations performed: %d (maxIterations=%d)\n", numIter, maxIterations); if ((numIter >= maxIterations) && (maxIterations > 1)) { printf("Warning: method did not converge in max number of iterations.\n"); } */ return 0; }
int main(int argc, char **argv) { // Initialize form, sliders and buttons form = make_window(); performanceCounter.StartCounter(); // init saveFileTimeCounter.StartCounter(); // init groundPlane_button->value(groundPlane); fog_button->value(useFog); worldAxes_button->value(renderWorldAxes); frame_slider->value(1); if (saveScreenToFile == SAVE_CONTINUOUS) record_button->value(1); // ON else record_button->value(0); // OFF // just do some timing, no special purpose // because the first data is always not trustable according to experience performanceCounter.StopCounter(); performanceCounter.GetElapsedTime(); saveFileTimeCounter.StopCounter(); saveFileTimeCounter.GetElapsedTime(); performanceCounter.StartCounter(); // show form, and do initial draw of model form->show(); glwindow->show(); // glwindow is initialized when the form is built performanceCounter.StopCounter(); if (argc > 2) { char *filename; filename = argv[1]; if(filename != NULL) { //Read skeleton from asf file pSkeleton = new Skeleton(filename, MOCAP_SCALE); //Set the rotations for all bones in their local coordinate system to 0 //Set root position to (0, 0, 0) pSkeleton->setBasePosture(); displayer.LoadSkeleton(pSkeleton); lastSkeleton++; } if (displayer.GetNumSkeletons()) { filename = argv[2]; if(filename != NULL) { //Read motion (.amc) file and create a motion pMotion = new Motion(filename, MOCAP_SCALE,pSkeleton); //set sampled motion for display displayer.LoadMotion(pMotion); lastMotion++; //Tell skeleton to perform the first pose ( first posture ) pSkeleton->setPosture(*(displayer.GetSkeletonMotion(0)->GetPosture(0))); // Set skeleton to perform the first pose ( first posture ) int currentFrames = displayer.GetSkeletonMotion(0)->GetNumFrames(); if (currentFrames > maxFrames) { maxFrames = currentFrames; frame_slider->maximum((double)maxFrames); } frame_slider->maximum((double)maxFrames); currentFrameIndex=0; } // if(filename != NULL) } else printf("Load a skeleton first.\n"); 
framesIncrementDoublePrecision = 1.0; // Current frame and frame increment playButton = ON; repeatButton = OFF; groundPlane = ON; glwindow->redraw(); } // if (argc > 2) Fl::add_idle(idle); return Fl::run(); }
void idle(void*) { if (previousPlayButtonStatus == ON) { // it means we should measure the interval between two frames // if it is too tiny, we should slow down the motion performanceCounter.StopCounter(); double actualTimeCostOneFrame = performanceCounter.GetElapsedTime(); // in seconds // time spent on saving the screen in previous time-step should be excluded if (saveFileTimeCost > 0.0) actualTimeCostOneFrame -= saveFileTimeCost; framesIncrementDoublePrecision = actualTimeCostOneFrame * expectedFPS; } // start counter at the beginning of the new round if (playButton == ON) performanceCounter.StartCounter(); if(rewindButton == ON) { currentFrameIndex = 0; currentFrameIndexDoublePrecision = 0.0; for (int i = 0; i < displayer.GetNumSkeletons(); i++) { if (displayer.GetSkeletonMotion(i) != NULL) { Posture * initSkeleton = displayer.GetSkeletonMotion(i)->GetPosture(0); displayer.GetSkeleton(i)->setPosture(*initSkeleton); } } rewindButton = OFF; } // Initialization saveFileTimeCost = -1.0; if(playButton == ON) { if (saveScreenToFile == SAVE_CONTINUOUS) { saveFileTimeCounter.StartCounter(); CreateScreenFilename(SAVE_CONTINUOUS, saveScreenToFileContinuousCount, saveScreenToFileContinuousFilename); saveScreenshot(640, 480, saveScreenToFileContinuousFilename); printf("%s is saved to disk.\n", saveScreenToFileContinuousFilename); saveScreenToFileContinuousCount++; saveFileTimeCounter.StopCounter(); saveFileTimeCost = saveFileTimeCounter.GetElapsedTime(); } if (saveScreenToFile == SAVE_CONTINUOUS) { currentFrameIndexDoublePrecision += 1.0; } else { currentFrameIndexDoublePrecision += framesIncrementDoublePrecision; } currentFrameIndex = (int)currentFrameIndexDoublePrecision; if(currentFrameIndex >= maxFrames) { if (repeatButton == ON) { currentFrameIndex = 0; currentFrameIndexDoublePrecision = 0.0; } else // repeat button is OFF { currentFrameIndex = maxFrames - 1; currentFrameIndexDoublePrecision = currentFrameIndex; playButton = OFF; // important, especially in 
"recording" mode } } if (currentFrameIndex < 0) { currentFrameIndex = 0; currentFrameIndexDoublePrecision = 0.0; } SetSkeletonsToSpecifiedFrame(currentFrameIndex); frame_slider->value((double) currentFrameIndex + 1); } // if(playButton == ON) if (minusOneButton == ON) if (displayer.GetNumSkeletons() != 0) { currentFrameIndex--; if (currentFrameIndex < 0) currentFrameIndex = 0; frame_slider->value((double) currentFrameIndex + 1); SetSkeletonsToSpecifiedFrame(currentFrameIndex); if (saveScreenToFile == SAVE_CONTINUOUS) { CreateScreenFilename(SAVE_CONTINUOUS, saveScreenToFileContinuousCount, saveScreenToFileContinuousFilename); saveScreenshot(640, 480, saveScreenToFileContinuousFilename); printf("%s is saved to disk.\n", saveScreenToFileContinuousFilename); saveScreenToFileContinuousCount++; } minusOneButton = OFF; } if (plusOneButton == ON) { if (displayer.GetNumSkeletons() != 0) { currentFrameIndex++; if (currentFrameIndex >= maxFrames) currentFrameIndex = maxFrames - 1; frame_slider->value((double) currentFrameIndex + 1); SetSkeletonsToSpecifiedFrame(currentFrameIndex); if (saveScreenToFile == SAVE_CONTINUOUS) { CreateScreenFilename(SAVE_CONTINUOUS, saveScreenToFileContinuousCount, saveScreenToFileContinuousFilename); saveScreenshot(640, 480, saveScreenToFileContinuousFilename); printf("%s is saved to disk.\n", saveScreenToFileContinuousFilename); saveScreenToFileContinuousCount++; } plusOneButton = OFF; } } frame_slider->value((double)(currentFrameIndex + 1)); previousPlayButtonStatus = playButton; // Super important updating glwindow->redraw(); }
int VolumeConservingIntegrator::DoTimestep() { int numIter = 0; //Error after the first step double error0 = 0; double errorQuotient; // store current amplitudes and set initial guesses for qaccel, qvel for (int i = 0; i < r; i++) { qaccel_1[i] = qaccel[i] = 0; q_1[i] = q[i]; qvel_1[i] = qvel[i]; } do { int i; /* printf("q:\n"); for(int i=0; i<r; i++) printf("%G ", q[i]); printf("\n"); printf("Internal forces:\n"); for(int i=0; i<r; i++) printf("%G ", internalForces[i]); printf("\n"); */ PerformanceCounter counterForceAssemblyTime; forceModel->GetForceAndMatrix(q, internalForces, tangentStiffnessMatrix); counterForceAssemblyTime.StopCounter(); forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime(); //tangentStiffnessMatrix->Print(); //tangentStiffnessMatrix->Save("K"); //Scale internal forces for (i = 0; i < r; i++) internalForces[i] *= internalForceScalingFactor; *tangentStiffnessMatrix *= internalForceScalingFactor; memset(qresidual, 0, sizeof(double) * r); if (useStaticSolver) { // fint + K * qdelta = fext // add externalForces, internalForces for (i = 0; i < r; i++) { qresidual[i] = externalForces[i] - internalForces[i]; qdelta[i] = qresidual[i]; } } else { tangentStiffnessMatrix->ScalarMultiply(dampingStiffnessCoef, rayleighDampingMatrix); rayleighDampingMatrix->AddSubMatrix(dampingMassCoef, *massMatrix); // build effective stiffness: // Keff = M + h D + h^2 * K // compute force residual, store it into aux variable qresidual // qresidual = h * (-D qdot - fint + fext - h * K * qdot)) //add mass matrix and damping matrix to tangentStiffnessMatrix *tangentStiffnessMatrix *= timestep; *tangentStiffnessMatrix += *rayleighDampingMatrix; tangentStiffnessMatrix->AddSubMatrix(1.0, *dampingMatrix, 1); // at this point, tangentStiffnessMatrix = h * K + D tangentStiffnessMatrix->MultiplyVector(qvel, qresidual); *tangentStiffnessMatrix *= timestep; tangentStiffnessMatrix->AddSubMatrix(1.0, *massMatrix); // add externalForces, internalForces for (i = 0; i < r; i++) { 
qresidual[i] += internalForces[i] - externalForces[i]; qresidual[i] *= -timestep; qdelta[i] = qresidual[i]; } } /* printf("internal forces:\n"); for(int i=0; i<r; i++) printf("%G ", internalForces[i]); printf("\n"); printf("external forces:\n"); for(int i=0; i<r; i++) printf("%G ", externalForces[i]); printf("\n"); printf("residual:\n"); for(int i=0; i<r; i++) printf("%G ", -qresidual[i]); printf("\n"); */ double error = 0; for (i = 0; i < r; i++) error += qresidual[i] * qresidual[i]; // on the first iteration, compute initial error if (numIter == 0) { error0 = error; errorQuotient = 1.0; } else { // rel error wrt to initial error before performing this iteration errorQuotient = error / error0; } if (errorQuotient < epsilon * epsilon) break; //tangentStiffnessMatrix->Save("Keff"); RemoveRows(r, bufferConstrained, qdelta, numConstrainedDOFs, constrainedDOFs); systemMatrix->AssignSuperMatrix(tangentStiffnessMatrix); // solve: systemMatrix * qdelta = qresidual PerformanceCounter counterSystemSolveTime; memset(buffer, 0, sizeof(double) * r); #ifdef SPOOLES int info; if (numSolverThreads > 1) { SPOOLESSolverMT * solver = new SPOOLESSolverMT(systemMatrix, numSolverThreads); info = solver->SolveLinearSystem(buffer, bufferConstrained); delete(solver); } else { SPOOLESSolver * solver = new SPOOLESSolver(systemMatrix); info = solver->SolveLinearSystem(buffer, bufferConstrained); delete(solver); } char solverString[16] = "SPOOLES"; #endif #ifdef PARDISO int info = pardisoSolver->ComputeCholeskyDecomposition(systemMatrix); if (info == 0) info = pardisoSolver->SolveLinearSystem(buffer, bufferConstrained); char solverString[16] = "PARDISO"; #endif //Profile finds this function as a hotspot #ifdef PCG int info = jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner( buffer, bufferConstrained, 1e-6, 10000); if (info > 0) info = 0; char solverString[16] = "PCG"; #endif if (info != 0) { printf( "Error: %s sparse solver returned non-zero exit status %d.\n", 
solverString, (int) info); exit(-1); return 1; } counterSystemSolveTime.StopCounter(); systemSolveTime = counterSystemSolveTime.GetElapsedTime(); InsertRows(r, buffer, qdelta, numConstrainedDOFs, constrainedDOFs); /* printf("qdelta:\n"); for(int i=0; i<r; i++) printf("%G ", qdelta[i]); printf("\n"); exit(1); */ // update state if (useStaticSolver) { for (i = 0; i < r; i++) { q[i] += qdelta[i]; qvel[i] = (q[i] - q_1[i]) / timestep; } } else { for (i = 0; i < r; i++) { qvel[i] += qdelta[i]; q[i] += timestep * qvel[i]; } } for (int i = 0; i < numConstrainedDOFs; i++) q[constrainedDOFs[i]] = qvel[constrainedDOFs[i]] = qaccel[constrainedDOFs[i]] = 0.0; numIter++; } while (numIter < maxIterations); /* printf("q:\n"); for(int i=0; i<r; i++) printf("%G ", q[i]); printf("\n"); printf("qvel:\n"); for(int i=0; i<r; i++) printf("%G ", qvel[i]); printf("\n"); */ //printf("Num iterations performed: %d\n",numIter); //if ((numIter >= maxIterations) && (maxIterations > 1)) //{ //printf("Warning: method did not converge in max number of iterations.\n"); //} return 0; }
// the "idle" routine; called periodically by GLUT void idleFunction(void) { cpuLoadCounter.StartCounter(); glutSetWindow(windowID); if (!lockScene) { // determine force in case user is pulling on a vertex if (g_iLeftMouseButton) { if (pulledVertex != -1) { double forceX = (g_vMousePos[0] - dragStartX); double forceY = -(g_vMousePos[1] - dragStartY); double externalForce[3]; camera->CameraVector2WorldVector_OrientationOnly3D( forceX, forceY, 0, externalForce); renderingModalMatrix->ProjectSingleVertex(pulledVertex, externalForce[0], externalForce[1], externalForce[2], fq); for(int i=0; i<r; i++) fq[i] = fqBase[i] + deformableObjectCompliance * fq[i]; } } else { memcpy(fq,fqBase,sizeof(double) * r); } // set the reduced external forces implicitNewmarkDense->SetExternalForces(fq); // integrate the dynamics via implicit Newmark for(int i=0; i<substepsPerTimeStep; i++) { int code = implicitNewmarkDense->DoTimestep(); if (code != 0) { printf("The integrator went unstable. Reduce the timestep, or increase the number of substeps per timestep.\n"); implicitNewmarkDense->ResetToRest(); for(int i=0; i<r; i++) { fqBase[i] = 0; fq[i] = 0; } implicitNewmarkDense->SetExternalForces(fq); explosionFlag = 1; explosionCounter.StartCounter(); break; } /* printf("q =\n"); double * q = implicitNewmarkDense->Getq(); for(int i=0; i<r; i++) printf("%G ", q[i]); printf("\n"); */ } memcpy(q, implicitNewmarkDense->Getq(), sizeof(double) * r); } if (explosionFlag) { explosionCounter.StopCounter(); if (explosionCounter.GetElapsedTime() > 4.0) // the message will appear on screen for 4 seconds explosionFlag = 0; } // compute u=Uq deformableObjectRenderingMeshReduced->Setq(q); deformableObjectRenderingMeshReduced->Compute_uUq(); graphicFrame++; // update title bar information at 4 Hz titleBarCounter.StopCounter(); double elapsedTime = titleBarCounter.GetElapsedTime(); if (elapsedTime >= 1.0 / 4) { titleBarCounter.StartCounter(); fps = graphicFrame / elapsedTime; // update menu bar char 
windowTitle[4096]; sprintf(windowTitle,"%s | Num modes = %d | %.1f Hz | Deformation CPU Load: %d%%", windowTitleBase, implicitNewmarkDense->GetNumDOFs() , fps, (int)(100 * cpuLoad + 0.5) ); glutSetWindowTitle(windowTitle); graphicFrame = 0; if (syncTimeStepWithGraphics) { timeStep = 1.0 / fps; implicitNewmarkDense->SetTimestep(timeStep / substepsPerTimeStep); Sync_GLUI(); } } cpuLoadCounter.StopCounter(); double cpuTimePerGraphicsFrame = cpuLoadCounter.GetElapsedTime(); cpuLoad = cpuTimePerGraphicsFrame * fps; glutPostRedisplay(); }
int EulerSparse::DoTimestep() { // v_{n+1} = v_n + h * (F_n / m) // x_{n+1} = x_n + h * v_{n+1} // store current state for(int i=0; i<r; i++) { q_1[i] = q[i]; qvel_1[i] = qvel[i]; qaccel_1[i] = qaccel[i]; } PerformanceCounter counterForceAssemblyTime; forceModel->GetInternalForce(q, internalForces); counterForceAssemblyTime.StopCounter(); forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime(); // scale internal forces for(int i=0; i<r; i++) internalForces[i] *= internalForceScalingFactor; // damping double * dampingForces = buffer; massMatrix->MultiplyVector(qvel, dampingForces); for(int i=0; i<r; i++) dampingForces[i] *= dampingMassCoef; dampingMatrix->MultiplyVectorAdd(qvel, dampingForces); //printf("C=\n"); //dampingMatrix->Print(); //dampingMatrix->Save("C"); for(int i=0; i<r; i++) { // set qresidual = F_n, for a subsequent solve M * qdelta = h * F_n qresidual[i] = externalForces[i] - internalForces[i] - dampingForces[i]; } PerformanceCounter counterSystemSolveTime; // solve: M * qdelta = qresidual memset(qdelta, 0.0, sizeof(double)*r); #ifdef PARDISO int info = pardisoSolver->SolveLinearSystem(qdelta, qresidual); char solverString[16] = "PARDISO"; #endif #ifdef SPOOLES int info = spoolesSolver->SolveLinearSystem(qdelta, qresidual); char solverString[16] = "SPOOLES"; #endif #ifdef PCG int info = jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner(qdelta, qresidual, 1e-6, 10000); if (info > 0) info = 0; char solverString[16] = "PCG"; #endif if (info != 0) { printf("Error: %s sparse solver returned non-zero exit status %d.\n", solverString, (int)info); return 1; } counterSystemSolveTime.StopCounter(); systemSolveTime = counterSystemSolveTime.GetElapsedTime(); // update state if (symplectic) { for(int i=0; i<r; i++) { qvel[i] += timestep * qdelta[i]; q[i] += timestep * qvel[i]; } } else { for(int i=0; i<r; i++) { q[i] += timestep * qvel[i]; qvel[i] += timestep * qdelta[i]; } } for(int i=0; i<r; i++) qaccel[i] = qdelta[i]; // constrain 
fixed DOFs for(int i=0; i<numConstrainedDOFs; i++) q[constrainedDOFs[i]] = qvel[constrainedDOFs[i]] = qaccel[constrainedDOFs[i]] = 0.0; return 0; }
int CentralDifferencesSparse::DoTimestep() { PerformanceCounter counterForceAssemblyTime; forceModel->GetInternalForce(q, internalForces); for (int i=0; i<r; i++) internalForces[i] *= internalForceScalingFactor; counterForceAssemblyTime.StopCounter(); forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime(); if (tangentialDampingMode > 0) if (timestepIndex % tangentialDampingMode == 0) DecomposeSystemMatrix(); // this routines also updates the damping and system matrices // update equation is (see WRIGGERS P.: Computational Contact Mechanics. John Wiley & Sons, Ltd., 2002., page 275) : // // (M + dt / 2 * C) * q(t+1) = (dt)^2 * (fext(t) - fint(q(t))) + dt / 2 * C * q(t-1) + M * (2q(t) - q(t-1)) // // (M + dt / 2 * C) * (q(t+1) - q(t)) = (dt)^2 * (fext(t) - fint(q(t))) + dt / 2 * C * (q(t-1) - q(t)) + M * (q(t) - q(t-1)) // fext are the external forces // fint is the vector of internal forces // compute rhs = (dt)^2 * (fext - fint(q(t))) + dt / 2 * C * (q(t-1) - q(t)) + M * (q(t) - q(t-1)) // first, compute rhs = M * (q - q_1) for (int i=0; i<r; i++) buffer[i] = q[i] - q_1[i]; massMatrix->MultiplyVector(buffer, rhs); // rhs += dt / 2 * dampingMatrix * (q_{n-1} - q_n) for (int i=0; i<r; i++) qdelta[i] = q_1[i] - q[i]; rayleighDampingMatrix->MultiplyVector(qdelta, buffer); for (int i=0; i<r; i++) rhs[i] += 0.5 * timestep * buffer[i]; // rhs += dt * dt * (fext - fint(q(t))) double timestep2 = timestep * timestep; for (int i=0; i<r; i++) rhs[i] += timestep2 * (externalForces[i] - internalForces[i]); // now rhs contains the correct value RemoveRows(r, rhsConstrained, rhs, numConstrainedDOFs, constrainedDOFs); PerformanceCounter counterSystemSolveTime; memset(buffer, 0, sizeof(double) * r); #ifdef SPOOLES int info = spoolesSolver->SolveLinearSystem(buffer, rhsConstrained); char solverString[16] = "SPOOLES"; #endif #ifdef PARDISO int info = pardisoSolver->SolveLinearSystem(buffer, rhsConstrained); char solverString[16] = "PARDISO"; #endif #ifdef PCG int info = 
jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner(buffer, rhsConstrained, 1e-6, 10000); if (info > 0) info = 0; char solverString[16] = "PCG"; #endif InsertRows(r, buffer, qdelta, numConstrainedDOFs, constrainedDOFs); counterSystemSolveTime.StopCounter(); systemSolveTime = counterSystemSolveTime.GetElapsedTime(); if (info != 0) { printf("Error: %s sparse solver returned non-zero exit status %d.\n", solverString, (int)info); return 1; } // the new value of q is now in buffer // update velocity, and previous and current positions for (int i=0; i<r; i++) { q_1[i] = q[i]; qvel[i] = qdelta[i] / timestep; qaccel[i] = (qvel[i] - qvel_1[i]) / timestep; qvel_1[i] = qvel[i]; qaccel_1[i] = qaccel[i]; q[i] += qdelta[i]; } timestepIndex++; return 0; }
int CentralDifferencesDense::DoTimestep() { if (r == 0) return 0; // the reduced force interpolation PerformanceCounter counterForceAssemblyTime; reducedForceModel->GetInternalForce(q,internalForces); counterForceAssemblyTime.StopCounter(); forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime(); if (plasticfq != NULL) { SetTotalForces(internalForces); for(int i=0; i<r; i++) internalForces[i] -= plasticfq[i]; } PerformanceCounter counterSystemSolveTime; for (int i=0; i<r; i++) internalForces[i] *= internalForceScalingFactor; if (tangentialDampingMode) UpdateLU(); // update equation is: // // (massMatrix + dt / 2 * dampingMatrix) * q(t+1) = (dt)^2 * (fr - Rr(q(t))) + dt/2 * dampingMatrix * q(t-1) + massMatrix * (2q(t) - q(t-1)) // LU decomposition of massMatrix + dt / 2 * Dr is available in L,U // fr = U^T * f are the reduced external forces // Rr is the vector of reduced internal forces // update equation follows from Newton's law // Mu'' = -Cu' - F_int + F_ext // Mu'' + Cu' + R(u) = F_ext // R(u) = F_int // here, F_int is the external loading force necessary to sustain a certain deformation // it is opposite to the internal forces acting on the body in a given deformation state // compute rhs = (dt)^2 * (fr - Rr(q(t))) + dt/2 * dampingMatrix * q(t-1) + massMatrix * (2q(t) - q(t-1)) // first, compute rhs = massMatrix * (2*q - q_1) for (int i=0; i<r; i++) { rhs[i] = 0; for (int j=0; j<r; j++) rhs[i] += massMatrix[ELT(r,i,j)] * (2 * q[j] - q_1[j]); } // rhs += dt / 2 * dampingMatrix * q_{n-1} for (int i=0; i<r; i++) for (int j=0; j<r; j++) rhs[i] += timestep / 2 * dampingMatrix[ELT(r,i,j)] * q_1[j]; // rhs += dt * dt * (fr - Rr(q(t))) for (int i=0; i<r; i++) rhs[i] += timestep * timestep * (externalForces[i] - internalForces[i]); // now rhs contains the correct values // solve (M~ + dt/2 D~) * qnew = rhs // use data from the previously computed LU decomposition char trans='N'; INTEGER nrhs = 1; INTEGER INFO; INTEGER R = r; DGETRS 
(&trans,&R,&nrhs,LUFactor,&R,IPIV->GetBuf(),rhs,&R,&INFO); if (INFO != 0) { printf("Error: DGETRS returned a non-zero exit code %d.\n", (int)INFO); return INFO; } // the solution qnew is now in rhs // update velocity // and update previous and current positions for (int i=0; i<r; i++) { qvel[i] = (rhs[i] - q[i]) / timestep; q_1[i] = q[i]; q[i] = rhs[i]; } ProcessPlasticDeformations(); counterSystemSolveTime.StopCounter(); systemSolveTime = counterSystemSolveTime.GetElapsedTime(); return 0; }
// Dispatches aRunnable to this worker thread's event queue.
//
// aRunnable: the runnable to dispatch; it is moved into a local nsCOMPtr on
//            entry, before any early return.
// aFlags:    must be NS_DISPATCH_NORMAL.
//
// Returns NS_ERROR_UNEXPECTED if aFlags is not NS_DISPATCH_NORMAL, the
// failure code of nsThread::Dispatch if that call fails, and NS_OK otherwise.
//
// Two paths exist, selected by comparing PR_GetCurrentThread() with mThread:
//  - on the worker thread, mWorkerPrivate is read without taking mLock and a
//    non-null runnable is passed through MaybeWrapAsWorkerRunnable() before
//    being dispatched;
//  - on any other thread, mWorkerPrivate is read under mLock and
//    mOtherThreadsDispatchingViaEventTarget is incremented for the duration of
//    the dispatch; afterwards the worker's condition variable is notified (on
//    success) and the counter is decremented again.
NS_IMETHODIMP WorkerThread::Dispatch(already_AddRefed<nsIRunnable> aRunnable, uint32_t aFlags) {
  // May be called on any thread!
  nsCOMPtr<nsIRunnable> runnable(aRunnable);  // in case we exit early

  // Workers only support asynchronous dispatch.
  if (NS_WARN_IF(aFlags != NS_DISPATCH_NORMAL)) {
    return NS_ERROR_UNEXPECTED;
  }

  const bool onWorkerThread = PR_GetCurrentThread() == mThread;

  // Count this dispatch in the worker's performance counter. This is only
  // done for dispatches made from the worker thread itself, and only when
  // scheduler logging is enabled.
  if (GetSchedulerLoggingEnabled() && onWorkerThread && mWorkerPrivate) {
    PerformanceCounter* performanceCounter = mWorkerPrivate->GetPerformanceCounter();
    if (performanceCounter) {
      performanceCounter->IncrementDispatchCounter(DispatchCategory::Worker);
    }
  }

#ifdef DEBUG
  // Debug-only check for runnables dispatched from other threads.
  if (runnable && !onWorkerThread) {
    nsCOMPtr<nsICancelableRunnable> cancelable = do_QueryInterface(runnable);
    {
      MutexAutoLock lock(mLock);
      // Only enforce cancelable runnables after we've started the worker loop.
      if (!mAcceptingNonWorkerRunnables) {
        MOZ_ASSERT(cancelable, "Only nsICancelableRunnable may be dispatched to a worker!");
      }
    }
  }
#endif

  WorkerPrivate* workerPrivate = nullptr;
  if (onWorkerThread) {
    // No need to lock here because it is only modified on this thread.
    MOZ_ASSERT(mWorkerPrivate);
    mWorkerPrivate->AssertIsOnWorkerThread();
    workerPrivate = mWorkerPrivate;
  } else {
    MutexAutoLock lock(mLock);
    MOZ_ASSERT(mOtherThreadsDispatchingViaEventTarget < UINT32_MAX);
    // workerPrivate stays null when mWorkerPrivate is not set; the wake-up
    // and counter bookkeeping further down are skipped in that case.
    if (mWorkerPrivate) {
      workerPrivate = mWorkerPrivate;
      // Incrementing this counter will make the worker thread sleep if it
      // somehow tries to unset mWorkerPrivate while we're using it.
      mOtherThreadsDispatchingViaEventTarget++;
    }
  }

  nsresult rv;
  if (runnable && onWorkerThread) {
    // Dispatches made on the worker thread go through
    // MaybeWrapAsWorkerRunnable() first.
    RefPtr<WorkerRunnable> workerRunnable = workerPrivate->MaybeWrapAsWorkerRunnable(runnable.forget());
    rv = nsThread::Dispatch(workerRunnable.forget(), NS_DISPATCH_NORMAL);
  } else {
    rv = nsThread::Dispatch(runnable.forget(), NS_DISPATCH_NORMAL);
  }

  if (!onWorkerThread && workerPrivate) {
    // We need to wake the worker thread if we're not already on the right
    // thread and the dispatch succeeded.
    if (NS_SUCCEEDED(rv)) {
      MutexAutoLock workerLock(workerPrivate->mMutex);
      workerPrivate->mCondVar.Notify();
    }

    // Now unset our waiting flag.
    {
      MutexAutoLock lock(mLock);
      MOZ_ASSERT(mOtherThreadsDispatchingViaEventTarget);
      // Notify mWorkerPrivateCondVar once the last outstanding dispatcher
      // from another thread has finished.
      if (!--mOtherThreadsDispatchingViaEventTarget) {
        mWorkerPrivateCondVar.Notify();
      }
    }
  }

  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }
  return NS_OK;
}
void * MyFrame::LinearModesWorker( int numDesiredModes, int * r, double ** frequencies_, double ** modes_ ) { *r = -1; // create mass matrix SparseMatrix * massMatrix; GenerateMassMatrix::computeMassMatrix(precomputationState.simulationMesh, &massMatrix, true); // create stiffness matrix StVKElementABCD * precomputedIntegrals = StVKElementABCDLoader::load(precomputationState.simulationMesh); StVKInternalForces * internalForces = new StVKInternalForces(precomputationState.simulationMesh, precomputedIntegrals); SparseMatrix * stiffnessMatrix; StVKStiffnessMatrix * stiffnessMatrixClass = new StVKStiffnessMatrix(internalForces); stiffnessMatrixClass->GetStiffnessMatrixTopology(&stiffnessMatrix); double * zero = (double*) calloc(3 * precomputationState.simulationMesh->getNumVertices(), sizeof(double)); stiffnessMatrixClass->ComputeStiffnessMatrix(zero, stiffnessMatrix); free(zero); delete(precomputedIntegrals); delete(stiffnessMatrixClass); delete(internalForces); // constrain the degrees of freedom int numConstrainedVertices = (int) (precomputationState.fixedVertices.size()); int * constrainedDOFs = (int*) malloc (sizeof(int) * 3 * numConstrainedVertices); set<int> :: iterator iter; int i = 0; for(iter = precomputationState.fixedVertices.begin(); iter != precomputationState.fixedVertices.end(); iter++) { constrainedDOFs[3*i+0] = 3 * (*iter) + 1; constrainedDOFs[3*i+1] = 3 * (*iter) + 2; constrainedDOFs[3*i+2] = 3 * (*iter) + 3; i++; } int oneIndexed = 1; massMatrix->RemoveRowsColumns( 3 * numConstrainedVertices, constrainedDOFs, oneIndexed); stiffnessMatrix->RemoveRowsColumns( 3 * numConstrainedVertices, constrainedDOFs, oneIndexed); // call ARPACK double * frequenciesTemp = (double*) malloc (sizeof(double) * numDesiredModes); int numRetainedDOFs = stiffnessMatrix->Getn(); double * modesTemp = (double*) malloc (sizeof(double) * numDesiredModes * numRetainedDOFs); printf("Computing linear modes using ARPACK: ...\n"); PerformanceCounter ARPACKCounter; double sigma = 
-1.0; int numLinearSolverThreads = wxThread::GetCPUCount(); if (numLinearSolverThreads > 3) numLinearSolverThreads = 3; // diminished returns in solver beyond 3 threads //massMatrix->Save("MFactory"); //stiffnessMatrix->Save("KFactory"); ARPACKSolver generalizedEigenvalueProblem; int nconv = generalizedEigenvalueProblem.SolveGenEigShInv (stiffnessMatrix, massMatrix, numDesiredModes, frequenciesTemp, modesTemp, sigma, numLinearSolverThreads); ARPACKCounter.StopCounter(); double ARPACKTime = ARPACKCounter.GetElapsedTime(); printf("ARPACK time: %G s.\n", ARPACKTime); fflush(NULL); if (nconv < numDesiredModes) { free(modesTemp); free(frequenciesTemp); *r = -3; free(constrainedDOFs); delete(massMatrix); delete(stiffnessMatrix); return NULL; } int n3 = 3 * precomputationState.simulationMesh->getNumVertices(); *frequencies_ = (double*) calloc (numDesiredModes, sizeof(double)); *modes_ = (double*) calloc (numDesiredModes * n3, sizeof(double)); for(int i=0; i<numDesiredModes; i++) { // insert zero rows into the computed modes int oneIndexed = 1; InsertRows(n3, &modesTemp[numRetainedDOFs*i], &((*modes_)[n3*i]), 3 * numConstrainedVertices, constrainedDOFs, oneIndexed); } for(int i=0; i<numDesiredModes; i++) { if (frequenciesTemp[i] <= 0) (*frequencies_)[i] = 0.0; else (*frequencies_)[i] = sqrt((frequenciesTemp)[i]) / (2 * M_PI); } free(modesTemp); free(frequenciesTemp); free(constrainedDOFs); delete(massMatrix); delete(stiffnessMatrix); *r = numDesiredModes; return NULL; }