void WorkerThread::IncrementDispatchCounter() {
  if (!mozilla::StaticPrefs::dom_performance_enable_scheduler_timing()) {
    return;
  }
  MutexAutoLock lock(mLock);
  if (mWorkerPrivate) {
    PerformanceCounter* performanceCounter =
        mWorkerPrivate->GetPerformanceCounter();
    if (performanceCounter) {
      performanceCounter->IncrementDispatchCounter(DispatchCategory::Worker);
    }
  }
}
// Accounts for timeout execution time around a timeout callback.
//
// aRunningTimeout: the timeout whose callback has been executing up to now
//                  (may be null); its elapsed time is charged to the budget.
// aTimeout:        the timeout about to start executing (may be null); when
//                  null, recording is stopped instead of restarted.
void
TimeoutManager::RecordExecution(Timeout* aRunningTimeout,
                                Timeout* aTimeout)
{
  // Bail out only when scheduler timing is disabled by pref AND this manager
  // belongs to a chrome window.
  const bool schedulerTimingOff =
    !StaticPrefs::dom_performance_enable_scheduler_timing();
  if (schedulerTimingOff && mWindow.IsChromeWindow()) {
    return;
  }

  TimeoutBudgetManager& budget = TimeoutBudgetManager::Get();
  TimeStamp timestamp = TimeStamp::Now();

  if (aRunningTimeout) {
    // A callback was running: charge everything executed up to this point.
    TimeDuration elapsed = budget.RecordExecution(
      timestamp, aRunningTimeout, mWindow.IsBackgroundInternal());
    budget.MaybeCollectTelemetry(timestamp);

    UpdateBudget(timestamp, elapsed);

    // Ad-hoc reuse of the performance counters for timers; this is meant to
    // be removed at some point. See Bug 1482834
    if (PerformanceCounter* counter = GetPerformanceCounter()) {
      counter->IncrementExecutionDuration(elapsed.ToMicroseconds());
    }
  }

  if (!aTimeout) {
    // Nothing new is starting: stop by clearing the start timestamp.
    budget.StopRecording();
    return;
  }

  // A new timeout callback is starting: begin recording from now.
  budget.StartRecording(timestamp);
  if (PerformanceCounter* counter = GetPerformanceCounter()) {
    counter->IncrementDispatchCounter(DispatchCategory(TaskCategory::Timer));
  }
}
void ScanPerformanceThread::run()
{
    m_bStopThread = false;

    bool bRet = false;
    PerformanceCounter perfCounter;
    MemoryPerformance memoryPerf = {0};
    ProcessorPerformance processorPerf = {0};
    DiskPerformance diskPerf = {0};

    PerformanceHouse perfHouse;

    int refreshCount = 0;
    while (!m_bStopThread)
    {
        refreshCount++;

        perfCounter.GetMemoryPerformance(memoryPerf);
        perfHouse.SetMemoryPerformance(memoryPerf);

        perfCounter.GetProcessorPerformance(processorPerf);
        perfHouse.SetProcessorPerformance(processorPerf);
        
        perfCounter.GetDiskPerformance(diskPerf);
        perfHouse.SetDiskPerformance(diskPerf);

        this->msleep(500);

        // 每刷新30次写一次LOG
        if (refreshCount%30 != 0)
            continue;

        PrintLogW(L"Cpu Usage: %u%%", processorPerf.LoadPercentage);
        PrintLogW(L"Memory Total Size: %u", memoryPerf.TotalSize);
        PrintLogW(L"Memory Available Size: %u", memoryPerf.AvailableSize);

        PrintLogW(L"");
    }

}
// Prints the current configuration of every performance counter slot, for
// every core of every node of processor `p`.
//
// A legend explaining the printed columns is emitted first. If a counter
// register cannot be read, an error line is printed and the dump stops.
void Processor::K10PerformanceCounters::perfCounterGetInfo (class Processor *p) {

	DWORD node, core, slot;

	printf ("Caption:\n");
	printf ("Evt:\tperformance counter event\n");
	printf ("En:\tperformance counter is enabled\n");
	printf ("U:\tperformance counter will count usermode instructions\n");
	printf ("OS:\tperformance counter will counter Os/kernel instructions\n");
	printf ("cMsk:\tperformance counter mask (see processor manual reference)\n");
	printf ("ED:\tcounting on edge detect, else counting on level detect\n");
	printf ("APIC:\tif set, an APIC interrupt will be issued on counter overflow\n");
	printf ("icMsk:\tif set, mask is inversed (see processor manual reference)\n");
	printf ("uMsk:\tunit mask (see processor manual reference)\n\n");

	for (node = 0; node < p->getProcessorNodes(); node++)
	{
		printf ("--- Node %d\n", node);

		p->setNode(node);
		p->setCore(ALL_CORES);

		for (slot = 0; slot < p->getMaxSlots(); slot++)
		{
			// Automatic storage: the counter is destroyed on every exit path,
			// including the early return below. The previous code allocated it
			// with new and released it with free(), which is undefined
			// behaviour and never ran the destructor.
			PerformanceCounter performanceCounter(p->getMask(), slot, p->getMaxSlots());

			for (core = 0; core < p->getProcessorCores(); core++)
			{
				if (!performanceCounter.fetch (core))
				{
					printf ("K10PerformanceCounters.cpp::perfCounterGetInfo - unable to read performance counter register\n");
					return;
				}

				printf ("Slot %d core %d - evt:0x%x En:%d U:%d OS:%d cMsk:%x ED:%d APIC:%d icMsk:%x uMsk:%x\n",
						slot,
						core,
						performanceCounter.getEventSelect(),
						performanceCounter.getEnabled(),
						performanceCounter.getCountUserMode(),
						performanceCounter.getCountOsMode(),
						performanceCounter.getCounterMask(),
						performanceCounter.getEdgeDetect(),
						performanceCounter.getEnableAPICInterrupt(),
						performanceCounter.getInvertCntMask(),
						performanceCounter.getUnitMask()
						);
			}
		}
	}
}
void Processor::K10PerformanceCounters::perfMonitorDCMA(class Processor *p)
{
	PerformanceCounter *perfCounter;

	DWORD cpuIndex, nodeId, coreId;
	PROCESSORMASK cpuMask;
	unsigned int perfCounterSlot;

	uint64_t misses;

	// This pointers will refer an array containing previous performance counter values
	uint64_t *prevPerfCounters;

	try {

		p->setNode(p->ALL_NODES);
		p->setCore(p->ALL_CORES);

		cpuMask = p->getMask();
		/* We do this to do some "caching" of the mask, instead of calculating each time
		 we need to retrieve the time stamp counter */

		// Allocating space for previous values of counters.
		prevPerfCounters = (uint64_t *) calloc(
				p->getProcessorCores() * p->getProcessorNodes(),
				sizeof(uint64_t));

		//Creates a new performance counter, for now we set slot 0, but we will
		//use the findAvailable slot method to find an available method to be used
		perfCounter = new PerformanceCounter(cpuMask, 0, p->getMaxSlots());

		//Event 0x76 is Idle Counter
		perfCounter->setEventSelect(0x47);
		perfCounter->setCountOsMode(true);
		perfCounter->setCountUserMode(true);
		perfCounter->setCounterMask(0);
		perfCounter->setEdgeDetect(false);
		perfCounter->setEnableAPICInterrupt(false);
		perfCounter->setInvertCntMask(false);
		perfCounter->setUnitMask(0);

		//Finds an available slot for our purpose
		perfCounterSlot = perfCounter->findAvailableSlot();

		//findAvailableSlot() returns -2 in case of error
		if (perfCounterSlot == 0xfffffffe)
			throw "unable to access performance counter slots";

		//findAvailableSlot() returns -1 in case there aren't available slots
		if (perfCounterSlot == 0xffffffff)
			throw "unable to find an available performance counter slot";

		printf("Performance counter will use slot #%d\n", perfCounterSlot);

		//In case there are no errors, we program the object with the slot itself has found
		perfCounter->setSlot(perfCounterSlot);

		// Program the counter slot
		if (!perfCounter->program())
			throw "unable to program performance counter parameters";

		// Enable the counter slot
		if (!perfCounter->enable())
			throw "unable to enable performance counters";

		/* Here we take a snapshot of the performance counter and a snapshot of the time
		 * stamp counter to initialize the arrays to let them not show erratic huge numbers
		 * on first step
		 */

		if (!perfCounter->takeSnapshot())
			throw "unable to retrieve performance counter data";

		cpuIndex = 0;
		for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++)
		{
			for (coreId = 0x0; coreId < p->getProcessorCores(); coreId++)
			{
				prevPerfCounters[cpuIndex] = perfCounter->getCounter(cpuIndex);
				cpuIndex++;
			}
		}

		Signal::activateSignalHandler(SIGINT);

		while (!Signal::getSignalStatus())
		{
			if (!perfCounter->takeSnapshot())
				throw "unable to retrieve performance counter data";

			cpuIndex = 0;

			for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++)
			{
				printf("Node %d -", nodeId);

				for (coreId = 0x0; coreId < p->getProcessorCores(); coreId++)
				{
					misses = perfCounter->getCounter(cpuIndex) - prevPerfCounters[cpuIndex];

					printf(" c%u:%0.3fk", coreId, (float) (misses/1000.0f));

					prevPerfCounters[cpuIndex] = perfCounter->getCounter(cpuIndex);

					cpuIndex++;
				}
				printf("\n");
			}
			Sleep(1000);
		}

		perfCounter->disable();

		printf ("CTRL-C executed. Cleaning on exit...\n");

	} catch (char const *str) {

		if (perfCounter->getEnabled()) perfCounter->disable();

		printf("K10PerformanceCounters.cpp::perfMonitorCPUUsage - %s\n", str);

	}

	free(perfCounter);
	free(prevPerfCounters);

	return;

}
// Interactive monitor: programs a free performance counter slot with event
// 0x76 on all nodes/cores of `p` and prints once per second, per core, the
// ratio (in percent) between the counter increase and the time stamp counter
// increase over the same interval. Runs until SIGINT is received, then
// disables the counter and releases resources. Failures are reported on
// stdout; nothing is returned to the caller.
void Processor::K10PerformanceCounters::perfMonitorCPUUsage(class Processor *p)
{
	// All owners start as NULL so the catch handler and the cleanup at the end
	// are safe no matter where the try block was left.
	PerformanceCounter *perfCounter = NULL;
	MSRObject *tscCounter = NULL; //We need the timestamp counter too to determine the cpu usage in percentage

	DWORD cpuIndex, nodeId, coreId;
	PROCESSORMASK cpuMask;
	unsigned int perfCounterSlot;

	uint64_t usage;

	// These two pointers will refer to two arrays containing previous performance counter values
	// and previous Time Stamp counters. We need these to obtain instantaneous CPU usage information
	uint64_t *prevPerfCounters = NULL;
	uint64_t *prevTSCCounters = NULL;

	try
	{
		p->setNode(p->ALL_NODES);
		p->setCore(p->ALL_CORES);

		cpuMask = p->getMask();
		/* We do this to do some "caching" of the mask, instead of calculating each time
		 we need to retrieve the time stamp counter */

		// Allocating space for previous values of counters.
		prevPerfCounters = (uint64_t *) calloc(p->getProcessorCores() * p->getProcessorNodes(), sizeof(uint64_t));
		prevTSCCounters = (uint64_t *) calloc(p->getProcessorCores() * p->getProcessorNodes(), sizeof(uint64_t));

		if (prevPerfCounters == NULL || prevTSCCounters == NULL)
			throw "unable to allocate memory for counter values";

		// MSR Object to retrieve the time stamp counter for all the nodes and all the processors
		tscCounter = new MSRObject();

		//Creates a new performance counter, for now we set slot 0, but we will
		//use the findAvailable slot method to find an available method to be used
		perfCounter = new PerformanceCounter(cpuMask, 0, p->getMaxSlots());

		//Event 0x76 is listed as "CPU Clocks not Halted" in the AMD Family 10h
		//BKDG (TODO confirm against the processor manual); its increase relative
		//to the TSC increase is what gets printed as the usage percentage
		perfCounter->setEventSelect(0x76);
		perfCounter->setCountOsMode(true);
		perfCounter->setCountUserMode(true);
		perfCounter->setCounterMask(0);
		perfCounter->setEdgeDetect(false);
		perfCounter->setEnableAPICInterrupt(false);
		perfCounter->setInvertCntMask(false);
		perfCounter->setUnitMask(0);
		perfCounter->setMaxSlots(p->getMaxSlots());

		//Finds an available slot for our purpose
		perfCounterSlot = perfCounter->findAvailableSlot();

		//findAvailableSlot() returns -2 in case of error
		if (perfCounterSlot == 0xfffffffe)
			throw "unable to access performance counter slots";

		//findAvailableSlot() returns -1 in case there aren't available slots
		if (perfCounterSlot == 0xffffffff)
			throw "unable to find an available performance counter slot";

		printf("Performance counter will use slot #%d\n", perfCounterSlot);

		//In case there are no errors, we program the object with the slot itself has found
		perfCounter->setSlot(perfCounterSlot);

		// Program the counter slot
		if (!perfCounter->program())
			throw "unable to program performance counter parameters";

		// Enable the counter slot
		if (!perfCounter->enable())
			throw "unable to enable performance counters";

		/* Here we take a snapshot of the performance counter and a snapshot of the time
		 * stamp counter to initialize the arrays to let them not show erratic huge numbers
		 * on first step
		 */

		if (!perfCounter->takeSnapshot())
			throw "unable to retrieve performance counter data";

		if (!tscCounter->readMSR(TIME_STAMP_COUNTER_REG, cpuMask))
			throw "unable to retrieve time stamp counter";

		cpuIndex = 0;
		for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++)
		{
			for (coreId = 0; coreId < p->getProcessorCores(); coreId++)
			{
				prevPerfCounters[cpuIndex] = perfCounter->getCounter(cpuIndex);
				prevTSCCounters[cpuIndex] = tscCounter->getBits(cpuIndex, 0, 64);
				cpuIndex++;
			}
		}

		Signal::activateSignalHandler(SIGINT);
		printf("Values >100%% can be expected if the CPU is in a Boosted State\n");

		while (!Signal::getSignalStatus())
		{
			if (!perfCounter->takeSnapshot())
				throw "unable to retrieve performance counter data";

			if (!tscCounter->readMSR(TIME_STAMP_COUNTER_REG, cpuMask))
				throw "unable to retrieve time stamp counter";

			cpuIndex = 0;

			for (nodeId = 0; nodeId < p->getProcessorNodes(); nodeId++)
			{
				printf("\nNode %d -", nodeId);

				for (coreId = 0x0; coreId < p->getProcessorCores(); coreId++)
				{
					// Read each value once so the printed ratio and the stored
					// baselines are guaranteed to come from the same sample.
					const uint64_t currentPerf = perfCounter->getCounter(cpuIndex);
					const uint64_t currentTSC = tscCounter->getBits(cpuIndex, 0, 64);
					const uint64_t tscDelta = currentTSC - prevTSCCounters[cpuIndex];

					// A TSC that did not advance would make the integer
					// division below trap; report 0% for that sample instead.
					if (tscDelta != 0)
						usage = ((currentPerf - prevPerfCounters[cpuIndex]) * 100) / tscDelta;
					else
						usage = 0;

					printf(" c%d:%d%%", coreId, (unsigned int) usage);

					prevPerfCounters[cpuIndex] = currentPerf;
					prevTSCCounters[cpuIndex] = currentTSC;

					cpuIndex++;
				}
			}
			Sleep(1000);
		}

		perfCounter->disable();

		printf ("CTRL-C executed. Cleaning on exit...\n");

	} catch (char const *str) {

		// perfCounter is still NULL if the failure happened before it was created.
		if (perfCounter != NULL && perfCounter->getEnabled()) perfCounter->disable();

		printf("K10PerformanceCounters.cpp::perfMonitorCPUUsage - %s\n", str);

	}

	// Objects created with new must be released with delete (not free());
	// the calloc'ed arrays are released with free(). All calls accept NULL.
	delete perfCounter;
	delete tscCounter;
	free(prevPerfCounters);
	free(prevTSCCounters);

	return;

}
Example #7
0
//--------------------------------------------------------------------------------------
// Render a frame
//--------------------------------------------------------------------------------------
void Render()
{
    // Update our time
    static float t = 0.0f;
    float delta_t = 0.0f;
    {
        static DWORD dwTimeStart = 0;
        DWORD dwTimeCur = GetTickCount();
        if( dwTimeStart == 0 )
            dwTimeStart = dwTimeCur;
        float old_t = t;
        t = ( dwTimeCur - dwTimeStart ) / 1000.0f;
        delta_t = t-old_t;
    }

    {
        sphParticle particles[32];
        for(size_t i=0; i<_countof(particles); ++i) {
            particles[i].position = ist::simdvec4_set(GenRand()*0.5f, GenRand()*0.5f, GenRand()*0.5f-7.5f, 1.0f);
            particles[i].velocity = _mm_set1_ps(0.0f);
        }
        g_sphgrid.addParticles(particles, _countof(particles));
    }
    {
        static PerformanceCounter s_timer;
        static float s_prev = 0.0f;
        PerformanceCounter timer;

        g_sphgrid.update(1.0f);
        g_pImmediateContext->UpdateSubresource( g_pCubeInstanceBuffer, 0, NULL, &g_sphgrid.particles, 0, 0 );

        if(s_timer.getElapsedMillisecond() - s_prev > 1000.0f) {
            char buf[128];
            _snprintf(buf, _countof(buf), "  SPH update: %d particles %.3fms\n", g_sphgrid.num_active_particles, timer.getElapsedMillisecond());
            OutputDebugStringA(buf);
            ::SetWindowTextA(g_hWnd, buf);
            s_prev = s_timer.getElapsedMillisecond();
        }
    }

    {
        CBChangesEveryFrame cb;
        XMVECTOR eye = g_camera.getEye();
        {
            XMMATRIX rot = XMMatrixRotationZ(XMConvertToRadians(0.1f));
            eye = XMVector4Transform(eye, rot);
        }
        g_camera.setEye(eye);
        g_camera.updateMatrix();
        XMMATRIX vp = g_camera.getViewProjectionMatrix();

        cb.ViewProjection   = XMMatrixTranspose( vp );
        cb.CameraPos        = (FLOAT*)&eye;

        cb.LightPos         = XMFLOAT4(10.0f, 10.0f, -10.0f, 1.0f);
        cb.LightColor       = XMFLOAT4(0.9f, 0.9f, 0.9f, 1.0f);

        cb.MeshShininess    = 200.0f;
        g_pImmediateContext->UpdateSubresource( g_pCBChangesEveryFrame, 0, NULL, &cb, 0, 0 );
    }


    float ClearColor[4] = { 0.0f, 0.125f, 0.3f, 1.0f }; // red, green, blue, alpha
    g_pImmediateContext->ClearRenderTargetView( g_pRenderTargetView, ClearColor );
    g_pImmediateContext->ClearDepthStencilView( g_pDepthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0 );

    {
        ID3D11Buffer *buffers[] = {g_pCubeVertexBuffer, g_pCubeInstanceBuffer};
        UINT strides[] = {sizeof(SimpleVertex), sizeof(sphParticle), };
        UINT offsets[] = {0, 0};
        g_pImmediateContext->IASetVertexBuffers( 0, ARRAYSIZE(buffers), buffers, strides, offsets );
    }
    g_pImmediateContext->IASetInputLayout( g_pCubeVertexLayout );
    g_pImmediateContext->IASetIndexBuffer( g_pCubeIndexBuffer, DXGI_FORMAT_R16_UINT, 0 );
    g_pImmediateContext->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST );

    // Render the cube
    g_pImmediateContext->VSSetShader( g_pCubeVertexShader, NULL, 0 );
    g_pImmediateContext->VSSetConstantBuffers( 0, 1, &g_pCBChangesEveryFrame );
    g_pImmediateContext->PSSetShader( g_pCubePixelShader, NULL, 0 );
    g_pImmediateContext->PSSetConstantBuffers( 0, 1, &g_pCBChangesEveryFrame );

    g_pImmediateContext->DrawIndexedInstanced( 36, (UINT)g_sphgrid.num_active_particles, 0, 0, 0 );

    // Present our back buffer to our front buffer
    g_pSwapChain->Present( 1, 0 ); // vsync on
    //g_pSwapChain->Present( 0, 0 ); // vsync off
}
// Advances the state (q, qvel, qaccel) by one timestep.
//
// The previous state is saved into q_1 / qvel_1 / qaccel_1, then the method
// iterates up to maxIterations times: evaluate internal forces and the tangent
// stiffness matrix at the current q, form the residual, solve a sparse linear
// system for the correction qdelta, and update q, qvel and qaccel from the
// alpha1..alpha6 coefficients. The loop ends early once the squared residual
// norm, relative to that of the first iteration, drops below epsilon^2.
//
// Side effects: updates forceAssemblyTime and systemSolveTime with the timings
// of the most recent iteration, and overwrites the work buffers (qresidual,
// qdelta, buffer, bufferConstrained, the matrices).
//
// Returns 0 on normal completion (also when maxIterations is reached without
// meeting the tolerance), 1 if the linear solver reports a non-zero status.
int ImplicitNewmarkSparse::DoTimestep()
{
  int numIter = 0;

  double error0 = 0; // error after the first step
  double errorQuotient;

  // store current amplitudes and set initial guesses for qaccel, qvel
  // (q_1[i] was just set to q[i], so the (q - q_1) terms below are zero here)
  for(int i=0; i<r; i++)
  {
    q_1[i] = q[i]; 
    qvel_1[i] = qvel[i];
    qaccel_1[i] = qaccel[i];

    qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i];
    qvel[i] = alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i];
  }

  do
  {
    int i;

/*
    printf("q:\n");
    for(int i=0; i<r; i++)
      printf("%G ", q[i]);
    printf("\n");

    printf("Internal forces:\n");
    for(int i=0; i<r; i++)
      printf("%G ", internalForces[i]);
    printf("\n");
*/

    // evaluate internal forces and tangent stiffness at the current q, timing the call
    // (the counter is not started explicitly here, so it presumably starts in its constructor)
    PerformanceCounter counterForceAssemblyTime;
    forceModel->GetForceAndMatrix(q, internalForces, tangentStiffnessMatrix);
    counterForceAssemblyTime.StopCounter();
    forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime();

    //tangentStiffnessMatrix->Print();
    //tangentStiffnessMatrix->Save("K");

    // scale internal forces
    for(i=0; i<r; i++)
      internalForces[i] *= internalForceScalingFactor;

    // the stiffness matrix is scaled by the same factor as the forces
    *tangentStiffnessMatrix *= internalForceScalingFactor;

    memset(qresidual, 0, sizeof(double) * r);

    if (useStaticSolver)
    {
      // no operation
      // (no mass/damping terms are added; the residual below consists of
      // internal and external forces only)
    }
    else
    {
      // build effective stiffness: add mass matrix and damping matrix to tangentStiffnessMatrix
      // NOTE(review): the next two calls presumably form
      // rayleighDampingMatrix = dampingStiffnessCoef * K + dampingMassCoef * M
      // -- confirm against the sparse matrix API
      tangentStiffnessMatrix->ScalarMultiply(dampingStiffnessCoef, rayleighDampingMatrix);
      rayleighDampingMatrix->AddSubMatrix(dampingMassCoef, *massMatrix);

      rayleighDampingMatrix->ScalarMultiplyAdd(alpha4, tangentStiffnessMatrix);
      //*tangentStiffnessMatrix += alpha4 * *rayleighDampingMatrix;
      tangentStiffnessMatrix->AddSubMatrix(alpha4, *dampingMatrix, 1);

      tangentStiffnessMatrix->AddSubMatrix(alpha1, *massMatrix);
      
      // compute force residual, store it into aux variable qresidual
      // qresidual = M * qaccel + C * qvel - externalForces + internalForces

      massMatrix->MultiplyVector(qaccel, qresidual);
      rayleighDampingMatrix->MultiplyVectorAdd(qvel, qresidual);
      dampingMatrix->MultiplyVectorAdd(qvel, qresidual);
    }

    // add externalForces, internalForces
    // (the sign is flipped so that qresidual / qdelta hold the right-hand side of the solve)
    for(i=0; i<r; i++)
    {
      qresidual[i] += internalForces[i] - externalForces[i];
      qresidual[i] *= -1;
      qdelta[i] = qresidual[i];
    }

/*
    printf("internal forces:\n");
    for(int i=0; i<r; i++)
      printf("%G ", internalForces[i]);
    printf("\n");

    printf("external forces:\n");
    for(int i=0; i<r; i++)
      printf("%G ", externalForces[i]);
    printf("\n");

    printf("residual:\n");
    for(int i=0; i<r; i++)
      printf("%G ", -qresidual[i]);
    printf("\n");
*/

    // squared Euclidean norm of the residual
    double error = 0;
    for(i=0; i<r; i++)
      error += qresidual[i] * qresidual[i];

    // on the first iteration, compute initial error
    if (numIter == 0) 
    {
      error0 = error;
      errorQuotient = 1.0;
    }
    else
    {
      // error divided by the initial error, before performing this iteration
      // NOTE(review): if error0 is 0 this is a division by zero (0/0 gives NaN),
      // and the convergence test below is then never true;
      // ImplicitNewmarkDense::DoTimestep additionally breaks when error == 0.
      errorQuotient = error / error0; 
    }

    // converged: both sides are squared quantities (squared norm ratio vs epsilon^2)
    if (errorQuotient < epsilon * epsilon)
    {
      break;
    }

    //tangentStiffnessMatrix->Save("Keff");
    // drop the constrained DOFs from the right-hand side and from the matrix
    RemoveRows(r, bufferConstrained, qdelta, numConstrainedDOFs, constrainedDOFs);
    systemMatrix->AssignSuperMatrix(tangentStiffnessMatrix);

    // solve: systemMatrix * buffer = bufferConstrained

    PerformanceCounter counterSystemSolveTime;
    memset(buffer, 0, sizeof(double) * r);

    // Exactly one of SPOOLES / PARDISO / PCG is expected to be defined at
    // compile time; each branch declares `info` and `solverString` used below.
    #ifdef SPOOLES
      SPOOLESSolver solver(systemMatrix);
      int info = solver.SolveLinearSystem(buffer, bufferConstrained);
      char solverString[16] = "SPOOLES";
    #endif

    #ifdef PARDISO
      int info = pardisoSolver->ComputeCholeskyDecomposition(systemMatrix);
      if (info == 0)
        info = pardisoSolver->SolveLinearSystem(buffer, bufferConstrained);
      char solverString[16] = "PARDISO";
    #endif

    #ifdef PCG
      int info = jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner(buffer, bufferConstrained, 1e-6, 10000);
      // positive return values are treated as success here
      if (info > 0)
        info = 0;
      char solverString[16] = "PCG";
    #endif

    // on solver failure, return before systemSolveTime is updated
    if (info != 0)
    {
      printf("Error: %s sparse solver returned non-zero exit status %d.\n", solverString, (int)info);
      return 1;
    }

    counterSystemSolveTime.StopCounter();
    systemSolveTime = counterSystemSolveTime.GetElapsedTime();

    // expand the solution back to full length, re-inserting the constrained DOFs
    InsertRows(r, buffer, qdelta, numConstrainedDOFs, constrainedDOFs);

/*
    printf("qdelta:\n");
    for(int i=0; i<r; i++)
      printf("%G ", qdelta[i]);
    printf("\n");
    exit(1);
*/
    // update state
    for(i=0; i<r; i++)
    {
      q[i] += qdelta[i];
      qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i];
      qvel[i] = alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i];
    }

    // constrained DOFs are pinned to zero position, velocity and acceleration
    for(int i=0; i<numConstrainedDOFs; i++)
      q[constrainedDOFs[i]] = qvel[constrainedDOFs[i]] = qaccel[constrainedDOFs[i]] = 0.0;

    numIter++;
  }
  while (numIter < maxIterations);

/*
  printf("qvel:\n");
  for(int i=0; i<r; i++)
    printf("%G ", qvel[i]);
  printf("\n");

  printf("qaccel:\n");
  for(int i=0; i<r; i++)
    printf("%G ", qaccel[i]);
  printf("\n");
*/

  //printf("Num iterations performed: %d\n",numIter);
  //if ((numIter >= maxIterations) && (maxIterations > 1))
  //{
    //printf("Warning: method did not converge in max number of iterations.\n");
  //}

  return 0;
}
// Advances the state (q, qvel, qaccel) by one timestep using dense matrices.
//
// The previous state is saved into q_1 / qvel_1 / qaccel_1, then the method
// iterates up to maxIterations times: evaluate internal forces and the tangent
// stiffness matrix at the current q, form the residual, solve a dense r x r
// linear system for the correction qdelta with the LAPACK routine selected by
// `solver`, and update q, qvel and qaccel from the alpha1..alpha6
// coefficients. The loop ends early once the squared residual norm, relative
// to that of the first iteration, drops below epsilon^2, or is exactly zero.
//
// Side effects: updates forceAssemblyTime and systemSolveTime with the timings
// of the most recent iteration, and overwrites tangentStiffnessMatrix,
// dampingMatrix, qresidual and qdelta.
//
// Returns 0 on normal completion (also when maxIterations is reached without
// meeting the tolerance), 1 if the solver fails or `solver` is not recognized.
int ImplicitNewmarkDense::DoTimestep()
{
  int numIter = 0;

  double error0 = 0; // error after the first step
  double errorQuotient;

  // store current amplitudes and set initial guesses for qaccel, qvel
  // note: these guesses will later be overridden; they are only used to construct the right-hand-side vector (multiplication with M and C)
  // (q_1[i] was just set to q[i], so the (q - q_1) terms below are zero here)
  for(int i=0; i<r; i++)
  {
    q_1[i] = q[i]; 
    qvel_1[i] = qvel[i];
    qaccel_1[i] = qaccel[i];

    qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i];
    qvel[i] = alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i];
  }

  do
  {
    int i;


    // evaluate internal forces and tangent stiffness at the current q, timing the call
    PerformanceCounter counterForceAssemblyTime;
    reducedForceModel->GetForceAndMatrix(q, internalForces, tangentStiffnessMatrix);
    counterForceAssemblyTime.StopCounter();
    forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime();

    // scale internal forces
    for(i=0; i<r; i++)
      internalForces[i] *= internalForceScalingFactor;

/*
    printf("internalForceScalingFactor = %G\n", internalForceScalingFactor);
    printf("q:\n");
    for(int i=0; i<r; i++)
      printf("%G ", q[i]);
    printf("\n");

    printf("Internal forces:\n");
    for(int i=0; i<r; i++)
      printf("%G ", internalForces[i]);
    printf("\n");
*/

    // scale the stiffness matrix by the same factor as the forces
    // (r2 is presumably r*r, the number of dense matrix entries -- confirm where r2 is set)
    for(i=0; i<r2; i++)
      tangentStiffnessMatrix[i] *= internalForceScalingFactor;

    // add the constant offset to the stiffness matrix
    for(i=0; i<r2; i++)
      tangentStiffnessMatrix[i] += tangentStiffnessMatrixOffset[i];

/*
    printf("Tangent stiffness matrix:\n");
    for(int i=0; i<r; i++)
    {
      for(int j=0; j<r; j++)
        printf("%.15f ", tangentStiffnessMatrix[r * j + i]);
      printf("\n");
    }
    printf("Tangent stiffness matrix offset:\n");
    for(int i=0; i<r; i++)
    {
      for(int j=0; j<r; j++)
        printf("%.15f ", tangentStiffnessMatrixOffset[r * j + i]);
      printf("\n");
    }
    printf("----\n");
*/

    //WriteMatrixToDisk_("Kr", r, r, tangentStiffnessMatrix);
    //WriteMatrixToDisk_("Mr", r, r, massMatrix);
    //exit(1);

    memset(qresidual, 0, sizeof(double) * r);

    if (useStaticSolver)
    {
      // no operation
      // (no mass/damping terms are added; the residual below consists of
      // internal and external forces only)
    }
    else
    {
      // build effective stiffness: add mass matrix and damping matrix to tangentStiffnessMatrix
      for(i=0; i<r2; i++)
      {
        // dampingMatrix is recomputed from the stiffness value BEFORE the
        // damping and mass terms are added to it on the following lines
        dampingMatrix[i] = dampingMassCoef * massMatrix[i] + dampingStiffnessCoef * tangentStiffnessMatrix[i];
        tangentStiffnessMatrix[i] += alpha4 * dampingMatrix[i];
        //tangentStiffnessMatrix[i] += alpha3 * massMatrix[i] + gamma * alpha1 * dampingMatrix[i]; // static Rayleigh damping

        // add mass matrix to the effective stiffness matrix
        tangentStiffnessMatrix[i] += alpha1 * massMatrix[i];
      }

      // compute force residual, store it into aux variable qresidual
      // qresidual = M * qaccel + C * qvel - externalForces + internalForces

      // M * qaccel
      // (beta = 0.0: qresidual is overwritten)
      cblas_dgemv(CblasColMajor,CblasNoTrans,
        r,r,1.0,massMatrix,r,qaccel,1,0.0,qresidual,1);

      // += C * qvel
      // (beta = 1.0: the product is accumulated into qresidual)
      cblas_dgemv(CblasColMajor,CblasNoTrans,
        r,r,1.0,dampingMatrix,r,qvel,1,1.0,qresidual,1);
    }

    // add externalForces, internalForces
    // (the sign is flipped so that qresidual / qdelta hold the right-hand side of the solve)
    for(i=0; i<r; i++)
    {
      qresidual[i] += internalForces[i] - externalForces[i];
      qresidual[i] *= -1;
      qdelta[i] = qresidual[i];
    }

/*
    printf("internalForceScalingFactor = %G\n", internalForceScalingFactor);

    printf("internal forces:\n");
    for(int i=0; i<r; i++)
      printf("%G ", internalForces[i]);
    printf("\n");

    printf("external forces:\n");
    for(int i=0; i<r; i++)
      printf("%G ", externalForces[i]);
    printf("\n");

    printf("mass matrix:\n");
    for(int i=0; i<r*r; i++)
      printf("%G ", massMatrix[i]);
    printf("\n");

    printf("damping matrix:\n");
    for(int i=0; i<r*r; i++)
      printf("%G ", dampingMatrix[i]);
    printf("\n");

    printf("effective stiffness matrix:\n");
    for(int i=0; i<r*r; i++)
      printf("%G ", tangentStiffnessMatrix[i]);
    printf("\n");

    printf("matrix rhs:\n");
    for(int i=0; i<r; i++)
      printf("%G ", qdelta[i]);
    printf("\n");
*/

    // squared Euclidean norm of the residual
    double error = 0;
    for(i=0; i<r; i++)
      error += qresidual[i] * qresidual[i];

    // on the first iteration, compute initial error
    if (numIter == 0) 
    {
      error0 = error;
      errorQuotient = 1.0;
    }
    else
    {
      // rel error wrt to initial error before performing this iteration
      // (error0 cannot be 0 here: a zero first-iteration error breaks out below)
      errorQuotient = error / error0; 
    }

    // converged (squared norm ratio vs epsilon^2), or residual exactly zero
    if ((errorQuotient < epsilon * epsilon) || (error == 0))
    {
      break;
    }

    // solve (effective stiffness) * qdelta = qresidual
    // (qdelta holds the right-hand side on entry; the calls below pass it as
    // LAPACK's B argument, which is overwritten with the solution)
    PerformanceCounter counterSystemSolveTime;
    //counterSystemSolveTime.StartCounter(); // it starts automatically in constructor

    switch (solver)
    {
      case generalMatrixSolver:
      {
        INTEGER N = r;
        INTEGER NRHS = 1;
        double * A = tangentStiffnessMatrix;
        INTEGER LDA = r;
        double * B = qdelta;
        INTEGER LDB = r;
        INTEGER INFO;

        // the LAPACK symbol carries a trailing underscore on Apple platforms
        #ifdef __APPLE__
          #define DGESV dgesv_
        #else
          #define DGESV dgesv
        #endif

        DGESV ( &N, &NRHS, A, &LDA, IPIV->GetBuf(), B, &LDB, &INFO );

        if (INFO != 0)
        {
          printf("Error: Gaussian elimination solver returned non-zero exit status %d.\n",(int)INFO);
          return 1;
        }
      }
      break;

      case symmetricMatrixSolver:
      {
        // call dsysv ( uplo, n, nrhs, a, lda, ipiv, b, ldb, work, lwork, info)
  
        #ifdef __APPLE__
          #define DSYSV dsysv_
        #else
          #define DSYSV dsysv
        #endif

        char uplo = 'U';
        INTEGER nrhs = 1;
        INTEGER info;
        INTEGER R = r;

        // copy the workspace size into an INTEGER so its address can be passed
        INTEGER symmetricSolver_lworkI = symmetricSolver_lwork;
        DSYSV ( &uplo, &R, &nrhs, tangentStiffnessMatrix, &R, IPIV->GetBuf(), qdelta, &R, symmetricSolver_work, &symmetricSolver_lworkI, &info);

        if (info != 0)
        {
          printf("Error: Symmetric indefinite solver returned non-zero exit status %d.\n",(int)info);
          return 1;
        }
      }
      break;

      case positiveDefiniteMatrixSolver:
      {
        // call dposv ( uplo, n, nrhs, a, lda, b, ldb, info)

        #ifdef __APPLE__
          #define DPOSV dposv_
        #else
          #define DPOSV dposv
        #endif
  
        char uplo = 'U';
        INTEGER nrhs = 1;
        INTEGER info = 0;
        INTEGER R = r;

        DPOSV ( &uplo, &R, &nrhs, tangentStiffnessMatrix, &R, qdelta, &R, &info);

        if (info != 0)
        {
          printf("Error: Positive-definite Cholesky solver returned non-zero exit status %d.\n",(int)info);
          return 1;
        }

      }
      break;

      default:
        printf("Error: reduced integration solver not specified.\n");
        return 1;
      break;
    }
    // reached only when the solve succeeded; the error paths above return
    // before systemSolveTime is updated
    counterSystemSolveTime.StopCounter();
    systemSolveTime = counterSystemSolveTime.GetElapsedTime();

/*
    printf("qdelta:\n");
    for(int i=0; i<r; i++)
      printf("%G ", qdelta[i]);
    printf("\n");
*/

    // update state
    for(i=0; i<r; i++)
    {
      q[i] += qdelta[i];
      qaccel[i] = alpha1 * (q[i] - q_1[i]) - alpha2 * qvel_1[i] - alpha3 * qaccel_1[i];
      qvel[i] = alpha4 * (q[i] - q_1[i]) + alpha5 * qvel_1[i] + alpha6 * qaccel_1[i];
    }

    numIter++;
  }
  while (numIter < maxIterations);

/*
  printf("Num iterations performed: %d (maxIterations=%d)\n", numIter, maxIterations);
  if ((numIter >= maxIterations) && (maxIterations > 1))
  {
    printf("Warning: method did not converge in max number of iterations.\n");
  }
*/

  return 0;
}
int main(int argc, char **argv) 
{
  // Initialize form, sliders and buttons
  form = make_window();

  performanceCounter.StartCounter();  // init
  saveFileTimeCounter.StartCounter(); // init

  groundPlane_button->value(groundPlane);
  fog_button->value(useFog);
  worldAxes_button->value(renderWorldAxes);
  frame_slider->value(1);
  if (saveScreenToFile == SAVE_CONTINUOUS)
    record_button->value(1);  // ON
  else
    record_button->value(0);  // OFF

  // just do some timing, no special purpose
  // because the first data is always not trustable according to experience
  performanceCounter.StopCounter(); 
  performanceCounter.GetElapsedTime();
  saveFileTimeCounter.StopCounter();
  saveFileTimeCounter.GetElapsedTime();
  performanceCounter.StartCounter();
  // show form, and do initial draw of model
  form->show();
  glwindow->show(); // glwindow is initialized when the form is built
  performanceCounter.StopCounter();

  if (argc > 2)
  {
    char *filename;

    filename = argv[1];
    if(filename != NULL)
    {
      //Read skeleton from asf file
      pSkeleton = new Skeleton(filename, MOCAP_SCALE);

      //Set the rotations for all bones in their local coordinate system to 0
      //Set root position to (0, 0, 0)
      pSkeleton->setBasePosture();
      displayer.LoadSkeleton(pSkeleton);
      lastSkeleton++;
    }

    if (displayer.GetNumSkeletons())
    {
      filename = argv[2];
      if(filename != NULL)
      {
        //Read motion (.amc) file and create a motion
        pMotion = new Motion(filename, MOCAP_SCALE,pSkeleton);

        //set sampled motion for display
        displayer.LoadMotion(pMotion);               
        
        lastMotion++;

        //Tell skeleton to perform the first pose ( first posture )
        pSkeleton->setPosture(*(displayer.GetSkeletonMotion(0)->GetPosture(0)));          

        // Set skeleton to perform the first pose ( first posture )         
        int currentFrames = displayer.GetSkeletonMotion(0)->GetNumFrames();
        if (currentFrames > maxFrames)
        {
          maxFrames = currentFrames;
          frame_slider->maximum((double)maxFrames);

        }
        frame_slider->maximum((double)maxFrames);

        currentFrameIndex=0;
      } // if(filename != NULL)
    }
    else
      printf("Load a skeleton first.\n");
    framesIncrementDoublePrecision = 1.0;            // Current frame and frame increment
    playButton = ON;
    repeatButton = OFF;
    groundPlane = ON; 
    glwindow->redraw();
  }  // if (argc > 2)
  Fl::add_idle(idle);
  return Fl::run();
}
void idle(void*)
{
  if (previousPlayButtonStatus == ON)  
  {
    // it means we should measure the interval between two frames
    // if it is too tiny, we should slow down the motion
    performanceCounter.StopCounter();
    double actualTimeCostOneFrame = performanceCounter.GetElapsedTime(); // in seconds

    // time spent on saving the screen in previous time-step should be excluded
    if (saveFileTimeCost > 0.0)   
      actualTimeCostOneFrame -= saveFileTimeCost;

    framesIncrementDoublePrecision = actualTimeCostOneFrame * expectedFPS;
  }
  // start counter at the beginning of the new round
  if (playButton == ON)
    performanceCounter.StartCounter();

  if(rewindButton == ON)
  {
    currentFrameIndex = 0;
    currentFrameIndexDoublePrecision = 0.0;
    for (int i = 0; i < displayer.GetNumSkeletons(); i++)
    {
      if (displayer.GetSkeletonMotion(i) != NULL)
      {
        Posture * initSkeleton = displayer.GetSkeletonMotion(i)->GetPosture(0);
        displayer.GetSkeleton(i)->setPosture(*initSkeleton);
      }
    }
    rewindButton = OFF;
  }

  // Initialization
  saveFileTimeCost = -1.0;

  if(playButton == ON) 
  {
    if (saveScreenToFile == SAVE_CONTINUOUS)
    {
      saveFileTimeCounter.StartCounter();
      CreateScreenFilename(SAVE_CONTINUOUS, saveScreenToFileContinuousCount, saveScreenToFileContinuousFilename);
      saveScreenshot(640, 480, saveScreenToFileContinuousFilename);
      printf("%s is saved to disk.\n", saveScreenToFileContinuousFilename);
      saveScreenToFileContinuousCount++;
      saveFileTimeCounter.StopCounter();
      saveFileTimeCost = saveFileTimeCounter.GetElapsedTime();
    }

    if (saveScreenToFile == SAVE_CONTINUOUS)
    {
      currentFrameIndexDoublePrecision += 1.0;
    }
    else
    {
      currentFrameIndexDoublePrecision += framesIncrementDoublePrecision;
    }

    currentFrameIndex = (int)currentFrameIndexDoublePrecision;

    if(currentFrameIndex >= maxFrames)
    {
      if (repeatButton == ON)
      {
        currentFrameIndex = 0;
        currentFrameIndexDoublePrecision = 0.0;
      }
      else  // repeat button is OFF
      {
        currentFrameIndex = maxFrames - 1;
        currentFrameIndexDoublePrecision = currentFrameIndex;
        playButton = OFF;  // important, especially in "recording" mode
      }
    }

    if (currentFrameIndex < 0)
    {
      currentFrameIndex = 0;
      currentFrameIndexDoublePrecision = 0.0;
    }

    SetSkeletonsToSpecifiedFrame(currentFrameIndex);

    frame_slider->value((double) currentFrameIndex + 1);
  }  // if(playButton == ON)

  if (minusOneButton == ON)
    if (displayer.GetNumSkeletons() != 0)
    {
      currentFrameIndex--;
      if (currentFrameIndex < 0)
        currentFrameIndex = 0;
      frame_slider->value((double) currentFrameIndex + 1);

      SetSkeletonsToSpecifiedFrame(currentFrameIndex);    
      if (saveScreenToFile == SAVE_CONTINUOUS)
      {
        CreateScreenFilename(SAVE_CONTINUOUS, saveScreenToFileContinuousCount, saveScreenToFileContinuousFilename);
        saveScreenshot(640, 480, saveScreenToFileContinuousFilename);
        printf("%s is saved to disk.\n", saveScreenToFileContinuousFilename);
        saveScreenToFileContinuousCount++;
      }
      minusOneButton = OFF;
    }

  if (plusOneButton == ON)
  {
    if (displayer.GetNumSkeletons() != 0)
    {
      currentFrameIndex++;
      if (currentFrameIndex >= maxFrames)
        currentFrameIndex = maxFrames - 1;
      frame_slider->value((double) currentFrameIndex + 1);

      SetSkeletonsToSpecifiedFrame(currentFrameIndex);
      if (saveScreenToFile == SAVE_CONTINUOUS)
      {
        CreateScreenFilename(SAVE_CONTINUOUS, saveScreenToFileContinuousCount, saveScreenToFileContinuousFilename);
        saveScreenshot(640, 480, saveScreenToFileContinuousFilename);
        printf("%s is saved to disk.\n", saveScreenToFileContinuousFilename);
        saveScreenToFileContinuousCount++;
      }
      plusOneButton = OFF;
    }
  }

  frame_slider->value((double)(currentFrameIndex + 1));

  previousPlayButtonStatus = playButton; // Super important updating

  glwindow->redraw();
}
// Advances the state (q, qvel, qaccel) by one timestep.
//
// The step is an iterative solve: each iteration evaluates the internal forces
// and tangent stiffness matrix at the current q, assembles a residual and a
// system matrix, solves the constrained linear system for an update qdelta,
// and applies it. The loop ends after maxIterations iterations, or earlier
// once the squared residual norm relative to that of the first iteration
// drops below epsilon^2.
//
// With useStaticSolver set, the residual is fext - fint and the update is
// applied to q; otherwise a damped dynamic system is assembled and the update
// is applied to qvel, from which q is advanced.
//
// Returns 0. If the sparse solver reports a non-zero status, the process is
// terminated with exit(-1).
// Side effects: overwrites forceAssemblyTime and systemSolveTime with the
// timings of the most recent iteration (they are not accumulated).
int VolumeConservingIntegrator::DoTimestep() {
	int numIter = 0;

	//Error after the first step
	double error0 = 0;
	double errorQuotient;

	// store current amplitudes and set initial guesses for qaccel, qvel
	for (int i = 0; i < r; i++) {
		qaccel_1[i] = qaccel[i] = 0;
		q_1[i] = q[i];
		qvel_1[i] = qvel[i];
	}

	do {
		int i;

		/*
		 printf("q:\n");
		 for(int i=0; i<r; i++)
		 printf("%G ", q[i]);
		 printf("\n");

		 printf("Internal forces:\n");
		 for(int i=0; i<r; i++)
		 printf("%G ", internalForces[i]);
		 printf("\n");
		 */

		// evaluate (and time) internal forces and tangent stiffness at the current q
		PerformanceCounter counterForceAssemblyTime;
		forceModel->GetForceAndMatrix(q, internalForces, tangentStiffnessMatrix);
		counterForceAssemblyTime.StopCounter();
		forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime();

		//tangentStiffnessMatrix->Print();
		//tangentStiffnessMatrix->Save("K");

		//Scale internal forces
		for (i = 0; i < r; i++)
			internalForces[i] *= internalForceScalingFactor;

		// the stiffness matrix is scaled by the same factor as the forces
		*tangentStiffnessMatrix *= internalForceScalingFactor;

		memset(qresidual, 0, sizeof(double) * r);

		if (useStaticSolver) {
			// fint + K * qdelta = fext

			// add externalForces, internalForces
			for (i = 0; i < r; i++) {
				qresidual[i] = externalForces[i] - internalForces[i];
				qdelta[i] = qresidual[i];
			}
		} else {
			// Rayleigh damping: dampingStiffnessCoef * K + dampingMassCoef * M
			tangentStiffnessMatrix->ScalarMultiply(dampingStiffnessCoef,
					rayleighDampingMatrix);
			rayleighDampingMatrix->AddSubMatrix(dampingMassCoef, *massMatrix);

			// build effective stiffness:
			// Keff = M + h D + h^2 * K
			// compute force residual, store it into aux variable qresidual
			// qresidual = h * (-D qdot - fint + fext - h * K * qdot))

			//add mass matrix and damping matrix to tangentStiffnessMatrix
			// NOTE: the statements below modify tangentStiffnessMatrix in place;
			// their order matters (the product with qvel must be taken while the
			// matrix equals h * K + D, before the second scaling by h).
			*tangentStiffnessMatrix *= timestep;

			*tangentStiffnessMatrix += *rayleighDampingMatrix;
			tangentStiffnessMatrix->AddSubMatrix(1.0, *dampingMatrix, 1); // at this point, tangentStiffnessMatrix = h * K + D
			tangentStiffnessMatrix->MultiplyVector(qvel, qresidual);
			*tangentStiffnessMatrix *= timestep;
			tangentStiffnessMatrix->AddSubMatrix(1.0, *massMatrix);

			// add externalForces, internalForces
			for (i = 0; i < r; i++) {
				qresidual[i] += internalForces[i] - externalForces[i];
				qresidual[i] *= -timestep;
				qdelta[i] = qresidual[i];
			}
		}

		/*
		 printf("internal forces:\n");
		 for(int i=0; i<r; i++)
		 printf("%G ", internalForces[i]);
		 printf("\n");

		 printf("external forces:\n");
		 for(int i=0; i<r; i++)
		 printf("%G ", externalForces[i]);
		 printf("\n");

		 printf("residual:\n");
		 for(int i=0; i<r; i++)
		 printf("%G ", -qresidual[i]);
		 printf("\n");
		 */

		// squared Euclidean norm of the residual
		double error = 0;
		for (i = 0; i < r; i++)
			error += qresidual[i] * qresidual[i];

		// on the first iteration, compute initial error
		if (numIter == 0) {
			error0 = error;
			errorQuotient = 1.0;
		} else {
			// rel error wrt to initial error before performing this iteration
			// NOTE(review): if error0 is 0 this division is by zero and the
			// quotient is not finite, so the convergence test below cannot
			// succeed -- confirm that this is acceptable.
			errorQuotient = error / error0;
		}

		// errors are squared norms, hence the comparison against epsilon^2
		if (errorQuotient < epsilon * epsilon)
			break;

		//tangentStiffnessMatrix->Save("Keff");
		// restrict the right-hand side and the system matrix to the unconstrained DOFs
		RemoveRows(r, bufferConstrained, qdelta, numConstrainedDOFs,
				constrainedDOFs);
		systemMatrix->AssignSuperMatrix(tangentStiffnessMatrix);

		// solve: systemMatrix * qdelta = qresidual

		PerformanceCounter counterSystemSolveTime;
		memset(buffer, 0, sizeof(double) * r);

		// Exactly one of SPOOLES / PARDISO / PCG is expected to be defined;
		// each section declares "info" and "solverString" used below.
#ifdef SPOOLES
		int info;
		if (numSolverThreads > 1)
		{
			SPOOLESSolverMT * solver = new SPOOLESSolverMT(systemMatrix, numSolverThreads);
			info = solver->SolveLinearSystem(buffer, bufferConstrained);
			delete(solver);
		}
		else
		{
			SPOOLESSolver * solver = new SPOOLESSolver(systemMatrix);
			info = solver->SolveLinearSystem(buffer, bufferConstrained);
			delete(solver);
		}
		char solverString[16] = "SPOOLES";
#endif

#ifdef PARDISO
		int info = pardisoSolver->ComputeCholeskyDecomposition(systemMatrix);
		if (info == 0)
		info = pardisoSolver->SolveLinearSystem(buffer, bufferConstrained);
		char solverString[16] = "PARDISO";
#endif

		//Profile finds this function as a hotspot
#ifdef PCG
		int info =
				jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner(
						buffer, bufferConstrained, 1e-6, 10000);
		// positive return values of the PCG solver are not treated as errors
		if (info > 0)
			info = 0;
		char solverString[16] = "PCG";
#endif

		if (info != 0) {
			printf(
					"Error: %s sparse solver returned non-zero exit status %d.\n",
					solverString, (int) info);
			// NOTE(review): exit() does not return, so the "return 1" below is
			// unreachable; callers never observe a non-zero return value.
			exit(-1);
			return 1;
		}

		counterSystemSolveTime.StopCounter();
		systemSolveTime = counterSystemSolveTime.GetElapsedTime();

		// expand the solution back to all r DOFs
		InsertRows(r, buffer, qdelta, numConstrainedDOFs, constrainedDOFs);

		/*
		 printf("qdelta:\n");
		 for(int i=0; i<r; i++)
		 printf("%G ", qdelta[i]);
		 printf("\n");
		 exit(1);
		 */
		// update state
		if (useStaticSolver) {
			// qdelta is a displacement update; velocity is a finite difference
			for (i = 0; i < r; i++) {
				q[i] += qdelta[i];
				qvel[i] = (q[i] - q_1[i]) / timestep;
			}
		} else {
			// qdelta is a velocity update; positions advance with the new velocity
			for (i = 0; i < r; i++) {
				qvel[i] += qdelta[i];
				q[i] += timestep * qvel[i];
			}
		}

		// constrained DOFs are pinned to zero
		for (int i = 0; i < numConstrainedDOFs; i++)
			q[constrainedDOFs[i]] = qvel[constrainedDOFs[i]] = qaccel[constrainedDOFs[i]] = 0.0;

		numIter++;
	} while (numIter < maxIterations);

	/*
	 printf("q:\n");
	 for(int i=0; i<r; i++)
	 printf("%G ", q[i]);
	 printf("\n");

	 printf("qvel:\n");
	 for(int i=0; i<r; i++)
	 printf("%G ", qvel[i]);
	 printf("\n");
	 */

	//printf("Num iterations performed: %d\n",numIter);
	//if ((numIter >= maxIterations) && (maxIterations > 1))
	//{
	//printf("Warning: method did not converge in max number of iterations.\n");
	//}
	return 0;
}
// The "idle" routine; called periodically by GLUT.
//
// Per call it:
//  - converts a mouse drag on the pulled vertex into reduced external forces,
//  - advances the reduced simulation by substepsPerTimeStep integrator steps
//    (resetting the integrator to rest if a step reports failure),
//  - pushes the reduced coordinates q to the rendering mesh,
//  - refreshes the window title (at most 4 times per second), and
//  - updates the CPU-load estimate before requesting a redraw.
void idleFunction(void)
{
  cpuLoadCounter.StartCounter();

  glutSetWindow(windowID);
  
  if (!lockScene)
  {
    // determine force in case user is pulling on a vertex
    if (g_iLeftMouseButton) 
    {
      if (pulledVertex != -1)
      {
        // mouse displacement since the drag started; screen y is negated
        double forceX = (g_vMousePos[0] - dragStartX);
        double forceY = -(g_vMousePos[1] - dragStartY);

        double externalForce[3];

        camera->CameraVector2WorldVector_OrientationOnly3D(
          forceX, forceY, 0, externalForce);

        // project the single-vertex force into the reduced space (result in fq)
        renderingModalMatrix->ProjectSingleVertex(pulledVertex,
          externalForce[0], externalForce[1], externalForce[2], fq);

        for(int i=0; i<r; i++)
          fq[i] = fqBase[i] + deformableObjectCompliance * fq[i];
      }
    }
    else
    {
      // no user interaction: only the base forces act
      memcpy(fq,fqBase,sizeof(double) * r);
    }

    // set the reduced external forces
    implicitNewmarkDense->SetExternalForces(fq);

    // integrate the dynamics via implicit Newmark
    for(int i=0; i<substepsPerTimeStep; i++)
    {
      int code = implicitNewmarkDense->DoTimestep();
      if (code != 0)
      {
        printf("The integrator went unstable. Reduce the timestep, or increase the number of substeps per timestep.\n");
        implicitNewmarkDense->ResetToRest();
        // clear all external forces so that the object stays at rest
        for(int j=0; j<r; j++)
        {
          fqBase[j] = 0;
          fq[j] = 0;
        }
        implicitNewmarkDense->SetExternalForces(fq);
        // start the timer that controls how long the explosion flag stays set
        explosionFlag = 1;
        explosionCounter.StartCounter();
        break;
      }

      /*
        printf("q =\n");
        double * q = implicitNewmarkDense->Getq();
        for(int i=0; i<r; i++)
          printf("%G ", q[i]);
        printf("\n");
      */
    }

    memcpy(q, implicitNewmarkDense->Getq(), sizeof(double) * r);
  }

  if (explosionFlag)
  {
    explosionCounter.StopCounter();
    if (explosionCounter.GetElapsedTime() > 4.0) // the message will appear on screen for 4 seconds
      explosionFlag = 0;
  }

  // compute u=Uq
  deformableObjectRenderingMeshReduced->Setq(q);
  deformableObjectRenderingMeshReduced->Compute_uUq();

  graphicFrame++;
  
  // update title bar information at 4 Hz
  titleBarCounter.StopCounter();
  double elapsedTime = titleBarCounter.GetElapsedTime();
  if (elapsedTime >= 1.0 / 4)
  {
    titleBarCounter.StartCounter();
    fps = graphicFrame / elapsedTime;

    // update menu bar
    // snprintf bounds the write: windowTitleBase has a length that is not
    // controlled here, so the title is truncated rather than overflowing.
    char windowTitle[4096];
    snprintf(windowTitle, sizeof(windowTitle), "%s | Num modes = %d | %.1f Hz | Deformation CPU Load: %d%%", windowTitleBase, 
      implicitNewmarkDense->GetNumDOFs() , fps, (int)(100 * cpuLoad + 0.5) );
    glutSetWindowTitle(windowTitle);
    graphicFrame = 0;

    if (syncTimeStepWithGraphics)
    {
      // one simulation timestep per rendered frame, split across the substeps
      timeStep = 1.0 / fps;
      implicitNewmarkDense->SetTimestep(timeStep / substepsPerTimeStep);
      Sync_GLUI();
    }
  }

  // fraction of each graphics frame spent in this routine
  cpuLoadCounter.StopCounter();
  double cpuTimePerGraphicsFrame = cpuLoadCounter.GetElapsedTime();
  cpuLoad = cpuTimePerGraphicsFrame * fps; 

  glutPostRedisplay();
}
Exemple #14
0
int EulerSparse::DoTimestep()
{
    // One explicit Euler step:
    //   a_n     = M^{-1} * F_n,  F_n = fext - fint - fdamping
    //   v_{n+1} = v_n + h * a_n
    //   x_{n+1} = x_n + h * v_{n+1}   (symplectic)
    //   x_{n+1} = x_n + h * v_n       (non-symplectic)
    // Returns 0 on success, 1 if the sparse solver reports an error.

    // remember the state at the beginning of the step
    for(int dof=0; dof<r; dof++)
    {
        q_1[dof] = q[dof];
        qvel_1[dof] = qvel[dof];
        qaccel_1[dof] = qaccel[dof];
    }

    // evaluate (and time) the internal forces at the current configuration
    PerformanceCounter counterForceAssemblyTime;
    forceModel->GetInternalForce(q, internalForces);
    counterForceAssemblyTime.StopCounter();
    forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime();

    for(int dof=0; dof<r; dof++)
        internalForces[dof] *= internalForceScalingFactor;

    // damping forces are assembled in the scratch array "buffer":
    // the mass-proportional part first, then the dampingMatrix contribution
    double * dampingForces = buffer;
    massMatrix->MultiplyVector(qvel, dampingForces);
    for(int dof=0; dof<r; dof++)
        dampingForces[dof] *= dampingMassCoef;
    dampingMatrix->MultiplyVectorAdd(qvel, dampingForces);

    // net force F_n; right-hand side of the solve M * qdelta = F_n
    for(int dof=0; dof<r; dof++)
        qresidual[dof] = externalForces[dof] - internalForces[dof] - dampingForces[dof];

    PerformanceCounter counterSystemSolveTime;

    // start the solve from a zeroed solution vector
    memset(qdelta, 0, sizeof(double)*r);

    // Exactly one solver back-end is expected to be enabled; each section
    // declares "info" and "solverString" for the error check below.
#ifdef PARDISO
    int info = pardisoSolver->SolveLinearSystem(qdelta, qresidual);
    char solverString[16] = "PARDISO";
#endif

#ifdef SPOOLES
    int info = spoolesSolver->SolveLinearSystem(qdelta, qresidual);
    char solverString[16] = "SPOOLES";
#endif

#ifdef PCG
    int info = jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner(qdelta, qresidual, 1e-6, 10000);
    // positive return values of the PCG solver are not treated as errors
    if (info > 0)
        info = 0;
    char solverString[16] = "PCG";
#endif

    if (info != 0)
    {
        printf("Error: %s sparse solver returned non-zero exit status %d.\n", solverString, (int)info);
        return 1;
    }

    counterSystemSolveTime.StopCounter();
    systemSolveTime = counterSystemSolveTime.GetElapsedTime();

    // advance the state; qdelta holds the acceleration
    for(int dof=0; dof<r; dof++)
    {
        if (symplectic)
        {
            // velocity first, so that the position uses the new velocity
            qvel[dof] += timestep * qdelta[dof];
            q[dof] += timestep * qvel[dof];
        }
        else
        {
            // position first, so that it uses the old velocity
            q[dof] += timestep * qvel[dof];
            qvel[dof] += timestep * qdelta[dof];
        }

        qaccel[dof] = qdelta[dof];
    }

    // constrain fixed DOFs
    for(int c=0; c<numConstrainedDOFs; c++)
    {
        q[constrainedDOFs[c]] = 0.0;
        qvel[constrainedDOFs[c]] = 0.0;
        qaccel[constrainedDOFs[c]] = 0.0;
    }

    return 0;
}
int CentralDifferencesSparse::DoTimestep()
{
  // One central-differences step. Returns 0 on success, 1 if the sparse
  // solver reports an error (the state is left unchanged in that case).

  // evaluate, scale and time the internal forces at the current configuration
  PerformanceCounter counterForceAssemblyTime;
  forceModel->GetInternalForce(q, internalForces);
  for (int dof=0; dof<r; dof++)
    internalForces[dof] *= internalForceScalingFactor;
  counterForceAssemblyTime.StopCounter();
  forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime();

  // with tangential damping enabled, refresh the system every
  // tangentialDampingMode-th step
  if ((tangentialDampingMode > 0) && (timestepIndex % tangentialDampingMode == 0))
    DecomposeSystemMatrix(); // this routines also updates the damping and system matrices

  // update equation is (see WRIGGERS P.: Computational Contact Mechanics. John Wiley & Sons, Ltd., 2002., page 275) :
  //
  // (M + dt / 2 * C) * q(t+1) = (dt)^2 * (fext(t) - fint(q(t))) + dt / 2 * C * q(t-1) + M * (2q(t) - q(t-1))
  //
  // which, rewritten for the increment q(t+1) - q(t), reads
  //
  // (M + dt / 2 * C) * (q(t+1) - q(t)) = (dt)^2 * (fext(t) - fint(q(t))) + dt / 2 * C * (q(t-1) - q(t)) + M * (q(t) - q(t-1))
  //
  // fext are the external forces, fint is the vector of internal forces

  // buffer = q(t) - q(t-1), qdelta = q(t-1) - q(t)
  for (int dof=0; dof<r; dof++)
  {
    buffer[dof] = q[dof] - q_1[dof];
    qdelta[dof] = q_1[dof] - q[dof];
  }

  // rhs = M * (q(t) - q(t-1))
  massMatrix->MultiplyVector(buffer, rhs);

  // buffer = C * (q(t-1) - q(t))
  rayleighDampingMatrix->MultiplyVector(qdelta, buffer);

  // rhs += dt / 2 * C * (q(t-1) - q(t)), then rhs += dt^2 * (fext - fint(q(t)))
  const double timestep2 = timestep * timestep;
  for (int dof=0; dof<r; dof++)
  {
    rhs[dof] += 0.5 * timestep * buffer[dof];
    rhs[dof] += timestep2 * (externalForces[dof] - internalForces[dof]);
  }

  // rhs is complete; drop the rows of the constrained DOFs
  RemoveRows(r, rhsConstrained, rhs, numConstrainedDOFs, constrainedDOFs);

  PerformanceCounter counterSystemSolveTime;

  memset(buffer, 0, sizeof(double) * r);

  // Exactly one solver back-end is expected to be enabled; each section
  // declares "info" and "solverString" for the error check below.
  #ifdef SPOOLES
    int info = spoolesSolver->SolveLinearSystem(buffer, rhsConstrained);
    char solverString[16] = "SPOOLES";
  #endif

  #ifdef PARDISO
    int info = pardisoSolver->SolveLinearSystem(buffer, rhsConstrained);
    char solverString[16] = "PARDISO";
  #endif
  
  #ifdef PCG
    int info = jacobiPreconditionedCGSolver->SolveLinearSystemWithJacobiPreconditioner(buffer, rhsConstrained, 1e-6, 10000);
    // positive return values of the PCG solver are not treated as errors
    if (info > 0)
      info = 0;
    char solverString[16] = "PCG";
  #endif

  // expand the solution back to all r DOFs; the result goes into qdelta
  InsertRows(r, buffer, qdelta, numConstrainedDOFs, constrainedDOFs);

  counterSystemSolveTime.StopCounter();
  systemSolveTime = counterSystemSolveTime.GetElapsedTime();

  if (info != 0)
  {
    printf("Error: %s sparse solver returned non-zero exit status %d.\n", solverString, (int)info);
    return 1;
  }

  // qdelta is the increment applied to q below;
  // update velocity, acceleration, and previous and current positions
  for (int dof=0; dof<r; dof++)
  {
    const double previousVelocity = qvel_1[dof];

    q_1[dof] = q[dof];
    qvel[dof] = qdelta[dof] / timestep;
    qaccel[dof] = (qvel[dof] - previousVelocity) / timestep;
    qvel_1[dof] = qvel[dof];
    qaccel_1[dof] = qaccel[dof];
    q[dof] += qdelta[dof];
  }

  timestepIndex++;

  return 0;
}
int CentralDifferencesDense::DoTimestep()
{
  // One central-differences step on the dense (reduced) system.
  // Returns 0 on success, or the non-zero DGETRS exit code on failure.

  // nothing to integrate when there are no reduced DOFs
  if (r == 0)
    return 0;

  // the reduced force interpolation (timed)
  PerformanceCounter counterForceAssemblyTime;
  reducedForceModel->GetInternalForce(q,internalForces);
  counterForceAssemblyTime.StopCounter();
  forceAssemblyTime = counterForceAssemblyTime.GetElapsedTime();

  if (plasticfq != NULL)
  {
    // record the total forces before the plastic part is subtracted
    SetTotalForces(internalForces);
    for (int k=0; k<r; k++)
      internalForces[k] -= plasticfq[k];
  }

  // everything from here to the end of the step counts as "system solve" time
  PerformanceCounter counterSystemSolveTime;

  for (int k=0; k<r; k++)
    internalForces[k] *= internalForceScalingFactor;

  if (tangentialDampingMode)
    UpdateLU();

  // update equation is:
  //
  // (massMatrix + dt / 2 * dampingMatrix) * q(t+1) = (dt)^2 * (fr - Rr(q(t))) + dt/2 * dampingMatrix * q(t-1) + massMatrix * (2q(t) - q(t-1))
  //
  // LU decomposition of massMatrix + dt / 2 * Dr is available in L,U
  // fr = U^T * f are the reduced external forces
  // Rr is the vector of reduced internal forces
  //
  // update equation follows from Newton's law
  // Mu'' = -Cu' - F_int + F_ext
  // Mu'' + Cu' + R(u) =  F_ext
  // R(u) = F_int
  //
  // here, F_int is the external loading force necessary to sustain a certain deformation
  // it is opposite to the internal forces acting on the body in a given deformation state

  // assemble the right-hand side row by row, in this order:
  //   massMatrix * (2q(t) - q(t-1))  +  dt/2 * dampingMatrix * q(t-1)  +  dt^2 * (fr - Rr(q(t)))
  const double halfTimestep = timestep / 2;
  const double timestepSquared = timestep * timestep;
  for (int row=0; row<r; row++)
  {
    rhs[row] = 0;

    for (int col=0; col<r; col++)
      rhs[row] += massMatrix[ELT(r,row,col)] * (2 * q[col] - q_1[col]);

    for (int col=0; col<r; col++)
      rhs[row] += halfTimestep * dampingMatrix[ELT(r,row,col)] * q_1[col];

    rhs[row] += timestepSquared * (externalForces[row] - internalForces[row]);
  }

  // solve (M~ + dt/2 D~) * qnew = rhs in place,
  // using the previously computed LU decomposition
  char trans='N';
  INTEGER nrhs = 1;
  INTEGER INFO;
  INTEGER R = r;
  DGETRS (&trans,&R,&nrhs,LUFactor,&R,IPIV->GetBuf(),rhs,&R,&INFO);

  if (INFO != 0)
  {
    printf("Error: DGETRS returned a non-zero exit code %d.\n", (int)INFO);
    return INFO;
  }

  // rhs now holds qnew: derive the velocity, then shift q -> q_1 and qnew -> q
  for (int k=0; k<r; k++)
  {
    const double previousQ = q[k];

    qvel[k] = (rhs[k] - previousQ) / timestep;
    q_1[k] = previousQ;
    q[k] = rhs[k];
  }

  ProcessPlasticDeformations();

  counterSystemSolveTime.StopCounter();
  systemSolveTime = counterSystemSolveTime.GetElapsedTime();

  return 0;
}
Exemple #17
0
// Dispatches aRunnable to this worker thread's event queue.
//
// aRunnable: the runnable to dispatch; ownership is taken immediately, so it
//            is released even if this method returns early.
// aFlags:    must be NS_DISPATCH_NORMAL; any other value is rejected.
//
// Returns NS_ERROR_UNEXPECTED for unsupported flags, otherwise the result of
// nsThread::Dispatch (NS_OK on success).
//
// When called from another thread, mOtherThreadsDispatchingViaEventTarget is
// raised (under mLock) for the duration of the call, the worker is notified
// through its condition variable after a successful dispatch, and
// mWorkerPrivateCondVar is notified when the counter drops back to zero.
NS_IMETHODIMP
WorkerThread::Dispatch(already_AddRefed<nsIRunnable> aRunnable, uint32_t aFlags)
{
  // May be called on any thread!
  nsCOMPtr<nsIRunnable> runnable(aRunnable); // in case we exit early

  // Workers only support asynchronous dispatch.
  if (NS_WARN_IF(aFlags != NS_DISPATCH_NORMAL)) {
    return NS_ERROR_UNEXPECTED;
  }

  const bool onWorkerThread = PR_GetCurrentThread() == mThread;

  // Count dispatches issued from the worker thread itself when scheduler
  // logging is enabled.
  if (GetSchedulerLoggingEnabled() && onWorkerThread && mWorkerPrivate) {
    PerformanceCounter* performanceCounter = mWorkerPrivate->GetPerformanceCounter();
    if (performanceCounter) {
      performanceCounter->IncrementDispatchCounter(DispatchCategory::Worker);
    }
  }

#ifdef DEBUG
  // Debug-only sanity check for runnables coming from other threads.
  if (runnable && !onWorkerThread) {
    nsCOMPtr<nsICancelableRunnable> cancelable = do_QueryInterface(runnable);

    {
      MutexAutoLock lock(mLock);

      // Only enforce cancelable runnables after we've started the worker loop.
      if (!mAcceptingNonWorkerRunnables) {
        MOZ_ASSERT(cancelable,
                   "Only nsICancelableRunnable may be dispatched to a worker!");
      }
    }
  }
#endif

  // Snapshot mWorkerPrivate; stays null when dispatching from another thread
  // while no worker is attached.
  WorkerPrivate* workerPrivate = nullptr;
  if (onWorkerThread) {
    // No need to lock here because it is only modified on this thread.
    MOZ_ASSERT(mWorkerPrivate);
    mWorkerPrivate->AssertIsOnWorkerThread();

    workerPrivate = mWorkerPrivate;
  } else {
    MutexAutoLock lock(mLock);

    MOZ_ASSERT(mOtherThreadsDispatchingViaEventTarget < UINT32_MAX);

    if (mWorkerPrivate) {
      workerPrivate = mWorkerPrivate;

      // Incrementing this counter will make the worker thread sleep if it
      // somehow tries to unset mWorkerPrivate while we're using it.
      mOtherThreadsDispatchingViaEventTarget++;
    }
  }

  nsresult rv;
  if (runnable && onWorkerThread) {
    // Runnables dispatched from the worker thread itself are first passed
    // through MaybeWrapAsWorkerRunnable.
    RefPtr<WorkerRunnable> workerRunnable = workerPrivate->MaybeWrapAsWorkerRunnable(runnable.forget());
    rv = nsThread::Dispatch(workerRunnable.forget(), NS_DISPATCH_NORMAL);
  } else {
    rv = nsThread::Dispatch(runnable.forget(), NS_DISPATCH_NORMAL);
  }

  // This branch is taken exactly when the counter was incremented above, so
  // the decrement below always pairs with that increment.
  if (!onWorkerThread && workerPrivate) {
    // We need to wake the worker thread if we're not already on the right
    // thread and the dispatch succeeded.
    if (NS_SUCCEEDED(rv)) {
      MutexAutoLock workerLock(workerPrivate->mMutex);

      workerPrivate->mCondVar.Notify();
    }

    // Now unset our waiting flag.
    {
      MutexAutoLock lock(mLock);

      MOZ_ASSERT(mOtherThreadsDispatchingViaEventTarget);

      // Last outstanding off-thread dispatcher: notify mWorkerPrivateCondVar.
      if (!--mOtherThreadsDispatchingViaEventTarget) {
        mWorkerPrivateCondVar.Notify();
      }
    }
  }

  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  return NS_OK;
}
// Computes the linear vibration modes of precomputationState.simulationMesh
// by solving the generalized eigenproblem for the stiffness and mass matrices
// with ARPACK, with the DOFs of precomputationState.fixedVertices removed.
//
// numDesiredModes: number of modes to compute.
// r:               output status; -1 is stored on entry, -3 if ARPACK
//                  converged fewer than numDesiredModes modes, and
//                  numDesiredModes on success.
// frequencies_:    on success, receives a calloc'ed array of numDesiredModes
//                  frequencies; non-positive ARPACK values map to 0, the
//                  others to sqrt(value) / (2 * pi). Untouched on failure.
// modes_:          on success, receives a calloc'ed array of
//                  numDesiredModes * 3 * #vertices doubles, one mode after
//                  another, with rows re-inserted for the constrained DOFs.
//                  Untouched on failure.
//
// The caller owns the returned arrays. Always returns NULL.
void * MyFrame::LinearModesWorker(
      int numDesiredModes,
      int * r, double ** frequencies_, double ** modes_ )
{
  *r = -1;

  // create mass matrix
  SparseMatrix * massMatrix;
  GenerateMassMatrix::computeMassMatrix(precomputationState.simulationMesh, &massMatrix, true);

  // create stiffness matrix (evaluated at zero displacement)
  StVKElementABCD * precomputedIntegrals = StVKElementABCDLoader::load(precomputationState.simulationMesh);
  StVKInternalForces * internalForces = 
    new StVKInternalForces(precomputationState.simulationMesh, precomputedIntegrals);

  SparseMatrix * stiffnessMatrix;
  StVKStiffnessMatrix * stiffnessMatrixClass = new StVKStiffnessMatrix(internalForces);
  stiffnessMatrixClass->GetStiffnessMatrixTopology(&stiffnessMatrix);
  double * zero = (double*) calloc(3 * precomputationState.simulationMesh->getNumVertices(), sizeof(double));
  stiffnessMatrixClass->ComputeStiffnessMatrix(zero, stiffnessMatrix);

  free(zero);
  delete(precomputedIntegrals);
  delete(stiffnessMatrixClass);
  delete(internalForces);

  // constrain the degrees of freedom: three DOFs per fixed vertex.
  // The "+1" makes the indices 1-based, matching oneIndexed below.
  const int oneIndexed = 1;
  const int numConstrainedVertices = (int) (precomputationState.fixedVertices.size());
  const int numConstrainedDOFs = 3 * numConstrainedVertices;
  int * constrainedDOFs = (int*) malloc (sizeof(int) * numConstrainedDOFs);
  int nextDOF = 0;
  for(set<int> :: iterator iter = precomputationState.fixedVertices.begin(); iter != precomputationState.fixedVertices.end(); ++iter)
  {
    const int vertex = *iter;
    constrainedDOFs[nextDOF++] = 3 * vertex + 1;
    constrainedDOFs[nextDOF++] = 3 * vertex + 2;
    constrainedDOFs[nextDOF++] = 3 * vertex + 3;
  }

  massMatrix->RemoveRowsColumns(
    numConstrainedDOFs, constrainedDOFs, oneIndexed);

  stiffnessMatrix->RemoveRowsColumns(
    numConstrainedDOFs, constrainedDOFs, oneIndexed);

  // call ARPACK

  double * frequenciesTemp = (double*) malloc (sizeof(double) * numDesiredModes);
  const int numRetainedDOFs = stiffnessMatrix->Getn();
  double * modesTemp = (double*) malloc 
    (sizeof(double) * numDesiredModes * numRetainedDOFs);

  printf("Computing linear modes using ARPACK: ...\n");
  PerformanceCounter ARPACKCounter;
  double sigma = -1.0;

  int numLinearSolverThreads = wxThread::GetCPUCount();
  if (numLinearSolverThreads > 3)
    numLinearSolverThreads = 3; // diminished returns in solver beyond 3 threads

  //massMatrix->Save("MFactory");
  //stiffnessMatrix->Save("KFactory");

  ARPACKSolver generalizedEigenvalueProblem;
  int nconv = generalizedEigenvalueProblem.SolveGenEigShInv
    (stiffnessMatrix, massMatrix, 
     numDesiredModes, frequenciesTemp, 
     modesTemp, sigma, numLinearSolverThreads);

  ARPACKCounter.StopCounter();
  double ARPACKTime = ARPACKCounter.GetElapsedTime();
  printf("ARPACK time: %G s.\n", ARPACKTime); fflush(NULL);

  if (nconv < numDesiredModes)
  {
    // not enough converged modes: release everything and report -3
    free(modesTemp);
    free(frequenciesTemp);
    *r = -3;
    free(constrainedDOFs);
    delete(massMatrix);
    delete(stiffnessMatrix);
    return NULL;
  }

  const int n3 = 3 * precomputationState.simulationMesh->getNumVertices();
  *frequencies_ = (double*) calloc (numDesiredModes, sizeof(double));
  // Element counts and offsets are computed in size_t: numDesiredModes * n3
  // can exceed the range of int for large meshes.
  *modes_ = (double*) calloc ((size_t) numDesiredModes * n3, sizeof(double));

  for(int mode=0; mode<numDesiredModes; mode++)
  {
    // insert zero rows into the computed modes
    InsertRows(n3, &modesTemp[(size_t) numRetainedDOFs * mode], &((*modes_)[(size_t) n3 * mode]), 
      numConstrainedDOFs, constrainedDOFs, oneIndexed);
  }

  for(int mode=0; mode<numDesiredModes; mode++)
  {
    // non-positive values would make sqrt invalid or meaningless; report 0
    if (frequenciesTemp[mode] <= 0)
      (*frequencies_)[mode] = 0.0;
    else
      (*frequencies_)[mode] = sqrt((frequenciesTemp)[mode]) / (2 * M_PI);
  }
 
  free(modesTemp);
  free(frequenciesTemp);
  free(constrainedDOFs);

  delete(massMatrix);
  delete(stiffnessMatrix);

  *r = numDesiredModes;

  return NULL;
}