// Resolves the constraint forces of one body cluster (island) and integrates its bodies.
//
// When the cluster has active joints it is first handed to SortClusters. Two paths follow:
//  - m_isContinueCollision clear: build the Jacobian and compute the reaction forces when there
//    are active joints, otherwise only integrate the external forces; then integrate velocity.
//  - m_isContinueCollision set: solve once, run the equilibrium / auto-sleep bookkeeping over the
//    bodies and, unless the whole cluster went to sleep, sub-step the time step using
//    time-of-impact queries on the contact joints.
//
// cluster  - cluster to resolve; m_bodyStart / m_jointStart index into the world's
//            m_bodiesMemory / m_jointsMemory arrays.
// threadID - forwarded unchanged to the solver and collision helpers.
// timestep - duration of the step being resolved.
//
// NOTE(review): every body loop below starts at index 1; presumably slot 0 of a cluster is a
// sentinel body - confirm against the cluster builder.
void dgWorldDynamicUpdate::ResolveClusterForces(dgBodyCluster* const cluster, dgInt32 threadID, dgFloat32 timestep) const
{
	if (cluster->m_activeJointCount) {
		SortClusters(cluster, timestep, threadID);
	}

	if (!cluster->m_isContinueCollision) {
		if (cluster->m_activeJointCount) {
			BuildJacobianMatrix (cluster, threadID, timestep);
			CalculateClusterReactionForces(cluster, threadID, timestep, DG_SOLVER_MAX_ERROR);
			//CalculateClusterReactionForces_1(cluster, threadID, timestep, DG_SOLVER_MAX_ERROR);
		} else {
			// no active joints: only the external forces act on the bodies
			IntegrateExternalForce(cluster, timestep, threadID);
		}
		IntegrateVelocity (cluster, DG_SOLVER_MAX_ERROR, timestep, threadID);
	} else {
		// calculate reaction forces and new velocities
		BuildJacobianMatrix (cluster, threadID, timestep);
		IntegrateReactionsForces (cluster, threadID, timestep, DG_SOLVER_MAX_ERROR);

		// see if the island goes to sleep
		bool isAutoSleep = true;
		bool stackSleeping = true;
		// start high so the dgMin below yields the smallest counter among the dynamic bodies
		dgInt32 sleepCounter = 10000;

		// NOTE(review): casts away const and treats this object as the dgWorld;
		// presumably dgWorld derives from dgWorldDynamicUpdate - confirm.
		dgWorld* const world = (dgWorld*) this;
		const dgInt32 bodyCount = cluster->m_bodyCount;
		dgBodyInfo* const bodyArrayPtr = (dgBodyInfo*) &world->m_bodiesMemory[0];
		dgBodyInfo* const bodyArray = &bodyArrayPtr[cluster->m_bodyStart];

		const dgFloat32 forceDamp = DG_FREEZZING_VELOCITY_DRAG;
		// largest squared acceleration / velocity magnitudes seen over the dynamic bodies
		dgFloat32 maxAccel = dgFloat32 (0.0f);
		dgFloat32 maxAlpha = dgFloat32 (0.0f);
		dgFloat32 maxSpeed = dgFloat32 (0.0f);
		dgFloat32 maxOmega = dgFloat32 (0.0f);

		const dgFloat32 speedFreeze = world->m_freezeSpeed2;
		const dgFloat32 accelFreeze = world->m_freezeAccel2;
		const dgVector forceDampVect (forceDamp, forceDamp, forceDamp, dgFloat32 (0.0f));
		for (dgInt32 i = 1; i < bodyCount; i ++) {
			dgDynamicBody* const body = (dgDynamicBody*) bodyArray[i].m_body;
			if (body->IsRTTIType (dgBody::m_dynamicBodyRTTI)) {
				dgAssert (body->m_invMass.m_w);

				// squared magnitudes (each vector dotted with itself)
				const dgFloat32 accel2 = body->m_accel.DotProduct3(body->m_accel);
				const dgFloat32 alpha2 = body->m_alpha.DotProduct3(body->m_alpha);
				const dgFloat32 speed2 = body->m_veloc.DotProduct3(body->m_veloc);
				const dgFloat32 omega2 = body->m_omega.DotProduct3(body->m_omega);

				maxAccel = dgMax (maxAccel, accel2);
				maxAlpha = dgMax (maxAlpha, alpha2);
				maxSpeed = dgMax (maxSpeed, speed2);
				maxOmega = dgMax (maxOmega, omega2);
				// a body is in equilibrium when all four squared magnitudes are below the freeze thresholds
				bool equilibrium = (accel2 < accelFreeze) && (alpha2 < accelFreeze) && (speed2 < speedFreeze) && (omega2 < speedFreeze);
				if (equilibrium) {
					// damp the residual velocities; the compare-and-mask keeps the damped value only
					// while its squared magnitude exceeds m_velocTol.
					// NOTE(review): presumably the dgVector comparison yields a per-lane mask that
					// zeroes the vector otherwise - confirm against dgVector.
					dgVector veloc (body->m_veloc * forceDampVect);
					dgVector omega = body->m_omega * forceDampVect;
					body->m_veloc = (dgVector (veloc.DotProduct4(veloc)) > m_velocTol) & veloc;
					body->m_omega = (dgVector (omega.DotProduct4(omega)) > m_velocTol) & omega;
				}
				body->m_equilibrium = dgUnsigned32 (equilibrium);
				// the cluster sleeps only if every dynamic body is in equilibrium and allows auto sleep
				stackSleeping &= equilibrium;
				isAutoSleep &= body->m_autoSleep;

				sleepCounter = dgMin (sleepCounter, body->m_sleepingCounter);
			}
			// clear accel and angular acceleration
			// (done for every body in the cluster, not only the dynamic ones)
			body->m_accel = dgVector::m_zero;
			body->m_alpha = dgVector::m_zero;
		}

		if (isAutoSleep) {
			if (stackSleeping) {
				// the island went to sleep mode,
				// so all accelerations and velocities are zeroed
				for (dgInt32 i = 1; i < bodyCount; i ++) {
					dgBody* const body = bodyArray[i].m_body;
					dgAssert (body->IsRTTIType (dgBody::m_dynamicBodyRTTI) || body->IsRTTIType (dgBody::m_kinematicBodyRTTI));
					body->m_accel = dgVector::m_zero;
					body->m_alpha = dgVector::m_zero;
					body->m_veloc = dgVector::m_zero;
					body->m_omega = dgVector::m_zero;
				}
			} else {
				// island is not sleeping but may be resting with small residual velocity for a long time
				// see if we can force to go to sleep
				// any maximum above the last sleep-table entry means the cluster is clearly moving:
				// restart the sleep counters
				if ((maxAccel > world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxAccel) || (maxAlpha > world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxAlpha) || (maxSpeed > world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxVeloc) || (maxOmega > world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxOmega)) {
					for (dgInt32 i = 1; i < bodyCount; i ++) {
						dgDynamicBody* const body = (dgDynamicBody*) bodyArray[i].m_body;
						if (body->IsRTTIType (dgBody::m_dynamicBodyRTTI)) {
							body->m_sleepingCounter = 0;
						}
					}
				} else {
					// pick the first sleep-table entry whose limits bound all four maxima
					dgInt32 index = 0;
					for (dgInt32 i = 0; i < DG_SLEEP_ENTRIES; i ++) {
						if ((maxAccel <= world->m_sleepTable[i].m_maxAccel) && (maxAlpha <= world->m_sleepTable[i].m_maxAlpha) && (maxSpeed <= world->m_sleepTable[i].m_maxVeloc) && (maxOmega <= world->m_sleepTable[i].m_maxOmega)) {
							index = i;
							break;
						}
					}

					// NOTE(review): the 60 * timestep factor presumably rescales the counter to
					// 60 Hz-equivalent steps - confirm.
					dgInt32 timeScaleSleepCount = dgInt32 (dgFloat32 (60.0f) * sleepCounter * timestep);
					if (timeScaleSleepCount > world->m_sleepTable[index].m_steps) {
						// force island to sleep
						stackSleeping = true;
						for (dgInt32 i = 1; i < bodyCount; i ++) {
							dgBody* const body = bodyArray[i].m_body;
							dgAssert (body->IsRTTIType (dgBody::m_dynamicBodyRTTI) || body->IsRTTIType (dgBody::m_kinematicBodyRTTI));
							body->m_accel = dgVector::m_zero;
							body->m_alpha = dgVector::m_zero;
							body->m_veloc = dgVector::m_zero;
							body->m_omega = dgVector::m_zero;
							body->m_equilibrium = true;
						}
					} else {
						// not resting long enough yet: advance the counter and store it in every dynamic body
						sleepCounter ++;
						for (dgInt32 i = 1; i < bodyCount; i ++) {
							dgDynamicBody* const body = (dgDynamicBody*) bodyArray[i].m_body;
							if (body->IsRTTIType (dgBody::m_dynamicBodyRTTI)) {
								body->m_sleepingCounter = sleepCounter;
							}
						}
					}
				}
			}
		}

		if (!(isAutoSleep & stackSleeping)) {
			// island is not sleeping, need to integrate island velocity
			const dgUnsigned32 lru = world->GetBroadPhase()->m_lru;
			const dgInt32 jointCount = cluster->m_jointCount;
			dgJointInfo* const constraintArrayPtr = (dgJointInfo*) &world->m_jointsMemory[0];
			dgJointInfo* const constraintArray = &constraintArrayPtr[cluster->m_jointStart];

			dgFloat32 timeRemaining = timestep;
			// time spans of at most 1% of the step are treated as "already in contact"
			const dgFloat32 timeTol = dgFloat32 (0.01f) * timestep;
			for (dgInt32 i = 0; (i < DG_MAX_CONTINUE_COLLISON_STEPS) && (timeRemaining > timeTol); i ++) {
				// calculate the closest time to impact
				// over all contact joints that have a body in continuous collision mode
				dgFloat32 timeToImpact = timeRemaining;
				for (dgInt32 j = 0; (j < jointCount) && (timeToImpact > timeTol); j ++) {
					dgContact* const contact = (dgContact*) constraintArray[j].m_joint;
					if (contact->GetId() == dgConstraint::m_contactConstraint) {
						dgDynamicBody* const body0 = (dgDynamicBody*)contact->m_body0;
						dgDynamicBody* const body1 = (dgDynamicBody*)contact->m_body1;
						if (body0->m_continueCollisionMode | body1->m_continueCollisionMode) {
							// p, q and normal are outputs of the query that are not used here
							dgVector p;
							dgVector q;
							dgVector normal;
							timeToImpact = dgMin (timeToImpact, world->CalculateTimeToImpact (contact, timeToImpact, threadID, p, q, normal, dgFloat32 (-1.0f / 256.0f)));
						}
					}
				}

				if (timeToImpact > timeTol) {
					// free flight: advance every dynamic body up to the earliest impact
					timeRemaining -= timeToImpact;
					for (dgInt32 j = 1; j < bodyCount; j ++) {
						dgDynamicBody* const body = (dgDynamicBody*) bodyArray[j].m_body;
						if (body->IsRTTIType (dgBody::m_dynamicBodyRTTI)) {
							body->IntegrateVelocity(timeToImpact);
							body->UpdateWorlCollisionMatrix();
						}
					}
				} else {
					// impact is (almost) immediate
					// NOTE(review): timeRemaining is not reduced by timeToImpact in this branch - confirm intended.
					if (timeToImpact >= dgFloat32 (-1.0e-5f)) {
						for (dgInt32 j = 1; j < bodyCount; j++) {
							dgDynamicBody* const body = (dgDynamicBody*)bodyArray[j].m_body;
							if (body->IsRTTIType(dgBody::m_dynamicBodyRTTI)) {
								body->IntegrateVelocity(timeToImpact);
								body->UpdateWorlCollisionMatrix();
							}
						}
					}

					// refresh the contacts and solve again with a zero time step
					CalculateClusterContacts (cluster, timeRemaining, lru, threadID);
					BuildJacobianMatrix (cluster, threadID, 0.0f);
					IntegrateReactionsForces (cluster, threadID, 0.0f, DG_SOLVER_MAX_ERROR);

					// advance in small fixed steps for as long as no contact point is approaching
					bool clusterReceding = true;
					const dgFloat32 step = timestep * dgFloat32 (1.0f / DG_MAX_CONTINUE_COLLISON_STEPS);
					for (dgInt32 k = 0; (k < DG_MAX_CONTINUE_COLLISON_STEPS) && clusterReceding; k ++) {
						dgFloat32 smallTimeStep = dgMin (step, timeRemaining);
						timeRemaining -= smallTimeStep;
						for (dgInt32 j = 1; j < bodyCount; j ++) {
							dgDynamicBody* const body = (dgDynamicBody*) bodyArray[j].m_body;
							if (body->IsRTTIType (dgBody::m_dynamicBodyRTTI)) {
								body->IntegrateVelocity (smallTimeStep);
								body->UpdateWorlCollisionMatrix();
							}
						}

						clusterReceding = false;
						if (timeRemaining > timeTol) {
							CalculateClusterContacts (cluster, timeRemaining, lru, threadID);

							// the cluster counts as colliding as soon as one contact point has a
							// negative relative velocity along its contact normal
							bool isColliding = false;
							for (dgInt32 j = 0; (j < jointCount) && !isColliding; j ++) {
								dgContact* const contact = (dgContact*) constraintArray[j].m_joint;
								if (contact->GetId() == dgConstraint::m_contactConstraint) {
									const dgBody* const body0 = contact->m_body0;
									const dgBody* const body1 = contact->m_body1;

									const dgVector& veloc0 = body0->m_veloc;
									const dgVector& veloc1 = body1->m_veloc;
									const dgVector& omega0 = body0->m_omega;
									const dgVector& omega1 = body1->m_omega;
									const dgVector& com0 = body0->m_globalCentreOfMass;
									const dgVector& com1 = body1->m_globalCentreOfMass;
									for (dgList<dgContactMaterial>::dgListNode* node = contact->GetFirst(); node; node = node->GetNext()) {
										const dgContactMaterial* const contactMaterial = &node->GetInfo();
										// velocity of each body at the contact point: linear + omega x (point - centre of mass)
										dgVector vel0 (veloc0 + omega0.CrossProduct3(contactMaterial->m_point - com0));
										dgVector vel1 (veloc1 + omega1.CrossProduct3(contactMaterial->m_point - com1));
										dgVector vRel (vel0 - vel1);
										dgAssert (contactMaterial->m_normal.m_w == dgFloat32 (0.0f));
										dgFloat32 speed = vRel.DotProduct4(contactMaterial->m_normal).m_w;
										isColliding |= (speed < dgFloat32 (0.0f));
									}
								}
							}
							clusterReceding = !isColliding;
						}
					}
				}
			}

			// consume whatever time is left, then refresh the collision matrices; when nothing
			// is left the matrices are refreshed with the full timestep instead
			if (timeRemaining > dgFloat32 (0.0)) {
				for (dgInt32 j = 1; j < bodyCount; j ++) {
					dgDynamicBody* const body = (dgDynamicBody*) bodyArray[j].m_body;
					if (body->IsRTTIType (dgBody::m_dynamicBodyRTTI)) {
						body->IntegrateVelocity(timeRemaining);
						body->UpdateCollisionMatrix (timeRemaining, threadID);
					}
				}
			} else {
				for (dgInt32 j = 1; j < bodyCount; j ++) {
					dgDynamicBody* const body = (dgDynamicBody*) bodyArray[j].m_body;
					if (body->IsRTTIType (dgBody::m_dynamicBodyRTTI)) {
						body->UpdateCollisionMatrix (timestep, threadID);
					}
				}
			}
		}
	}
}
int main() { std::mt19937 generator(time(nullptr)); sys::ComputeSystem cs; cs.create(sys::ComputeSystem::_gpu); sys::ComputeProgram prog; prog.loadFromFile("resources/neoKernels.cl", cs); // --------------------------- Create the Sparse Coder --------------------------- cl::Image2D inputImage = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), 64, 64); std::ifstream fromFile("resources/train-images.idx3-ubyte", std::ios::binary | std::ios::in); if (!fromFile.is_open()) { std::cerr << "Could not open train-images.idx3-ubyte!" << std::endl; return 1; } std::vector<neo::PredictiveHierarchy::LayerDesc> layerDescs(4); layerDescs[0]._size = { 64, 64 }; layerDescs[0]._feedForwardRadius = 8; layerDescs[1]._size = { 48, 48 }; layerDescs[2]._size = { 32, 32 }; layerDescs[3]._size = { 24, 24 }; neo::PredictiveHierarchy ph; ph.createRandom(cs, prog, { 64, 64 }, layerDescs, { -0.01f, 0.01f }, { 0.01f, 0.05f }, 0.1f, generator); float avgError = 1.0f; float avgErrorDecay = 0.1f; sf::RenderWindow window; window.create(sf::VideoMode(1024, 512), "MNIST Video Test"); vis::Plot plot; plot._curves.push_back(vis::Curve()); plot._curves[0]._name = "Squared Error"; std::uniform_int_distribution<int> digitDist(0, 59999); std::uniform_real<float> dist01(0.0f, 1.0f); sf::RenderTexture rt; rt.create(64, 64); sf::Image digit0; sf::Texture digit0Tex; sf::Image digit1; sf::Texture digit1Tex; sf::Image pred; sf::Texture predTex; digit0.create(28, 28); digit1.create(28, 28); pred.create(rt.getSize().x, rt.getSize().y); const float boundingSize = (64 - 28) / 2; const float center = 32; const float minimum = center - boundingSize; const float maximum = center + boundingSize; float avgError2 = 1.0f; const float avgError2Decay = 0.01f; std::vector<float> prediction(64 * 64, 0.0f); for (int iter = 0; iter < 10000; iter++) { // Select digit indices int d0 = digitDist(generator); int d1 = digitDist(generator); // Load digits Image img0, img1; loadMNISTimage(fromFile, d0, 
img0); loadMNISTimage(fromFile, d1, img1); for (int x = 0; x < digit0.getSize().x; x++) for (int y = 0; y < digit0.getSize().y; y++) { int index = x + y * digit0.getSize().x; sf::Color c = sf::Color::White; c.a = img0._intensities[index]; digit0.setPixel(x, y, c); } digit0Tex.loadFromImage(digit0); for (int x = 0; x < digit1.getSize().x; x++) for (int y = 0; y < digit1.getSize().y; y++) { int index = x + y * digit1.getSize().x; sf::Color c = sf::Color::White; c.a = img1._intensities[index]; digit1.setPixel(x, y, c); } digit1Tex.loadFromImage(digit1); sf::Vector2f vel0(dist01(generator) * 2.0f - 1.0f, dist01(generator) * 2.0f - 1.0f); sf::Vector2f vel1(dist01(generator) * 2.0f - 1.0f, dist01(generator) * 2.0f - 1.0f); sf::Vector2f pos0(dist01(generator) * (maximum - minimum) + minimum, dist01(generator) * (maximum - minimum) + minimum); sf::Vector2f pos1(dist01(generator) * (maximum - minimum) + minimum, dist01(generator) * (maximum - minimum) + minimum); float vel0mul = dist01(generator) * 6.0f / std::max(1.0f, std::sqrt(vel0.x * vel0.x + vel0.y + vel0.y)); vel0 *= vel0mul; float vel1mul = dist01(generator) * 6.0f / std::max(1.0f, std::sqrt(vel1.x * vel1.x + vel1.y + vel1.y)); vel1 *= vel1mul; // Render video for (int f = 0; f < 20; f++) { sf::Event windowEvent; while (window.pollEvent(windowEvent)) { switch (windowEvent.type) { case sf::Event::Closed: return 0; } } pos0 += vel0; pos1 += vel1; if (pos0.x < minimum) { pos0.x = minimum; vel0.x *= -1.0f; } else if (pos0.x > maximum) { pos0.x = maximum; vel0.x *= -1.0f; } if (pos0.y < minimum) { pos0.y = minimum; vel0.y *= -1.0f; } else if (pos0.y > maximum) { pos0.y = maximum; vel0.y *= -1.0f; } if (pos1.x < minimum) { pos1.x = minimum; vel1.x *= -1.0f; } else if (pos1.x > maximum) { pos1.x = maximum; vel1.x *= -1.0f; } if (pos1.y < minimum) { pos1.y = minimum; vel1.y *= -1.0f; } else if (pos1.y > maximum) { pos1.y = maximum; vel1.y *= -1.0f; } window.clear(); rt.clear(sf::Color::Black); sf::Sprite s0; 
s0.setTexture(digit0Tex); s0.setOrigin(28 / 2, 28 / 2); s0.setPosition(pos0); rt.draw(s0); sf::Sprite s1; s1.setTexture(digit1Tex); s1.setOrigin(28 / 2, 28 / 2); s1.setPosition(pos1); rt.draw(s1); rt.display(); // Get input image sf::Image res = rt.getTexture().copyToImage(); // Show RT const float scale = 4.0f; sf::Sprite s; s.setScale(scale, scale); s.setTexture(rt.getTexture()); window.draw(s); std::vector<float> input(64 * 64); // Train if (sf::Keyboard::isKeyPressed(sf::Keyboard::T)) { for (int x = 0; x < res.getSize().x; x++) for (int y = 0; y < res.getSize().y; y++) { input[x + y * 64] = prediction[x + y * 64]; } } else { const float predictionIncorporateRatio = 0.1f; for (int x = 0; x < res.getSize().x; x++) for (int y = 0; y < res.getSize().y; y++) { input[x + y * 64] = (1.0f - predictionIncorporateRatio) * res.getPixel(x, y).r / 255.0f + predictionIncorporateRatio * prediction[x + y * 64]; } } // Error float error = 0.0f; for (int x = 0; x < res.getSize().x; x++) for (int y = 0; y < res.getSize().y; y++) { error += std::pow(res.getPixel(x, y).r / 255.0f - prediction[x + y * 64], 2); } error /= res.getSize().x * res.getSize().y; avgError2 = (1.0f - avgError2Decay) * avgError2 + avgError2Decay * error; std::cout << "Squared Error: " << avgError2 << std::endl; cs.getQueue().enqueueWriteImage(inputImage, CL_TRUE, { 0, 0, 0 }, { 64, 64, 1 }, 0, 0, input.data()); ph.simStep(cs, inputImage); cs.getQueue().enqueueReadImage(ph.getPrediction(), CL_TRUE, { 0, 0, 0 }, { 64, 64, 1 }, 0, 0, prediction.data()); // Show prediction for (int x = 0; x < rt.getSize().x; x++) for (int y = 0; y < rt.getSize().y; y++) { sf::Color c = sf::Color::White; c.r = c.b = c.g = std::min(1.0f, std::max(0.0f, prediction[x + y * 64])) * 255.0f; pred.setPixel(x, y, c); } predTex.loadFromImage(pred); sf::Sprite sp; sp.setTexture(predTex); sp.setScale(scale, scale); sp.setPosition(window.getSize().x - scale * rt.getSize().x, 0); window.draw(sp); /*sf::Image sdr; 
sdr.create(prsdr.getLayerDescs().front()._width, prsdr.getLayerDescs().front()._height); for (int x = 0; x < sdr.getSize().x; x++) for (int y = 0; y < sdr.getSize().y; y++) { sf::Color c = sf::Color::White; c.r = c.g = c.b = prsdr.getLayers().front()._sdr.getHiddenState(x, y) * 255.0f; sdr.setPixel(x, y, c); } sf::Texture sdrTex; sdrTex.loadFromImage(sdr); sf::Sprite sdrS; sdrS.setTexture(sdrTex); sdrS.setPosition(0.0f, window.getSize().y - sdrTex.getSize().y * scale); sdrS.setScale(scale, scale); window.draw(sdrS);*/ window.display(); if (sf::Keyboard::isKeyPressed(sf::Keyboard::Escape)) return 0; } } /*sf::RenderTexture rt; rt.create(1024, 1024); sf::Texture lineGradientTexture; lineGradientTexture.loadFromFile("resources/lineGradient.png"); sf::Font tickFont; tickFont.loadFromFile("resources/arial.ttf"); plot.draw(rt, lineGradientTexture, tickFont, 1.0f, sf::Vector2f(0.0f, step), sf::Vector2f(0.0f, 1.0f), sf::Vector2f(128.0f, 128.0f), sf::Vector2f(500.0f, 0.1f), 2.0f, 3.0f, 1.5f, 3.0f, 20.0f, 6); rt.display(); rt.getTexture().copyToImage().saveToFile("plot.png");*/ return 0; }