// Return this constraint's velocity cache entry for State s. If the entry is
// not yet marked realized, it is filled in by calcVelocityInfo() from the
// spatial velocities of the bodies identified by m_mobod_F and m_mobod_B,
// then marked realized before being returned.
const Constraint::LineOnLineContactImpl::VelocityCache&
Constraint::LineOnLineContactImpl::
ensureVelocityCacheRealized(const State& s) const {
    // Fast path: the entry is already valid, so hand back the read-only view.
    const bool alreadyRealized =
        getMyMatterSubsystemRep().isCacheValueRealized(s, m_velCacheIx);
    if (alreadyRealized) {
        return getVelocityCache(s);
    }

    // Slow path: get writable access to the entry and populate it.
    VelocityCache& velCache = updVelocityCache(s);

    const SpatialVec& V_AF = getBodyVelocityFromState(s, m_mobod_F);
    const SpatialVec& V_AB = getBodyVelocityFromState(s, m_mobod_B);
    calcVelocityInfo(s, V_AF, V_AB, velCache);

    // Flag the entry as realized so subsequent calls take the fast path.
    getMyMatterSubsystemRep().markCacheValueRealized(s, m_velCacheIx);
    return velCache;
}
// Return the velocity cache entry for this sphere-on-sphere contact in
// State s, computing it and marking it realized first if necessary.
// This costs about 170 flops (per the line-by-line tallies below) if position
// info has already been calculated, otherwise we also pay for
// ensurePositionCacheRealized().
const Constraint::SphereOnSphereContactImpl::VelocityCache& 
Constraint::SphereOnSphereContactImpl::
ensureVelocityCacheRealized(const State& s) const {
    // Nothing to do if the entry has already been marked realized.
    if (getMyMatterSubsystemRep().isCacheValueRealized(s, m_velCacheIx))
        return getVelocityCache(s);

    // Velocity info is built on top of the position-level quantities.
    const PositionCache& pc = ensurePositionCacheRealized(s);
    VelocityCache& vc = updVelocityCache(s);

    // Axes of the contact frame C, taken from X_AC.
    const UnitVec3& Cx_A = pc.X_AC.x();
    const UnitVec3& Cy_A = pc.X_AC.y();
    const UnitVec3& Cz_A = pc.X_AC.z();

    // Split each body's spatial velocity into its angular ([0]) and
    // linear ([1]) parts.
    const SpatialVec& V_AF = getBodyVelocityFromState(s, m_mobod_F);
    const Vec3&       w_AF = V_AF[0];
    const Vec3&       v_AF = V_AF[1];
    const SpatialVec& V_AB = getBodyVelocityFromState(s, m_mobod_B);
    const Vec3&       w_AB = V_AB[0];
    const Vec3&       v_AB = V_AB[1];

    // These are d/dt_A p_FSf and d/dt_A p_BSb
    const Vec3 wX_p_FSf_A = w_AF % pc.p_FSf_A;      // 9 flops
    const Vec3 wX_p_BSb_A = w_AB % pc.p_BSb_A;      // 9 flops
    const Vec3 v_ASf = v_AF + wX_p_FSf_A;           // 3 flops
    const Vec3 v_ASb = v_AB + wX_p_BSb_A;           // 3 flops
    vc.pd_SfSb_A = v_ASb - v_ASf;                   // 3 flops

    // These are the Coriolis accelerations of Sf and Sb, needed later.
    vc.wXwX_p_FSf_A = w_AF % wX_p_FSf_A;            // 9 flops
    vc.wXwX_p_BSb_A = w_AB % wX_p_BSb_A;            // 9 flops

    // Calculate the velocity of B's material point (station) at Co, 
    // measured in the F frame and expressed in A.
    const Vec3 vA_BCo_A = v_AB + w_AB % pc.p_BCo_A; // 12 flops
    const Vec3 vA_FCo_A = v_AF + w_AF % pc.p_FCo_A; // 12 flops
    vc.vF_BCo_A = vA_BCo_A - vA_FCo_A;              //  3 flops

    // These are the velocities in the A frame of the *contact point* locations
    // measured from F's and B's origins; these are not stations since the
    // contact point moves relative to the F and B frame.
    // Note: w_AF % p_FSf was already computed above as wX_p_FSf_A, so reuse it
    // here rather than paying for the same cross product a second time.
    const Vec3 pd_FB_A  = v_AB - v_AF;              //  3 flops
    const Vec3 pd_FCo_A = wX_p_FSf_A + pc.kf*vc.pd_SfSb_A; // 6 flops
    const Vec3 pd_BCo_A = pd_FCo_A - pd_FB_A;       //  3 flops
    vc.wXpd_FCo_A = w_AF % pd_FCo_A;                //  9 flops
    vc.wXpd_BCo_A = w_AB % pd_BCo_A;                //  9 flops

    // Calculate d/dt_A Cz. In the singular case Cz is differentiated as
    // though it were fixed in F; otherwise we take the part of pd_SfSb that
    // is perpendicular to Cz and scale it by pc.oor.
    vc.Czd_A = pc.isSingular 
        ? w_AF % Cz_A // rare
        : pc.oor*(vc.pd_SfSb_A - (~vc.pd_SfSb_A*Cz_A)*Cz_A); // 12 flops

    // We also need d/dt_A of Cx and Cy, which we'll call Cxd and Cyd. Here's 
    // how to get those. Since the x-y directions are arbitrary in the plane, we
    // can assume that they are not rotating about z, that is, w_FC is in the 
    // x-y plane. Our strategy will be to work in the F frame here, because we
    // know that CzdF = d/dt_F Cz = w_FC % Cz, a vector perpendicular to both 
    // w_FC and Cz. But that means CzdF is in the x-y plane and since there
    // was no z component of w_FC it is just w_FC rotated 90 degrees. Since x
    // and y are also 90 degrees apart, we can get the derivatives we need:
    //    CxdF = -CzdF x Cy
    //    CydF =  CzdF x Cx
    // We can then convert those to A-frame derivatives. To get CzdF:
    //    CzdF = Czd - w_AF % Cz
    const Vec3 CzdF_A = vc.Czd_A - w_AF % Cz_A; // 12 flops
    const Vec3 CxdF_A = -CzdF_A % Cy_A;         // 12 flops
    const Vec3 CydF_A =  CzdF_A % Cx_A;         //  9 flops
    vc.Cxd_A = CxdF_A + w_AF % Cx_A;            // 12 flops
    vc.Cyd_A = CydF_A + w_AF % Cy_A;            // 12 flops

    // Record that the entry is now valid so later calls return early.
    getMyMatterSubsystemRep().markCacheValueRealized(s, m_velCacheIx);
    
    return vc;
}