diff --git a/examples/multibody/deformable/mpm_cloth.cc b/examples/multibody/deformable/mpm_cloth.cc
index 8f7f9263b50d..8ea9d3fa5df1 100644
--- a/examples/multibody/deformable/mpm_cloth.cc
+++ b/examples/multibody/deformable/mpm_cloth.cc
@@ -55,7 +55,8 @@ DEFINE_string(contact_approximation, "sap",
 // NOTE (changyu): here we choose k=100 for a smaller amount of penetration (0.01 mm, i.e., 1e-5 m).
 DEFINE_double(stiffness, 10.0, "Contact Stiffness.");
 DEFINE_double(friction, 0.0, "Contact Friction.");
-DEFINE_double(damping, 1e-5,
+DEFINE_double(margin, 1.0, "MPM-Rigid Margin.");
+DEFINE_double(damping, 1e-2,
               "Hunt and Crossley damping for the deformable body, only used when "
               "'contact_approximation' is set to 'lagged' or 'similar' [s/m].");
 DEFINE_bool(exact_line_search, false, "Enable exact_line_search for contact solving.");
@@ -207,11 +208,23 @@ int do_main() {
   std::vector<int> indices;
   for (int i = 0; i < length; ++i) {
     for (int j = 0; j < width; ++j) {
-      double z = FLAGS_testcase == 100 ? 0.051 : (FLAGS_testcase == 3 ? 0.26 : 0.3 + k * 0.1);
+      double z = (FLAGS_testcase == 100 || FLAGS_testcase == 111) ? 0.051 : (FLAGS_testcase == 3 ? 0.26 : 0.3 + k * 0.1);
       if (FLAGS_testcase == 2) z = 0.27;
       if (FLAGS_testcase == 5) z = 0.18;
-      inital_pos.emplace_back((0.5 - 0.5 * l) + i * dx + k * 0.01, (0.5 - 0.5 * l) + j * dx + k * 0.01, z);
-      inital_vel.emplace_back(0., 0., 0.);
+      double x = (0.5 - 0.5 * l) + i * dx + k * 0.01;
+      double y = (0.5 - 0.5 * l) + j * dx + k * 0.01;
+      inital_pos.emplace_back(x, y, z);
+
+      if (FLAGS_testcase == 111 || FLAGS_testcase == 222) {
+        double omega = 50.0;
+        double vx = -omega * (y - 0.5);
+        double vy = omega * (x - 0.5);
+        double vz = 0.0;
+
+        inital_vel.emplace_back(vx, vy, vz);
+      } else {
+        inital_vel.emplace_back(0., 0., 0.);
+      }
     }
   }
 
@@ -239,6 +252,7 @@ int do_main() {
   mpm_config.contact_damping = FLAGS_damping;
   mpm_config.contact_friction_mu = FLAGS_friction;
   mpm_config.exact_line_search = FLAGS_exact_line_search;
+  mpm_config.margin = FLAGS_margin;
   deformable_model.SetMpmConfig(std::move(mpm_config));
 
   /* All rigid and deformable models have been added. Finalize the plant. */
@@ -301,7 +315,7 @@ int do_main() {
     meshcat->StopRecording();
     meshcat->PublishRecording();
 
-    std::ofstream htmlFile("/home/changyu/Desktop/cloth.html");
+    std::ofstream htmlFile("/home/changyu/drake/cloth.html");
     htmlFile << meshcat->StaticHtml();
     htmlFile.close();
   }
diff --git a/multibody/gpu_mpm/cpu_mpm_model.h b/multibody/gpu_mpm/cpu_mpm_model.h
index d6e6c183e70e..4053f9f24679 100644
--- a/multibody/gpu_mpm/cpu_mpm_model.h
+++ b/multibody/gpu_mpm/cpu_mpm_model.h
@@ -23,6 +23,7 @@ struct MpmConfigParams {
   int contact_query_frequency{1};
   int mpm_bc{-1};
   bool exact_line_search{false};
+  T margin{static_cast<T>(1.0)};
 };
 
 // NOTE(changyu): `CpuMpmModel` is responsible for storing the initial config in `DeformableModel`,
diff --git a/multibody/gpu_mpm/cuda_mpm_kernels.cuh b/multibody/gpu_mpm/cuda_mpm_kernels.cuh
index 3af64ae37797..f66309d6bc44 100644
--- a/multibody/gpu_mpm/cuda_mpm_kernels.cuh
+++ b/multibody/gpu_mpm/cuda_mpm_kernels.cuh
@@ -69,6 +69,10 @@ __global__ void initialize_fem_state_kernel(
   }
 }
 
+/**
+  \param[in]  F       \in 2x2
+  \param[out] dphi_dF \in 2x2
+*/
 template <typename T>
 __device__ __host__
 inline void fixed_corotated_PK1_2D(const T* F, T* dphi_dF) {
@@ -77,17 +81,55 @@ inline void fixed_corotated_PK1_2D(const T* F, T* dphi_dF) {
   T U[4], sig[4], V[4];
   svd2x2(F, U, sig, V);
   T R[4];
   matmulT<2, 2, 2, T>(U, V, R);
   T J = determinant2(F);
-  T Finv[4];
-  inverse2(F, Finv);
-  dphi_dF[0] = T(2.) * config::MU * (F[0] - R[0]) + config::LAMBDA * (J - T(1.)) * J * Finv[0];
-  dphi_dF[1] = T(2.) * config::MU * (F[1] - R[1]) + config::LAMBDA * (J - T(1.)) * J * Finv[2];
-  dphi_dF[2] = T(2.) * config::MU * (F[2] - R[2]) + config::LAMBDA * (J - T(1.)) * J * Finv[1];
-  dphi_dF[3] = T(2.) * config::MU * (F[3] - R[3]) + config::LAMBDA * (J - T(1.)) * J * Finv[3];
+  T JFinvT[4];
+  cofactor_matrix_2x2(F, JFinvT);
+
+  // P.noalias() = (T)2 * mu * (s.F - s.R) + lambda * (s.J - 1) * s.JFinvT;
+  dphi_dF[0] = T(2.) * config::MU * (F[0] - R[0]) + config::LAMBDA * (J - T(1.)) * JFinvT[0];
+  dphi_dF[1] = T(2.) * config::MU * (F[1] - R[1]) + config::LAMBDA * (J - T(1.)) * JFinvT[1];
+  dphi_dF[2] = T(2.) * config::MU * (F[2] - R[2]) + config::LAMBDA * (J - T(1.)) * JFinvT[2];
+  dphi_dF[3] = T(2.) * config::MU * (F[3] - R[3]) + config::LAMBDA * (J - T(1.)) * JFinvT[3];
 }
 
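// [Editorial sketch] The hunk above switches the volumetric term of the PK1
// stress from J * inverse(F) to cofactor_matrix_2x2. This rests on the 2x2
// identity cof(F) = J F^{-T}, which (unlike the explicit inverse) stays finite
// as J -> 0, i.e., for degenerate elements. Assuming the row-major storage
// used throughout this file (the real cofactor_matrix_2x2 may differ), a
// minimal version of that helper is:
template <typename T>
__device__ __host__
inline void cofactor_matrix_2x2_sketch(const T* F, T* JFinvT) {
  // F = [F0 F1; F2 F3]  =>  cof(F) = J * F^{-T} = [ F3 -F2; -F1 F0 ].
  JFinvT[0] =  F[3];
  JFinvT[1] = -F[2];
  JFinvT[2] = -F[1];
  JFinvT[3] =  F[0];
}
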
+/**
+  \param[in]  F  \in 2x2
+  \param[in]  dF \in 2x2
+  \param[out] dP \in 2x2
+*/
 template <typename T>
 __device__ __host__
-inline void compute_dphi_dF(const T* F, T* dphi_dF) {
+inline void fixed_corotated_PK1_differential_2D(const T* F, const T* dF, T* dP) {
+  T U[4], sig[4], V[4];
+  svd2x2(F, U, sig, V);
+  T R[4], S[4], tmp[4];
+  matmulT<2, 2, 2, T>(U, V, R);
+  matmul<2, 2, 2, T>(V, sig, tmp);
+  matmulT<2, 2, 2, T>(tmp, V, S);
+  T J = determinant2(F);
+  T JFinvT[4];
+  cofactor_matrix_2x2(F, JFinvT);
+
+  // dP.noalias() = lambda * s.JFinvT.cwiseProduct(dF).sum() * s.JFinvT;
+  T JFinvT_cwiseProduct_dF_sum = JFinvT[0] * dF[0] + JFinvT[1] * dF[1] + JFinvT[2] * dF[2] + JFinvT[3] * dF[3];
+  for (int i = 0; i < 4; ++i) dP[i] = config::LAMBDA * JFinvT_cwiseProduct_dF_sum * JFinvT[i];
+
+  // dP += 2 * mu * dF;
+  for (int i = 0; i < 4; ++i) dP[i] += T(2.) * config::MU * dF[i];
+
+  // addScaledRotationalDifferential(s.R, s.S, dF, -2 * mu, dP);
+  add_scaled_rotational_differential_2x2(R, S, dF, -T(2.) * config::MU, dP);
+
+  // addScaledCofactorMatrixDifferential(s.F, dF, lambda * (s.J - (T)1), dP);
+  add_scaled_cofactor_matrix_differential_2x2(F, dF, config::LAMBDA * (J - T(1.)), dP);
+}
+
+/**
+  \param[in]  F       \in 3x3
+  \param[out] dphi_dF \in 3x3
+*/
+template <typename T>
+__device__ __host__
+inline void first_piola(const T* F, T* dphi_dF) {
   // A00=0, A01=1, A02=2
   // A10=3, A11=4, A12=5
   // A20=6, A21=7, A22=8
@@ -153,6 +195,145 @@ inline void first_piola(const T* F, T* dphi_dF) {
   dphi_dF[8] = P_plane[8] + P_nonplane[8];
 }
 
+/**
+  \param[in]  F  \in 3x3
+  \param[in]  dF \in 3x3
+  \param[out] dP \in 3x3
+*/
+// Phat = Phat(R)
+// P(F)  = Q(F) Phat(R) = Q(F) Phat(R(F))
+// dP(F) = dQ(F) Phat(R(F)) + Q(F) dPhatdR(R(F)):dRdF(F):dF
+//       = dQ(F) Phat(R(F)) + Q(F) (dPhatdR(R(F)):dR)
+//       = dQ(F) Phat(R(F)) + Q(F) dPhat(R(F))
+template <typename T>
+__device__ __host__
+inline void first_piola_differential(const T* F, const T* dF, T* dP) {
+  // updateScratch(const TM& new_F, Scratch& s) { ...
+  // A00=0, A01=1, A02=2
+  // A10=3, A11=4, A12=5
+  // A20=6, A21=7, A22=8
+  T Q[9], R[9];
+  givens_QR<3, 3, T>(F, Q, R);
+  T R_hat[4] = {
+    R[0], R[1],
+    R[3], R[4]
+  };
+  T dphi_dF_2x2[4];
+  fixed_corotated_PK1_2D(R_hat, dphi_dF_2x2);
+  T P_hat[9] = {
+    dphi_dF_2x2[0], dphi_dF_2x2[1], 0,
+    dphi_dF_2x2[2], dphi_dF_2x2[3], 0,
+    0,              0,              0
+  };
+  T P_plane[9];
+  matmul<3, 3, 3, T>(Q, P_hat, P_plane);
+
+  T rr = R[2] * R[2] + R[5] * R[5];
+  T g = config::GAMMA * rr;
+  T gp = config::GAMMA;
+  T fp = 0;
+  if (R[8] < T(1.)) {
+    fp = -config::K * (T(1.) - R[8]) * (T(1.) - R[8]);
+  }
+
+  T A[9];
+  // TM A = TM::Zero();
+  // A(0, 0) = gp * sqr(s.R(0, 2));
+  // A(0, 1) = gp * s.R(0, 2) * s.R(1, 2);
+  // A(0, 2) = gp * s.R(2, 2) * s.R(0, 2);
+  // A(1, 1) = gp * sqr(s.R(1, 2));
+  // A(1, 2) = gp * s.R(2, 2) * s.R(1, 2);
+  // A(2, 2) = fp * s.R(2, 2);
+  // A(1, 0) = A(0, 1);
+  // A(2, 0) = A(0, 2);
+  // A(2, 1) = A(1, 2);
+  A[0] = gp * R[2] * R[2];
+  A[1] = gp * R[2] * R[5];
+  A[2] = gp * R[8] * R[2];
+  A[4] = gp * R[5] * R[5];
+  A[5] = gp * R[8] * R[5];
+  A[8] = fp * R[8];
+  A[3] = A[1];
+  A[6] = A[2];
+  A[7] = A[5];
+
+  T P_nonplane[9];
+  T Rinv[9], QA[9];
+  inverse3(R, Rinv);
+  matmul<3, 3, 3, T>(Q, A, QA);
+  matmulT<3, 3, 3, T>(QA, Rinv, P_nonplane);
+  // ... }
+
+  // TM dQ, dR;
+  // EIGEN_EXT::QRDifferential(s.Q, s.R, dF, dQ, dR);
+  T dQ[9], dR[9];
+  QR_differential_3x3(Q, R, dF, dQ, dR);
+
+  // Matrix dPhat;
+  // corotated.firstPiolaDifferential(s.corotated_scratch, dR.template topLeftCorner<2, 2>(), dPhat);
+  T dPhat[4];
+  T dR_hat[4] = {
+    dR[0], dR[1],
+    dR[3], dR[4]
+  };
+  fixed_corotated_PK1_differential_2D(R_hat, dR_hat, dPhat);
+
+  // TM dPhat_full = TM::Zero();
+  T dPhat_full[9];
+  for (int i = 0; i < 9; ++i) dPhat_full[i] = 0;
+
+  // dPhat_full.template topLeftCorner<2, 2>() = dPhat;
+  dPhat_full[0] = dPhat[0];
+  dPhat_full[1] = dPhat[1];
+  dPhat_full[3] = dPhat[2];
+  dPhat_full[4] = dPhat[3];
+
+  // dP = s.Q * dPhat_full + dQ * s.Phat;
+  T QdPhat_full[9], dQPhat[9];
+  matmul<3, 3, 3, T>(Q, dPhat_full, QdPhat_full);
+  matmul<3, 3, 3, T>(dQ, P_hat, dQPhat);
+  for (int i = 0; i < 9; ++i) dP[i] = QdPhat_full[i] + dQPhat[i];
+
+  // TM dA;
+  T dA[9];
+
+  // T gp = gamma; // dgp = 0;
+  // T fp = 0, dfp = 0;
+  // if (s.R(2, 2) < (T)1) {
+  //   T zz = 1 - s.R(2, 2);
+  //   fp = -k * sqr(zz);
+  //   dfp = 2 * k * zz * dR(2, 2);
+  // }
+  T dfp = 0;  // NOTE: others defined above
+  if (R[8] < T(1.)) {
+    T zz = T(1.) - R[8];
+    dfp = T(2.) * config::K * zz * dR[8];
+  }
+
+  dA[0] = gp * T(2.) * R[2] * dR[2];
+  dA[1] = gp * (dR[2] * R[5] + R[2] * dR[5]);
+  dA[2] = gp * (dR[8] * R[2] + R[8] * dR[2]);
+  dA[4] = gp * T(2.) * R[5] * dR[5];
+  dA[5] = gp * (dR[8] * R[5] + R[8] * dR[5]);
+  dA[8] = dfp * R[8] + fp * dR[8];
+  dA[3] = dA[1];
+  dA[6] = dA[2];
+  dA[7] = dA[5];
+
+  // dP += (s.Q * dA - s.P_nonplane * dR.transpose()) * s.R_inverse.transpose() - s.Q * dQ.transpose() * s.P_nonplane;
+  T Q_dA[9], P_nonplane_dRT[9], Q_dA_minus_P_nonplane_dRt[9], Q_dA_minus_P_nonplane_dRt_RinvT[9];
+  matmul<3, 3, 3, T>(Q, dA, Q_dA);
+  matmulT<3, 3, 3, T>(P_nonplane, dR, P_nonplane_dRT);
+  for (int i = 0; i < 9; ++i) Q_dA_minus_P_nonplane_dRt[i] = Q_dA[i] - P_nonplane_dRT[i];
+  matmulT<3, 3, 3, T>(Q_dA_minus_P_nonplane_dRt, Rinv, Q_dA_minus_P_nonplane_dRt_RinvT);
+
+  T Q_dQT[9], Q_dQT_P_nonplane[9];
+  matmulT<3, 3, 3, T>(Q, dQ, Q_dQT);
+  matmul<3, 3, 3, T>(Q_dQT, P_nonplane, Q_dQT_P_nonplane);
+
+  for (int i = 0; i < 9; ++i) dP[i] += Q_dA_minus_P_nonplane_dRt_RinvT[i] - Q_dQT_P_nonplane[i];
+}
+
 template <typename T>
 __device__ __host__
 inline void project_strain(T* F) {
@@ -190,13 +371,14 @@ inline void project_strain(T* F) {
   matmul<3, 3, 3, T>(Q, R, F);
 }
 
-template <typename T>
+template <typename T, bool POST_CONTACT>
 __global__ void calc_fem_state_and_force_kernel(
     const size_t n_faces,
     const int* indices,
     const int* index_mappings,
     const T* volumes,
     const T* affine_matrices,
+    const T* affine_matrices_star,
     const T* Dm_inverses,
     T* positions,
     T* velocities,
@@ -217,7 +399,18 @@ __global__ void calc_fem_state_and_force_kernel(
     }
 
     T* F = &deformation_gradients[idx * 9];
-    const T* C = &affine_matrices[face_pid * 9];
+    T C[9];
+    if constexpr (POST_CONTACT) {
+      #pragma unroll
+      for (int i = 0; i < 9; ++i) {
+        C[i] = (affine_matrices[face_pid * 9 + i] - affine_matrices_star[face_pid * 9 + i]);
+      }
+    } else {
+      #pragma unroll
+      for (int i = 0; i < 9; ++i) {
+        C[i] = affine_matrices[face_pid * 9 + i];
+      }
+    }
 
     T ctF[9];  // cotangent F
     // Eq.4 in Jiang et.al 2017, dE_p, β(x̂) = (∇x̂)p dE,n_p, β
@@ -265,7 +458,7 @@ __global__ void calc_fem_state_and_force_kernel(
     }
 
     T VP_local[9];
-    compute_dphi_dF(ctF, VP_local);
+    first_piola(ctF, VP_local);
     #pragma unroll
     for (int i = 0; i < 9; ++i) {
       VP_local[i] *= volumes[face_pid];
@@ -303,6 +496,98 @@ __global__ void calc_fem_state_and_force_kernel(
   }
 }
 
+template <typename T>
+__global__ void calc_implicit_fem_state_and_differential_kernel(
+    const size_t n_faces,
+    const int* indices,
+    const int* index_mappings,
+    const T* volumes,
+    const T* Dm_inverses,
+    const T* deformation_gradients,
+
+    // implicit states
+    const T* dvs,
+    const T* gradDvs,
+    T* dforces,
+    T* dtaus) {
+  uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x;
+  if (idx < n_faces) {
+    // NOTE: read index_mappings only after the bounds check above.
+    int face_pid = index_mappings[idx];
+    int v0 = index_mappings[indices[idx * 3 + 0]];
+    int v1 = index_mappings[indices[idx * 3 + 1]];
+    int v2 = index_mappings[indices[idx * 3 + 2]];
+
+    const T* F_n = &deformation_gradients[idx * 9];
+    const T* gradDv = &gradDvs[face_pid * 9];
+
+    // tangent_dF
+    T d0[3], d1[3];
+    #pragma unroll
+    for (int i = 0; i < 3; ++i) {
+      d0[i] = dvs[v1 * 3 + i] - dvs[v0 * 3 + i];
+      d1[i] = dvs[v2 * 3 + i] - dvs[v0 * 3 + i];
+    }
+    T ds[6] {
+      d0[0], d1[0],
+      d0[1], d1[1],
+      d0[2], d1[2]
+    };
+
+    T tangent_dF[6];
+    const T* Dm_inverse = &Dm_inverses[idx * 4];
+    matmul<3, 2, 2>(ds, Dm_inverse, tangent_dF);
+
+    // cotangent_dF
+    T ctF_n_c2[3] = { F_n[2], F_n[5], F_n[8] };
+    T ct_dF[3];
+    matmul<3, 3, 1, T>(gradDv, ctF_n_c2, ct_dF);
+
+    // dF
+    // dF << t_dF, ct_dF;
+    T dF[9] = {
+      tangent_dF[0], tangent_dF[1], ct_dF[0],
+      tangent_dF[2], tangent_dF[3], ct_dF[1],
+      tangent_dF[4], tangent_dF[5], ct_dF[2]
+    };
+
+    T VdP_local[9];
+    first_piola_differential(F_n, dF, VdP_local);
+    #pragma unroll
+    for (int i = 0; i < 9; ++i) {
+      VdP_local[i] *= volumes[face_pid];
+    }
+
+    // technical document, Eq. (15), part 2
+    T VdP_local_c2[3] = { VdP_local[2], VdP_local[5], VdP_local[8] };
+    outer_product<3, T>(VdP_local_c2, ct_dF, &dtaus[face_pid * 9]);
+
+    T grad_N_hat[6] = {
+      T(-1.), T(1.), T(0.),
+      T(-1.), T(0.), T(1.)
+    };
+    T grad_N[6];
+    T Dm_inverse_T[4];
+    transpose<2, 2, T>(Dm_inverse, Dm_inverse_T);
+    matmul<2, 2, 3, T>(Dm_inverse_T, grad_N_hat, grad_N);
+    T VdP_local_c01[6] = {
+      VdP_local[0], VdP_local[1],
+      VdP_local[3], VdP_local[4],
+      VdP_local[6], VdP_local[7]
+    };
+
+    T G[9];
+    matmul<3, 2, 3, T>(VdP_local_c01, grad_N, G);
+
+    // technical document, Eq. (15), part 1
+    #pragma unroll
+    for (int i = 0; i < 3; ++i) {
+      atomicAdd(&dforces[v0 * 3 + i], -G[i * 3 + 0]);
+      atomicAdd(&dforces[v1 * 3 + i], -G[i * 3 + 1]);
+      atomicAdd(&dforces[v2 * 3 + i], -G[i * 3 + 2]);
+    }
+  }
+}
+
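// [Editorial sketch] first_piola_differential computes dP = (dP/dF) : dF, the
// ingredient of the matrix-free K*dv products assembled by the solver below.
// A standard sanity check for such a differential is a central finite
// difference in an arbitrary direction dF. A host-side sketch using the
// helpers defined above (assumes <cmath>; not part of the patch):
template <typename T>
inline T first_piola_differential_fd_error(const T* F, const T* dF, const T eps) {
  T Fp[9], Fm[9], Pp[9], Pm[9], dP[9];
  for (int i = 0; i < 9; ++i) {
    Fp[i] = F[i] + eps * dF[i];
    Fm[i] = F[i] - eps * dF[i];
  }
  first_piola(Fp, Pp);
  first_piola(Fm, Pm);
  first_piola_differential(F, dF, dP);
  T err = T(0.);
  for (int i = 0; i < 9; ++i) {
    // Central difference approximates dP to O(eps^2); err should shrink as
    // eps -> 0 away from the singular configurations handled by the QR/SVD.
    const T e = std::abs((Pp[i] - Pm[i]) / (T(2.) * eps) - dP[i]);
    if (e > err) err = e;
  }
  return err;
}
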
 __device__ __host__ inline std::uint32_t contract_bits(std::uint32_t v) noexcept {
   v &= 0x09249249u;
@@ -425,7 +710,7 @@ __global__ void compute_sorted_state_kernel(const size_t n_particles,
   }
 }
 
-template <int BLOCK_DIM, typename T>
+template <int BLOCK_DIM, typename T, bool APPLY_GRAVITY, bool APPLY_ELASTICITY, bool APPLY_KDV>
 __global__ void particle_to_grid_kernel(const size_t n_particles,
                                         const T* positions,
                                         const T* velocities,
@@ -490,10 +775,17 @@ __global__ void particle_to_grid_kernel(const size_t n_particles,
 
     T B[9];
     const T* C = &affine_matrices[idx * 9];
-    const T* stress = &taus[idx * 9];
     #pragma unroll
     for (int i = 0; i < 9; ++i) {
-      B[i] = (-dt * config::G_D_INV) * stress[i] + C[i] * mass;
+      if constexpr (APPLY_KDV) {
+        const T* dstress = &taus[idx * 9];
+        B[i] = (-config::G_D_INV) * dstress[i];
+      } else if constexpr (APPLY_ELASTICITY) {
+        const T* stress = &taus[idx * 9];
+        B[i] = (-dt * config::G_D_INV) * stress[i] + C[i] * mass;
+      } else {
+        B[i] = C[i] * mass;
+      }
     }
 
     T val[4];
@@ -512,20 +804,37 @@ __global__ void particle_to_grid_kernel(const size_t n_particles,
 
           T weight = weights[threadIdx.x][i][0] * weights[threadIdx.x][j][1] * weights[threadIdx.x][k][2];
 
-          val[0] = mass * weight;
-          val[1] = vel[0] * val[0];
-          val[2] = vel[1] * val[0];
-          val[3] = vel[2] * val[0];
+          if constexpr (!APPLY_KDV) {
+            val[0] = mass * weight;
+            val[1] = vel[0] * val[0];
+            val[2] = vel[1] * val[0];
+            val[3] = vel[2] * val[0];
+          } else {
+            val[0] = 0;
+            val[1] = 0;
+            val[2] = 0;
+            val[3] = 0;
+          }
           // apply gravity
-          val[config::GRAVITY_AXIS + 1] += val[0] * config::GRAVITY * dt;
+          if constexpr (APPLY_GRAVITY) {
+            val[config::GRAVITY_AXIS + 1] += val[0] * config::GRAVITY * dt;
+          }
           val[1] += (B[0] * xi_minus_xp[0] + B[1] * xi_minus_xp[1] + B[2] * xi_minus_xp[2]) * weight;
           val[2] += (B[3] * xi_minus_xp[0] + B[4] * xi_minus_xp[1] + B[5] * xi_minus_xp[2]) * weight;
           val[3] += (B[6] * xi_minus_xp[0] + B[7] * xi_minus_xp[1] + B[8] * xi_minus_xp[2]) * weight;
-          const T* force = &forces[idx * 3];
-          val[1] += force[0] * dt * weight;
-          val[2] += force[1] * dt * weight;
-          val[3] += force[2] * dt * weight;
+          if constexpr (APPLY_KDV) {
+            const T* dforce = &forces[idx * 3];
+            val[1] += dforce[0] * weight;
+            val[2] += dforce[1] * weight;
+            val[3] += dforce[2] * weight;
+          }
+          if constexpr (APPLY_ELASTICITY) {
+            const T* force = &forces[idx * 3];
+            val[1] += force[0] * dt * weight;
+            val[2] += force[1] * dt * weight;
+            val[3] += force[2] * dt * weight;
+          }
 
           for (int iter = 1; iter <= mark; iter <<= 1) {
             T tmp[4];
@@ -541,7 +850,9 @@ __global__ void particle_to_grid_kernel(const size_t n_particles,
             const uint32_t target_cell_index = cell_index(base[0] + i, base[1] + j, base[2] + k);
             const uint32_t target_grid_index = target_cell_index >> (config::G_BLOCK_BITS * 3);
             g_touched_flags[target_grid_index] = 1;
-            atomicAdd(&(g_masses[target_cell_index]), val[0]);
+            if constexpr (!APPLY_KDV) {
+              atomicAdd(&(g_masses[target_cell_index]), val[0]);
+            }
             atomicAdd(&(g_momentum[target_cell_index * 3 + 0]), val[1]);
             atomicAdd(&(g_momentum[target_cell_index * 3 + 1]), val[2]);
             atomicAdd(&(g_momentum[target_cell_index * 3 + 2]), val[3]);
@@ -639,6 +950,20 @@ __global__ void clean_grid_contact_kernel(
   }
 }
 
+template <typename T>
+__global__ void clean_grid_Kdv_kernel(
+    const uint32_t touched_cells_cnt,
+    const uint32_t* g_touched_ids,
+    T* g_Kdv) {
+  uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x;
+  if (idx < touched_cells_cnt) {
+    uint32_t block_idx = g_touched_ids[idx >> (config::G_BLOCK_BITS * 3)];
+    uint32_t cell_idx = (block_idx << (config::G_BLOCK_BITS * 3)) | (idx & config::G_BLOCK_VOLUME_MASK);
+    #pragma unroll
+    for (int i = 0; i < 3; ++i) g_Kdv[cell_idx * 3 + i] = 0;
+  }
+}
+
 template <typename T>
 __global__ void update_grid_kernel(
     const uint32_t touched_cells_cnt,
@@ -805,13 +1130,15 @@ __global__ void update_grid_kernel(
   }
 }
 
-template <int BLOCK_DIM, typename T>
+template <int BLOCK_DIM, typename T, bool POST_CONTACT>
 __global__ void grid_to_particle_kernel(const size_t n_particles,
                                         T* positions,
                                         T* velocities,
                                         T* affine_matrices,
+                                        T* affine_matrices_star,
                                         const T* g_masses,
                                         const T* g_momentum,
+                                        const T* g_v_star,
                                         const T dt) {
   uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x;
   // In [Fei et.al 2021],
@@ -839,15 +1166,19 @@ __global__ void grid_to_particle_kernel(const size_t n_particles,
     weights[threadIdx.x][2][i] = T(0.5) * (fx[i] - T(0.5)) * (fx[i] - T(0.5));
   }
 
+  T old_v[3];
   T new_v[3];
   T new_C[9], new_CT[9];
+  T old_C[9], old_CT[9];
   #pragma unroll
   for (int i = 0; i < 3; ++i) {
     new_v[i] = 0;
+    old_v[i] = 0;
   }
   #pragma unroll
   for (int i = 0; i < 9; ++i) {
     new_C[i] = 0;
+    old_C[i] = 0;
   }
 
   #pragma unroll
@@ -875,6 +1206,11 @@ __global__ void grid_to_particle_kernel(const size_t n_particles,
           new_v[0] += weight * g_v[0];
           new_v[1] += weight * g_v[1];
           new_v[2] += weight * g_v[2];
+          if constexpr (POST_CONTACT) {
+            old_v[0] += weight * g_v_star[target_cell_index * 3 + 0];
+            old_v[1] += weight * g_v_star[target_cell_index * 3 + 1];
+            old_v[2] += weight * g_v_star[target_cell_index * 3 + 2];
+          }
 
           // printf("weight=%lf, g_v=(%lf %lf %lf)\n", weight, g_v[0], g_v[1], g_v[2]);
           // printf("i=%d j=%d k=%d\n", i, j, k);
@@ -890,6 +1226,17 @@ __global__ void grid_to_particle_kernel(const size_t n_particles,
           new_C[6] += 4 * config::G_DX_INV * weight * g_v[2] * xi_minus_xp[0];
           new_C[7] += 4 * config::G_DX_INV * weight * g_v[2] * xi_minus_xp[1];
           new_C[8] += 4 * config::G_DX_INV * weight * g_v[2] * xi_minus_xp[2];
+          if constexpr (POST_CONTACT) {
+            old_C[0] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 0] * xi_minus_xp[0];
+            old_C[1] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 0] * xi_minus_xp[1];
+            old_C[2] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 0] * xi_minus_xp[2];
+            old_C[3] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 1] * xi_minus_xp[0];
+            old_C[4] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 1] * xi_minus_xp[1];
+            old_C[5] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 1] * xi_minus_xp[2];
+            old_C[6] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 2] * xi_minus_xp[0];
+            old_C[7] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 2] * xi_minus_xp[1];
+            old_C[8] += 4 * config::G_DX_INV * weight * g_v_star[target_cell_index * 3 + 2] * xi_minus_xp[2];
+          }
         }
       }
     }
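// [Editorial aside] Every transfer kernel in this file rebuilds the same
// quadratic B-spline weights
//   w0 = 0.5*(1.5 - fx)^2,  w1 = 0.75 - (fx - 1)^2,  w2 = 0.5*(fx - 0.5)^2,
// which form a partition of unity for fx in [0.5, 1.5]; that identity is what
// keeps the P2G/G2P transfers conservative. A one-line host check (editorial
// sketch, assumes <cmath>):
inline bool bspline_weights_sum_to_one_sketch(double fx) {
  const double w0 = 0.5 * (1.5 - fx) * (1.5 - fx);
  const double w1 = 0.75 - (fx - 1.0) * (fx - 1.0);
  const double w2 = 0.5 * (fx - 0.5) * (fx - 0.5);
  return std::abs(w0 + w1 + w2 - 1.0) < 1e-12;  // exact algebraic identity
}
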
@@ -905,20 +1252,49 @@ __global__ void grid_to_particle_kernel(const size_t n_particles, velocities[idx * 3 + 2] = new_v[2]; transpose<3, 3, T>(new_C, new_CT); - affine_matrices[idx * 9 + 0] = ((config::V + T(1.)) * T(.5)) * new_C[0] + ((config::V - T(1.)) * T(.5)) * new_CT[0]; - affine_matrices[idx * 9 + 1] = ((config::V + T(1.)) * T(.5)) * new_C[1] + ((config::V - T(1.)) * T(.5)) * new_CT[1]; - affine_matrices[idx * 9 + 2] = ((config::V + T(1.)) * T(.5)) * new_C[2] + ((config::V - T(1.)) * T(.5)) * new_CT[2]; - affine_matrices[idx * 9 + 3] = ((config::V + T(1.)) * T(.5)) * new_C[3] + ((config::V - T(1.)) * T(.5)) * new_CT[3]; - affine_matrices[idx * 9 + 4] = ((config::V + T(1.)) * T(.5)) * new_C[4] + ((config::V - T(1.)) * T(.5)) * new_CT[4]; - affine_matrices[idx * 9 + 5] = ((config::V + T(1.)) * T(.5)) * new_C[5] + ((config::V - T(1.)) * T(.5)) * new_CT[5]; - affine_matrices[idx * 9 + 6] = ((config::V + T(1.)) * T(.5)) * new_C[6] + ((config::V - T(1.)) * T(.5)) * new_CT[6]; - affine_matrices[idx * 9 + 7] = ((config::V + T(1.)) * T(.5)) * new_C[7] + ((config::V - T(1.)) * T(.5)) * new_CT[7]; - affine_matrices[idx * 9 + 8] = ((config::V + T(1.)) * T(.5)) * new_C[8] + ((config::V - T(1.)) * T(.5)) * new_CT[8]; - // Advection - positions[idx * 3 + 0] += new_v[0] * dt; - positions[idx * 3 + 1] += new_v[1] * dt; - positions[idx * 3 + 2] += new_v[2] * dt; + if constexpr (POST_CONTACT) { + transpose<3, 3, T>(old_C, old_CT); + + affine_matrices[idx * 9 + 0] = ((config::V + T(1.)) * T(.5)) * new_C[0] + ((config::V - T(1.)) * T(.5)) * new_CT[0]; + affine_matrices[idx * 9 + 1] = ((config::V + T(1.)) * T(.5)) * new_C[1] + ((config::V - T(1.)) * T(.5)) * new_CT[1]; + affine_matrices[idx * 9 + 2] = ((config::V + T(1.)) * T(.5)) * new_C[2] + ((config::V - T(1.)) * T(.5)) * new_CT[2]; + affine_matrices[idx * 9 + 3] = ((config::V + T(1.)) * T(.5)) * new_C[3] + ((config::V - T(1.)) * T(.5)) * new_CT[3]; + affine_matrices[idx * 9 + 4] = ((config::V + T(1.)) * T(.5)) * new_C[4] + ((config::V - T(1.)) * T(.5)) * new_CT[4]; + affine_matrices[idx * 9 + 5] = ((config::V + T(1.)) * T(.5)) * new_C[5] + ((config::V - T(1.)) * T(.5)) * new_CT[5]; + affine_matrices[idx * 9 + 6] = ((config::V + T(1.)) * T(.5)) * new_C[6] + ((config::V - T(1.)) * T(.5)) * new_CT[6]; + affine_matrices[idx * 9 + 7] = ((config::V + T(1.)) * T(.5)) * new_C[7] + ((config::V - T(1.)) * T(.5)) * new_CT[7]; + affine_matrices[idx * 9 + 8] = ((config::V + T(1.)) * T(.5)) * new_C[8] + ((config::V - T(1.)) * T(.5)) * new_CT[8]; + + affine_matrices_star[idx * 9 + 0] = ((config::V + T(1.)) * T(.5)) * old_C[0] + ((config::V - T(1.)) * T(.5)) * old_CT[0]; + affine_matrices_star[idx * 9 + 1] = ((config::V + T(1.)) * T(.5)) * old_C[1] + ((config::V - T(1.)) * T(.5)) * old_CT[1]; + affine_matrices_star[idx * 9 + 2] = ((config::V + T(1.)) * T(.5)) * old_C[2] + ((config::V - T(1.)) * T(.5)) * old_CT[2]; + affine_matrices_star[idx * 9 + 3] = ((config::V + T(1.)) * T(.5)) * old_C[3] + ((config::V - T(1.)) * T(.5)) * old_CT[3]; + affine_matrices_star[idx * 9 + 4] = ((config::V + T(1.)) * T(.5)) * old_C[4] + ((config::V - T(1.)) * T(.5)) * old_CT[4]; + affine_matrices_star[idx * 9 + 5] = ((config::V + T(1.)) * T(.5)) * old_C[5] + ((config::V - T(1.)) * T(.5)) * old_CT[5]; + affine_matrices_star[idx * 9 + 6] = ((config::V + T(1.)) * T(.5)) * old_C[6] + ((config::V - T(1.)) * T(.5)) * old_CT[6]; + affine_matrices_star[idx * 9 + 7] = ((config::V + T(1.)) * T(.5)) * old_C[7] + ((config::V - T(1.)) * T(.5)) * old_CT[7]; + affine_matrices_star[idx * 9 + 8] = 
((config::V + T(1.)) * T(.5)) * old_C[8] + ((config::V - T(1.)) * T(.5)) * old_CT[8];
+
+    positions[idx * 3 + 0] += (new_v[0] - old_v[0]) * dt;
+    positions[idx * 3 + 1] += (new_v[1] - old_v[1]) * dt;
+    positions[idx * 3 + 2] += (new_v[2] - old_v[2]) * dt;
+  }
+  else {
+    affine_matrices[idx * 9 + 0] = ((config::V + T(1.)) * T(.5)) * new_C[0] + ((config::V - T(1.)) * T(.5)) * new_CT[0];
+    affine_matrices[idx * 9 + 1] = ((config::V + T(1.)) * T(.5)) * new_C[1] + ((config::V - T(1.)) * T(.5)) * new_CT[1];
+    affine_matrices[idx * 9 + 2] = ((config::V + T(1.)) * T(.5)) * new_C[2] + ((config::V - T(1.)) * T(.5)) * new_CT[2];
+    affine_matrices[idx * 9 + 3] = ((config::V + T(1.)) * T(.5)) * new_C[3] + ((config::V - T(1.)) * T(.5)) * new_CT[3];
+    affine_matrices[idx * 9 + 4] = ((config::V + T(1.)) * T(.5)) * new_C[4] + ((config::V - T(1.)) * T(.5)) * new_CT[4];
+    affine_matrices[idx * 9 + 5] = ((config::V + T(1.)) * T(.5)) * new_C[5] + ((config::V - T(1.)) * T(.5)) * new_CT[5];
+    affine_matrices[idx * 9 + 6] = ((config::V + T(1.)) * T(.5)) * new_C[6] + ((config::V - T(1.)) * T(.5)) * new_CT[6];
+    affine_matrices[idx * 9 + 7] = ((config::V + T(1.)) * T(.5)) * new_C[7] + ((config::V - T(1.)) * T(.5)) * new_CT[7];
+    affine_matrices[idx * 9 + 8] = ((config::V + T(1.)) * T(.5)) * new_C[8] + ((config::V - T(1.)) * T(.5)) * new_CT[8];
+
+    positions[idx * 3 + 0] += new_v[0] * dt;
+    positions[idx * 3 + 1] += new_v[1] * dt;
+    positions[idx * 3 + 2] += new_v[2] * dt;
+  }
 
     // printf("v=\n");
     // printf("[%.8lf %.8lf %.8lf]\n", new_v[0], new_v[1], new_v[2]);
@@ -930,6 +1306,122 @@ __global__ void grid_to_particle_kernel(const size_t n_particles,
   }
 }
 
+// NOTE(changyu): same as:
+//
+// template <class T, int dim>
+// void MpmForceBase<T, dim>::computeDvAndGradDv(const TVStack& dv) const
+// {
+//     ZIRAN_QUIET_TIMER();
+//     evalInterpolantAndGradient([&](int node_id) -> TV { return dv.col(node_id); }, scratch_vp, scratch_gradV);
+// }
+//
+template <int BLOCK_DIM, typename T>
+__global__ void grid_to_particle_dv_transfer_kernel(const size_t n_particles,
+                                                    const T* positions,
+                                                    T* dvs,
+                                                    T* gradDvs,
+                                                    const T* g_momentum,
+                                                    const T* g_Dir,
+                                                    const T* g_v_star,
+                                                    const T global_alpha,
+                                                    const bool apply_dir) {
+  uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x;
+  // In [Fei et.al 2021],
+  // we spill the B-spline weights (nine floats for each thread) by storing them into the shared memory
+  // (instead of registers), although none of them is shared between threads.
+  // This choice increases performance, particularly when the number of threads is large (§6.2.5).
+  __shared__ T weights[BLOCK_DIM][3][3];
+
+  if (idx < n_particles) {
+    uint32_t base[3] = {
+      static_cast<uint32_t>(positions[idx * 3 + 0] * config::G_DX_INV - T(0.5)),
+      static_cast<uint32_t>(positions[idx * 3 + 1] * config::G_DX_INV - T(0.5)),
+      static_cast<uint32_t>(positions[idx * 3 + 2] * config::G_DX_INV - T(0.5))
+    };
+    T fx[3] = {
+      positions[idx * 3 + 0] * config::G_DX_INV - static_cast<T>(base[0]),
+      positions[idx * 3 + 1] * config::G_DX_INV - static_cast<T>(base[1]),
+      positions[idx * 3 + 2] * config::G_DX_INV - static_cast<T>(base[2])
+    };
+    // Quadratic kernels [http://mpm.graphics Eqn.
123, with x=fx, fx-1,fx-2] + #pragma unroll + for (int i = 0; i < 3; ++i) { + weights[threadIdx.x][0][i] = T(0.5) * (T(1.5) - fx[i]) * (T(1.5) - fx[i]); + weights[threadIdx.x][1][i] = T(0.75) - (fx[i] - T(1.0)) * (fx[i] - T(1.0)); + weights[threadIdx.x][2][i] = T(0.5) * (fx[i] - T(0.5)) * (fx[i] - T(0.5)); + } + + T dv[3]; + T gradDv[9]; + #pragma unroll + for (int i = 0; i < 3; ++i) { + dv[i] = 0; + } + #pragma unroll + for (int i = 0; i < 9; ++i) { + gradDv[i] = 0; + } + + #pragma unroll + for (int i = 0; i < 3; ++i) { + #pragma unroll + for (int j = 0; j < 3; ++j) { + #pragma unroll + for (int k = 0; k < 3; ++k) { + T xi_minus_xp[3] = { + (i - fx[0]), + (j - fx[1]), + (k - fx[2]) + }; + + const uint32_t target_cell_index = cell_index(base[0] + i, base[1] + j, base[2] + k); + + T weight = weights[threadIdx.x][i][0] * weights[threadIdx.x][j][1] * weights[threadIdx.x][k][2]; + + T g_v[3]; + if (apply_dir) { + g_v[0] = g_Dir[target_cell_index * 3 + 0]; + g_v[1] = g_Dir[target_cell_index * 3 + 1]; + g_v[2] = g_Dir[target_cell_index * 3 + 2]; + } else { + g_v[0] = (g_momentum[target_cell_index * 3 + 0] + global_alpha * g_Dir[target_cell_index * 3 + 0]) - g_v_star[target_cell_index * 3 + 0]; + g_v[1] = (g_momentum[target_cell_index * 3 + 1] + global_alpha * g_Dir[target_cell_index * 3 + 1]) - g_v_star[target_cell_index * 3 + 1]; + g_v[2] = (g_momentum[target_cell_index * 3 + 2] + global_alpha * g_Dir[target_cell_index * 3 + 2]) - g_v_star[target_cell_index * 3 + 2]; + } + + dv[0] += weight * g_v[0]; + dv[1] += weight * g_v[1]; + dv[2] += weight * g_v[2]; + + gradDv[0] += 4 * config::G_DX_INV * weight * g_v[0] * xi_minus_xp[0]; + gradDv[1] += 4 * config::G_DX_INV * weight * g_v[0] * xi_minus_xp[1]; + gradDv[2] += 4 * config::G_DX_INV * weight * g_v[0] * xi_minus_xp[2]; + gradDv[3] += 4 * config::G_DX_INV * weight * g_v[1] * xi_minus_xp[0]; + gradDv[4] += 4 * config::G_DX_INV * weight * g_v[1] * xi_minus_xp[1]; + gradDv[5] += 4 * config::G_DX_INV * weight * g_v[1] * xi_minus_xp[2]; + gradDv[6] += 4 * config::G_DX_INV * weight * g_v[2] * xi_minus_xp[0]; + gradDv[7] += 4 * config::G_DX_INV * weight * g_v[2] * xi_minus_xp[1]; + gradDv[8] += 4 * config::G_DX_INV * weight * g_v[2] * xi_minus_xp[2]; + } + } + } + + dvs[idx * 3 + 0] = dv[0]; + dvs[idx * 3 + 1] = dv[1]; + dvs[idx * 3 + 2] = dv[2]; + + gradDvs[idx * 9 + 0] = gradDv[0]; + gradDvs[idx * 9 + 1] = gradDv[1]; + gradDvs[idx * 9 + 2] = gradDv[2]; + gradDvs[idx * 9 + 3] = gradDv[3]; + gradDvs[idx * 9 + 4] = gradDv[4]; + gradDvs[idx * 9 + 5] = gradDv[5]; + gradDvs[idx * 9 + 6] = gradDv[6]; + gradDvs[idx * 9 + 7] = gradDv[7]; + gradDvs[idx * 9 + 8] = gradDv[8]; + } +} + template __global__ void initialize_contact_velocities(const size_t n_contacts, T* contact_vel, @@ -963,38 +1455,9 @@ __device__ __host__ inline void get_color_coordinates(UINT x, UINT y, UINT z, UI // SAP model template __device__ void compute_contact_grad_and_hess( - const T phi0, const T dt, const T stiffness, const T damping, const T friction_mu, - const T *v0, const T *v_next, + const T phi_star, const T dt, const T stiffness, const T damping, const T friction_mu, + const T *v_star, const T *v_next, T *C_Hess, T *C_Grad) { - /* Solves the contact problem for a single particle against a rigid body - assuming the rigid body has infinite mass and inertia. - - Let phi be the penetration distance (positive when penetration occurs) and vn - be the relative velocity of the particle with respect to the rigid body in the - normal direction (vn>0 when separting). 
Then we have phi_dot = -vn. - - In the normal direction, the contact force is modeled as a linear elastic system - with Hunt-Crossley dissipation. - - f = k * phi_+ * (1 + d * phi_dot)_+ - - where phi_+ = max(0, phi) - - The momentum balance in the normal direction becomes - - m(vn_next - vn) = k * dt * (phi0 - dt * vn_next)_+ * (1 - d * vn_next)_+ - - where we used the fact that phi = phi0 - dt * vn_next. This is a quadratic - equation in vn_next, and we solve it to get the next velocity vn_next. - - The quadratic equation is ax^2 + bx + c = 0, where - - a = k * d * dt^2 - b = -m - (k * dt * (dt + d * phi0)) - c = k * dt * phi0 + m * vn - - After solving for vn_next, we check if the friction force lies in the friction - cone, if not, we project the velocity back into the friction cone. */ constexpr int kZAxis = 2; // NOTE: follow the pattern in https://github.com/RobotLocomotion/drake/blob/master/multibody/contact_solvers/sap/sap_hunt_crossley_constraint.cc @@ -1003,8 +1466,8 @@ __device__ void compute_contact_grad_and_hess( // NOTE (changyu): in math eqns from (https://arxiv.org/pdf/2312.03908) and in code, // ϕ differs by a sign. - const T phi = phi0 - dt * v_next[kZAxis]; - // If (-ϕ0 - δt vn)+ or (1 − dvn)+ equals zero, no impulse should be applied + const T phi = phi_star - dt * (v_next[kZAxis] - v_star[kZAxis]); + // If (-ϕ* - δt (vn - v*))+ or (1 − dvn)+ equals zero, no impulse should be applied if (T(1.) - damping * v_next[kZAxis] <= 0 || phi <= 0) { // Quick exits #pragma unroll for (int i = 0; i < 9; ++i) C_Hess[i] = 0; @@ -1014,32 +1477,35 @@ __device__ void compute_contact_grad_and_hess( else { // normal component (Compliant Contact) // fn(ϕ, vn) = k (−ϕ)+ (1 − dvn)+ - // γn(vn) = n(vn; ϕ0) = δt fn(ϕ0 + δt vn, vn) - // = δt k (-ϕ0 - δt vn)+ (1 − dvn)+ - const T yn = dt * stiffness * (phi0 - dt * v_next[kZAxis]) * (T(1.) - damping * v_next[kZAxis]); // Eq. 13 + // γn(vn) = n(vn; ϕ*) = δt fn(ϕ* + δt (vn - v*), vn) + // = δt k (-ϕ* - δt (vn - v*))+ (1 − dvn)+ + const T yn = dt * stiffness * (phi_star - dt * (v_next[kZAxis] - v_star[kZAxis])) * (T(1.) - damping * v_next[kZAxis]); // Eq. 13 // ∂²ln / ∂vn² = - δt ∂ fn / ∂vn - // = - δt ∂ fn(ϕ0 + δt vn, vn) / ∂vn - // = - δt k ∂ (-ϕ0 - δt vn)+ (1 − dvn)+ / ∂vn - // when both (-ϕ0 - δt vn) > 0 and (1 − dvn) > 0 satisfied - // = - δt k ∂ (-ϕ0 - δt vn) (1 − dvn) / ∂vn - // = - δt k ∂ (-ϕ0 - δt vn + ϕ0 dvn + d δt vn²) / ∂vn - // = - δt k (- δt + ϕ0 d + 2 d δt vn) - const T d2lndvn2 = - dt * stiffness * (-dt -phi0 * damping + T(2.) * damping * dt * v_next[kZAxis]); // Eq. 8 + // = - δt ∂ fn(ϕ* + δt (vn - v*), vn) / ∂vn + // = - δt k ∂ (-ϕ* - δt (vn - v*))+ (1 − dvn)+ / ∂vn + // when both (-ϕ* - δt (vn - v*)) > 0 and (1 − dvn) > 0 satisfied + // = - δt k ∂ (-ϕ* - δt vn + δt v*) (1 − dvn) / ∂vn + // = - δt k ∂ (-ϕ* - δt vn + δt v* + ϕ* d vn + d δt vn² - d δt v* vn) / ∂vn + // = - δt k (- δt + ϕ* d + 2 d δt vn - d δt v*) + const T d2lndvn2 = -dt * stiffness * (-dt + + (-phi_star) * damping + + T(2.) * damping * dt * v_next[kZAxis] + - damping * dt * v_star[kZAxis]); // Eq. 8 // frictional component (Lagged Model) // For a physical model of compliance for which γn is only a function of vn - // γn0 = δt fn(ϕ0, vn0) = δt k (−ϕ0)+ (1 − dvn0)+ - const T yn0 = max(stiffness * dt * phi0 * (T(1.) - damping * v0[kZAxis]), T(0.)); + // γn* = δt fn(ϕ*, vn*) = δt k (−ϕ*)+ (1 − dvn*)+ + const T yn_star = max(stiffness * dt * phi_star * (T(1.) 
- damping * v_star[kZAxis]), T(0.)); const T ts_coeff = sqrt(v_next[0] * v_next[0] + v_next[1] * v_next[1] + config::epsv * config::epsv); const T ts_hat[2] = {v_next[0] / ts_coeff, v_next[1] / ts_coeff}; // Eq. 18 - // γt = -μ * γn0 * t̂_s + // γt = -μ * γn* * t̂_s const T yt[2] = { - -friction_mu * yn0 * ts_hat[0], - -friction_mu * yn0 * ts_hat[1] + -friction_mu * yn_star * ts_hat[0], + -friction_mu * yn_star * ts_hat[1] }; // Eq. 33 T P_ts_hat[4]; @@ -1048,8 +1514,8 @@ __device__ void compute_contact_grad_and_hess( T(1.) - P_ts_hat[0], -P_ts_hat[1], -P_ts_hat[2], T(1.) - P_ts_hat[3] }; - const T d2ltdvt2_coeff = friction_mu * yn0 / ts_coeff; // Eq. 33, ts_coeff = ts_soft_norm + epsv - // ∂²lt / ∂vt² = μ * γn0 * (P⊥(t̂_s) / (||v_t||_s + ε_s)) + const T d2ltdvt2_coeff = friction_mu * yn_star / ts_coeff; // Eq. 33, ts_coeff = ts_soft_norm + epsv + // ∂²lt / ∂vt² = μ * γn* * (P⊥(t̂_s) / (||v_t||_s + ε_s)) const T d2ltdvt2[4] = { d2ltdvt2_coeff * P_perp_ts_hat[0], d2ltdvt2_coeff * P_perp_ts_hat[1], @@ -1079,7 +1545,7 @@ template __global__ void contact_particle_to_grid_kernel(const size_t n_particles, const T* contact_pos, const T* contact_vel, - const T* velocities, + const T* contact_vel_star, const T* volumes, const uint32_t* contact_mpm_id, const T* contact_dist, @@ -1141,26 +1607,16 @@ __global__ void contact_particle_to_grid_kernel(const size_t n_particles, } const T mass = volumes[contact_mpm_id[idx]] * config::DENSITY; - const T* particle_vn = &velocities[contact_mpm_id[idx] * 3]; + const T* particle_v_star = &contact_vel_star[idx * 3]; const T* particle_v = &contact_vel[idx * 3]; T nhat_W[3] = {contact_normal[idx * 3 + 0], contact_normal[idx * 3 + 1], contact_normal[idx * 3 + 2]}; - // TODO (changyu): const GpuT phi0 = -( - // static_cast(mpm_contact_pairs[i].penetration_distance) + - // (mpm_state->positions_host()[mpm_contact_pairs[i].particle_in_contact_index] - - // mpm_contact_pairs[i].particle_in_contact_position.template cast()).dot(nhat_W) - // ); - T phi0 = -contact_dist[idx]; -#ifdef DEBUG - if (phi0 < 0) { - printf("IMPOSSIBLE!!!\n"); - } -#endif + T phi_star = -contact_dist[idx]; - T vn_rel_W[3] = { - particle_vn[0] - contact_rigid_v[idx * 3 + 0], - particle_vn[1] - contact_rigid_v[idx * 3 + 1], - particle_vn[2] - contact_rigid_v[idx * 3 + 2] + T v_star_rel_W[3] = { + particle_v_star[0] - contact_rigid_v[idx * 3 + 0], + particle_v_star[1] - contact_rigid_v[idx * 3 + 1], + particle_v_star[2] - contact_rigid_v[idx * 3 + 2] }; T v_rel_W[3] = { particle_v[0] - contact_rigid_v[idx * 3 + 0], @@ -1173,12 +1629,12 @@ __global__ void contact_particle_to_grid_kernel(const size_t n_particles, make_from_one_unit_vector(nhat_W, kZAxis, R_WC); transpose<3, 3, T>(R_WC, R_CW); - T vn_C[3], v_next_C[3]; // in the contact local coordinate - matmul<3, 3, 1, T>(R_CW, vn_rel_W, vn_C); + T v_star_C[3], v_next_C[3]; // in the contact local coordinate + matmul<3, 3, 1, T>(R_CW, v_star_rel_W, v_star_C); matmul<3, 3, 1, T>(R_CW, v_rel_W, v_next_C); T lc_Hess_C[9], lc_Grad_C[3]; // hess and grad in the contact local coordinate - compute_contact_grad_and_hess(phi0, dt, stiffness, damping, friction_mu, vn_C, v_next_C, lc_Hess_C, lc_Grad_C); + compute_contact_grad_and_hess(phi_star, dt, stiffness, damping, friction_mu, v_star_C, v_next_C, lc_Hess_C, lc_Grad_C); // hess and grad in the world local coordinate @@ -1326,7 +1782,7 @@ template __global__ void grid_to_particle_vdb_line_search_kernel(const size_t n_particles, const T* contact_pos, const T* contact_vel, - const T* velocities, + const T* 
contact_vel_star, const T* volumes, const uint32_t* contact_mpm_id, const T* contact_dist, @@ -1435,20 +1891,15 @@ __global__ void grid_to_particle_vdb_line_search_kernel(const size_t n_particles } const T mass = volumes[contact_mpm_id[idx]] * config::DENSITY; - const T* v_p_n = &velocities[contact_mpm_id[idx] * 3]; + const T* v_p_star = &contact_vel_star[idx * 3]; T nhat_W[3] = {contact_normal[idx * 3 + 0], contact_normal[idx * 3 + 1], contact_normal[idx * 3 + 2]}; - T phi0 = -contact_dist[idx]; -#ifdef DEBUG - if (phi0 < 0) { - printf("IMPOSSIBLE!!!\n"); - } -#endif + T phi_star = -contact_dist[idx]; - T vn_rel_W[3] = { - v_p_n[0] - contact_rigid_v[idx * 3 + 0], - v_p_n[1] - contact_rigid_v[idx * 3 + 1], - v_p_n[2] - contact_rigid_v[idx * 3 + 2] + T v_star_rel_W[3] = { + v_p_star[0] - contact_rigid_v[idx * 3 + 0], + v_p_star[1] - contact_rigid_v[idx * 3 + 1], + v_p_star[2] - contact_rigid_v[idx * 3 + 2] }; T v_current_rel_W[3] = { v_p_current[0] - contact_rigid_v[idx * 3 + 0], @@ -1467,39 +1918,53 @@ __global__ void grid_to_particle_vdb_line_search_kernel(const size_t n_particles make_from_one_unit_vector(nhat_W, kZAxis, R_WC); transpose<3, 3, T>(R_WC, R_CW); - T vn_C[3], v_current_C[3], v_next_C[3]; // in the contact local coordinate - matmul<3, 3, 1, T>(R_CW, vn_rel_W, vn_C); + T v_star_C[3], v_current_C[3], v_next_C[3]; // in the contact local coordinate + matmul<3, 3, 1, T>(R_CW, v_star_rel_W, v_star_C); matmul<3, 3, 1, T>(R_CW, v_current_rel_W, v_current_C); matmul<3, 3, 1, T>(R_CW, v_next_rel_W, v_next_C); // lc(v_p(v_i)) - auto lc = [&](const T* v, const T* v0) { + auto lc = [&](const T* v, const T* v_star) { // frictional component (Lagged Model) - // lt(v_t) = μ * γn0 * ||v_t||_s - const T yn0 = max(stiffness * dt * phi0 * (T(1.) - damping * v0[kZAxis]), T(0.)); - const T lt = friction_mu * yn0 * (sqrt(v[0] * v[0] + v[1] * v[1] + config::epsv * config::epsv) - config::epsv); // Eq. 33 + // lt(v_t) = μ * (γn*) * ||v_t||_s + const T yn_star = max(stiffness * dt * phi_star * (T(1.) - damping * v_star[kZAxis]), T(0.)); + const T lt = friction_mu * yn_star * (sqrt(v[0] * v[0] + v[1] * v[1] + config::epsv * config::epsv) - config::epsv); // Eq. 33 // normal component (Compliant Contact) - // vˆ = min(−ϕ0 / δt, 1 / d), - T v_hat = min(phi0 / dt, T(1.) / damping); + // we need to find the v such that −ϕ = 0 + // Starting with −ϕ = −ϕ* - dt * (v - v*) + // Setting -ϕ = 0 and solving for v, we get v = (-ϕ*) / dt + v* + // vˆ = min(−ϕ* / δt, 1 / d), + T v_hat = min(phi_star / dt + v_star[kZAxis], T(1.) 
/ damping); - // N(vn) = N+(min(vn, vˆ); f0) + // N(vn) = N+(min(vn, vˆ); f*) const T min_vn_v_hat = min(v_hat, v[kZAxis]); - // N+(vn; ϕ0) = δt k [−vn (ϕ0 + 1/2 δt vn) + d vn²/2 (ϕ0 + 2/3 δt vn)] - // = δt k [−ϕ0 vn - 1/2 δt vn² + 1/2 d ϕ0 vn² + 1/3 d δt vn³] - // = δt k [1/3 d δt vn³ + 1/2 (d ϕ0 - δt) vn² - ϕ0 vn] - // = ln_A vn³ + ln_B vn² + ln_C vn + // N+(vn; ϕ*) = ∫ n(vn; ϕ*) ∂vn + // = ∫ δt fn(ϕ* + δt (vn - v*), vn) ∂vn + // = ∫ δt k (-ϕ* - δt (vn - v*))+ (1 − dvn)+ ∂vn + + // when both (-ϕ* - δt (vn - v*)) > 0 and (1 − dvn) > 0 satisfied + // = ∫ δt k (-ϕ* - δt (vn - v*)) (1 − dvn) ∂vn + // = ∫ δt k (-ϕ* - δt vn + δt v*) (1 − dvn) ∂vn + // = ∫ δt k (-ϕ* - δt vn + δt v* + d ϕ* vn + d δt vn² - d δt v* vn) ∂vn + + // = ∫ δt k (d δt vn² + d ϕ* vn - δt vn - d δt v* vn -ϕ* + δt v*) ∂vn + // = ∫ δt k [ d δt vn² + (d ϕ* - δt - d δt v*) vn -ϕ* + δt v*] ∂vn + // = δt k [ 1/3 d δt vn³ + 1/2 (d ϕ* - δt - d δt v*) vn² + (-ϕ* + δt v*) vn] + // = 1/3 δt² k d vn³ + 1/2 δt k (d ϕ* - δt - d δt v*) + δt k (-ϕ* + δt v*) vn + // = N_A vn³ + N_B vn² + N_C vn + // where N_A = 1/3 δt² k d, - // N_B = 1/2 δt k (d ϕ0 - δt) - // N_C = δt k * (-ϕ0) + // N_B = 1/2 δt k (d ϕ* - δt - d δt v*) + // N_C = δt k * (-ϕ* + δt v*) const T N_A = T(1. / 3.) * dt * dt * stiffness * damping; - const T N_B = T(1. / 2.) * dt * stiffness * (-damping * phi0 - dt); - const T N_C = dt * stiffness * phi0; + const T N_B = T(1. / 2.) * dt * stiffness * (-damping * phi_star - dt - damping * dt * v_star[kZAxis]); + const T N_C = dt * stiffness * (phi_star + dt * v_star[kZAxis]); const T N_vn = N_A * min_vn_v_hat * min_vn_v_hat * min_vn_v_hat - + N_B * min_vn_v_hat * min_vn_v_hat - + N_C * min_vn_v_hat; + + N_B * min_vn_v_hat * min_vn_v_hat + + N_C * min_vn_v_hat; // ln(vn) = −N(vn), const T ln = -N_vn; @@ -1515,10 +1980,10 @@ __global__ void grid_to_particle_vdb_line_search_kernel(const size_t n_particles } } if (global_line_search) { - atomicAdd(g_E1, lc(v_next_C, vn_C)); + atomicAdd(g_E1, lc(v_next_C, v_star_C)); if constexpr (SOLVE_DF_DDF) { T lc_Hess_C[9], lc_Grad_C[3]; // hess and grad in the contact local coordinate - compute_contact_grad_and_hess(phi0, dt, stiffness, damping, friction_mu, vn_C, v_next_C, lc_Hess_C, lc_Grad_C); + compute_contact_grad_and_hess(phi_star, dt, stiffness, damping, friction_mu, v_star_C, v_next_C, lc_Hess_C, lc_Grad_C); T global_dir_C[3]; matmul<3, 3, 1, T>(R_CW, vp_search_dir_W, global_dir_C); atomicAdd(g_dE1, dot<3>(lc_Grad_C, global_dir_C)); @@ -1530,16 +1995,16 @@ __global__ void grid_to_particle_vdb_line_search_kernel(const size_t n_particles if constexpr (JACOBI) { printf("ERROR, JACOBI CANNOT USE LOCAL LINE-SEARCH!!!!!!!!!!!!!!!!!!!!!!!\n"); } - atomicAdd(&g_E1[color_index], lc(v_next_C, vn_C)); + atomicAdd(&g_E1[color_index], lc(v_next_C, v_star_C)); } if (eval_E0) { if (global_line_search) { - atomicAdd(g_E0, lc(v_current_C, vn_C)); + atomicAdd(g_E0, lc(v_current_C, v_star_C)); } else { if constexpr (JACOBI) { printf("ERROR, JACOBI CANNOT USE LOCAL LINE-SEARCH!!!!!!!!!!!!!!!!!!!!!!!\n"); } - atomicAdd(&g_E0[color_index], lc(v_current_C, vn_C)); + atomicAdd(&g_E0[color_index], lc(v_current_C, v_star_C)); } } } @@ -1616,12 +2081,16 @@ __global__ void update_global_energy_grid_kernel( const T* g_v_star, T* g_momentum, const T* g_Dir, + const T* g_Kdv0, + const T* g_Kdv, + const T* g_Kdir, T* global_E0, T* global_E1, T* global_dE1, T* global_d2E1, const uint32_t g_color_mask, const T global_alpha, + const T dt, const bool eval_E0) { uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x; if (idx < 
touched_cells_cnt) { @@ -1653,13 +2122,25 @@ __global__ void update_global_energy_grid_kernel( } if (eval_E0) { atomicAdd(global_E0, T(0.5) * mass * norm_sqr<3>(v_current_rel)); + // elasticity energy: 1/2 dt^2 ||dv||^2_K = 1/2 dt^2 * dv^T Kdv + const T* Kdv0 = &g_Kdv0[cell_idx * 3]; + atomicAdd(global_E0, T(0.5) * dt * dt * dot<3>(v_current_rel, Kdv0)); } // (1/2) * ||v_i - v_i^*||_M^2 atomicAdd(global_E1, T(0.5) * mass * norm_sqr<3>(v_next_rel)); + // elasticity energy: 1/2 dt^2 ||dv||^2_K = 1/2 dt^2 * dv^T Kdv + const T* Kdv = &g_Kdv[cell_idx * 3]; + atomicAdd(global_E1, T(0.5) * dt * dt * dot<3>(v_next_rel, Kdv)); + if constexpr(SOLVE_DF_DDF) { + const T* Kdir = &g_Kdir[cell_idx * 3]; + atomicAdd(global_dE1, mass * dot<3>(v_next_rel, Dir)); + atomicAdd(global_dE1, dt * dt * dot<3>(v_next_rel, Kdir)); + atomicAdd(global_d2E1, mass * norm_sqr<3>(Dir)); + atomicAdd(global_d2E1, dt * dt * dot<3>(Dir, Kdir)); } } } @@ -1673,7 +2154,8 @@ __global__ void apply_global_line_search_grid_kernel( T* g_momentum, const T* g_Dir, const uint32_t g_color_mask, - const T global_alpha) { + const T global_alpha, + const T dt) { uint32_t idx = threadIdx.x + blockDim.x * blockIdx.x; if (idx < touched_cells_cnt) { uint32_t block_idx = g_touched_ids[idx >> (config::G_BLOCK_BITS * 3)]; @@ -1686,6 +2168,14 @@ __global__ void apply_global_line_search_grid_kernel( g_vel[0] += global_alpha * Dir[0]; g_vel[1] += global_alpha * Dir[1]; g_vel[2] += global_alpha * Dir[2]; + + // NOTE (changyu): check CFL condition here +#if (DEBUG) + const T CFL_dt = config::G_DX / (norm<3>(g_vel) + 1e-10); + if (dt > CFL_dt) { + printf("dt exceds CFL dt limit (%lf)!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", dt / CFL_dt); + } +#endif } } } diff --git a/multibody/gpu_mpm/cuda_mpm_model.cu b/multibody/gpu_mpm/cuda_mpm_model.cu index 03826ebc4590..b7cda4b691a3 100644 --- a/multibody/gpu_mpm/cuda_mpm_model.cu +++ b/multibody/gpu_mpm/cuda_mpm_model.cu @@ -50,6 +50,7 @@ void GpuMpmState::Finalize() { CUDA_SAFE_CALL(cudaMalloc(&particle_buffer_[i].d_velocities, sizeof(Vec3) * n_particles_)); CUDA_SAFE_CALL(cudaMalloc(&particle_buffer_[i].d_volumes, sizeof(T) * n_particles_)); CUDA_SAFE_CALL(cudaMalloc(&particle_buffer_[i].d_affine_matrices, sizeof(Mat3) * n_particles_)); + CUDA_SAFE_CALL(cudaMalloc(&particle_buffer_[i].d_affine_matrices_star, sizeof(Mat3) * n_particles_)); CUDA_SAFE_CALL(cudaMalloc(&particle_buffer_[i].d_pids, sizeof(int) * n_particles_)); CUDA_SAFE_CALL(cudaMalloc(&particle_buffer_[i].d_sort_keys, sizeof(uint32_t) * n_particles_)); @@ -75,12 +76,17 @@ void GpuMpmState::Finalize() { cudaMemcpyHostToDevice)); CUDA_SAFE_CALL(cudaMemset(particle_buffer_[i].d_volumes, 0, sizeof(T) * n_particles_)); CUDA_SAFE_CALL(cudaMemset(particle_buffer_[i].d_affine_matrices, 0, sizeof(Mat3) * n_particles_)); + CUDA_SAFE_CALL(cudaMemset(particle_buffer_[i].d_affine_matrices_star, 0, sizeof(Mat3) * n_particles_)); } } // scratch data CUDA_SAFE_CALL(cudaMalloc(&d_forces_, sizeof(Vec3) * n_particles_)); CUDA_SAFE_CALL(cudaMalloc(&d_taus_, sizeof(Mat3) * n_particles_)); + CUDA_SAFE_CALL(cudaMalloc(&d_dforces_, sizeof(Vec3) * n_particles_)); + CUDA_SAFE_CALL(cudaMalloc(&d_dtaus_, sizeof(Mat3) * n_particles_)); + CUDA_SAFE_CALL(cudaMalloc(&d_dvs_, sizeof(Vec3) * n_particles_)); + CUDA_SAFE_CALL(cudaMalloc(&d_gradDvs_, sizeof(Mat3) * n_particles_)); CUDA_SAFE_CALL(cudaMalloc(&d_index_mappings_, sizeof(int) * n_particles_)); std::vector initial_index_mappings(n_particles_); std::iota(initial_index_mappings.begin(), initial_index_mappings.end(), 
0); @@ -103,6 +109,9 @@ void GpuMpmState::Finalize() { CUDA_SAFE_CALL(cudaMemset(grid_buffer_.d_g_touched_cnt, 0, sizeof(uint32_t))); CUDA_SAFE_CALL(cudaMalloc(&d_g_Hess_, config::G_DOMAIN_VOLUME * sizeof(Mat3))); + CUDA_SAFE_CALL(cudaMalloc(&d_g_Kdv0_, config::G_DOMAIN_VOLUME * sizeof(Vec3))); + CUDA_SAFE_CALL(cudaMalloc(&d_g_Kdir_, config::G_DOMAIN_VOLUME * sizeof(Vec3))); + CUDA_SAFE_CALL(cudaMalloc(&d_g_Kdv_, config::G_DOMAIN_VOLUME * sizeof(Vec3))); CUDA_SAFE_CALL(cudaMalloc(&d_g_Grad_, config::G_DOMAIN_VOLUME * sizeof(Vec3))); CUDA_SAFE_CALL(cudaMalloc(&d_g_Dir_, config::G_DOMAIN_VOLUME * sizeof(Vec3))); CUDA_SAFE_CALL(cudaMalloc(&d_g_alpha_, config::G_DOMAIN_VOLUME * sizeof(T))); @@ -128,6 +137,7 @@ void GpuMpmState::Destroy() { CUDA_SAFE_CALL(cudaFree(particle_buffer_[i].d_velocities)); CUDA_SAFE_CALL(cudaFree(particle_buffer_[i].d_volumes)); CUDA_SAFE_CALL(cudaFree(particle_buffer_[i].d_affine_matrices)); + CUDA_SAFE_CALL(cudaFree(particle_buffer_[i].d_affine_matrices_star)); CUDA_SAFE_CALL(cudaFree(particle_buffer_[i].d_pids)); CUDA_SAFE_CALL(cudaFree(particle_buffer_[i].d_sort_keys)); @@ -138,6 +148,7 @@ void GpuMpmState::Destroy() { particle_buffer_[i].d_velocities = nullptr; particle_buffer_[i].d_volumes = nullptr; particle_buffer_[i].d_affine_matrices = nullptr; + particle_buffer_[i].d_affine_matrices_star = nullptr; particle_buffer_[i].d_pids = nullptr; particle_buffer_[i].d_sort_keys = nullptr; particle_buffer_[i].d_sort_ids = nullptr; @@ -145,12 +156,20 @@ void GpuMpmState::Destroy() { CUDA_SAFE_CALL(cudaFree(d_forces_)); CUDA_SAFE_CALL(cudaFree(d_taus_)); + CUDA_SAFE_CALL(cudaFree(d_dforces_)); + CUDA_SAFE_CALL(cudaFree(d_dtaus_)); + CUDA_SAFE_CALL(cudaFree(d_dvs_)); + CUDA_SAFE_CALL(cudaFree(d_gradDvs_)); CUDA_SAFE_CALL(cudaFree(d_index_mappings_)); CUDA_SAFE_CALL(cudaFree(d_deformation_gradients_)); CUDA_SAFE_CALL(cudaFree(d_Dm_inverses_)); CUDA_SAFE_CALL(cudaFree(d_indices_)); d_forces_ = nullptr; d_taus_ = nullptr; + d_dforces_ = nullptr; + d_dtaus_ = nullptr; + d_dvs_ = nullptr; + d_gradDvs_ = nullptr; d_index_mappings_ = nullptr; d_deformation_gradients_ = nullptr; d_Dm_inverses_ = nullptr; @@ -168,6 +187,9 @@ void GpuMpmState::Destroy() { grid_buffer_.d_g_touched_cnt = nullptr; CUDA_SAFE_CALL(cudaFree(d_g_Hess_)); + CUDA_SAFE_CALL(cudaFree(d_g_Kdv0_)); + CUDA_SAFE_CALL(cudaFree(d_g_Kdir_)); + CUDA_SAFE_CALL(cudaFree(d_g_Kdv_)); CUDA_SAFE_CALL(cudaFree(d_g_Grad_)); CUDA_SAFE_CALL(cudaFree(d_g_Dir_)); CUDA_SAFE_CALL(cudaFree(d_g_alpha_)); @@ -175,6 +197,9 @@ void GpuMpmState::Destroy() { CUDA_SAFE_CALL(cudaFree(d_g_E0_)); CUDA_SAFE_CALL(cudaFree(d_g_E1_)); d_g_Hess_ = nullptr; + d_g_Kdv0_ = nullptr; + d_g_Kdir_ = nullptr; + d_g_Kdv_ = nullptr; d_g_Grad_ = nullptr; d_g_Dir_ = nullptr; d_g_alpha_ = nullptr; diff --git a/multibody/gpu_mpm/cuda_mpm_model.cuh b/multibody/gpu_mpm/cuda_mpm_model.cuh index d0904973c768..04bf9352de62 100644 --- a/multibody/gpu_mpm/cuda_mpm_model.cuh +++ b/multibody/gpu_mpm/cuda_mpm_model.cuh @@ -53,6 +53,8 @@ public: const T* current_volumes() const { return particle_buffer_[current_particle_buffer_id_].d_volumes; } T* current_affine_matrices() { return particle_buffer_[current_particle_buffer_id_].d_affine_matrices; } const T* current_affine_matrices() const { return particle_buffer_[current_particle_buffer_id_].d_affine_matrices; } + T* current_affine_matrices_star() { return particle_buffer_[current_particle_buffer_id_].d_affine_matrices_star; } + const T* current_affine_matrices_star() const { return 
particle_buffer_[current_particle_buffer_id_].d_affine_matrices_star; } int* current_pids() { return particle_buffer_[current_particle_buffer_id_].d_pids; } const int* current_pids() const { return particle_buffer_[current_particle_buffer_id_].d_pids; } @@ -65,6 +67,7 @@ public: T* next_velocities() { return particle_buffer_[current_particle_buffer_id_ ^ 1].d_velocities; } T* next_volumes() { return particle_buffer_[current_particle_buffer_id_ ^ 1].d_volumes; } T* next_affine_matrices() { return particle_buffer_[current_particle_buffer_id_ ^ 1].d_affine_matrices; } + T* next_affine_matrices_star() { return particle_buffer_[current_particle_buffer_id_ ^ 1].d_affine_matrices_star; } int* next_pids() { return particle_buffer_[current_particle_buffer_id_ ^ 1].d_pids; } uint32_t* next_sort_keys() { return particle_buffer_[current_particle_buffer_id_ ^ 1].d_sort_keys; } uint32_t* next_sort_ids() { return particle_buffer_[current_particle_buffer_id_ ^ 1].d_sort_ids; } @@ -75,6 +78,17 @@ public: const T* taus() const { return d_taus_; } T* deformation_gradients() { return d_deformation_gradients_; } const T* deformation_gradients() const { return d_deformation_gradients_; } + + // NOTE (changyu): Implicit states + T* dforces() { return d_dforces_; } + const T* dforces() const { return d_dforces_; } + T* dtaus() { return d_dtaus_; } + const T* dtaus() const { return d_dtaus_; } + T* dvs() { return d_dvs_; } + const T* dvs() const { return d_dvs_; } + T* gradDvs() { return d_gradDvs_; } + const T* gradDvs() const { return d_gradDvs_; } + T* Dm_inverses() { return d_Dm_inverses_; } const T* Dm_inverses() const { return d_Dm_inverses_; } int* indices() { return d_indices_; } @@ -100,6 +114,9 @@ public: } T* grid_Hess() { return d_g_Hess_; } + T* grid_Kdv0() { return d_g_Kdv0_; } + T* grid_Kdir() { return d_g_Kdir_; } + T* grid_Kdv() { return d_g_Kdv_; } T* grid_Grad() { return d_g_Grad_; } T* grid_Dir() { return d_g_Dir_; } T* grid_alpha() { return d_g_alpha_; } @@ -178,6 +195,10 @@ private: // scratch data T* d_forces_ = nullptr; // size: n_faces + n_verts, NO sort T* d_taus_ = nullptr; // size: n_faces + n_verts, NO sort + T* d_dforces_ = nullptr; // size: n_faces + n_verts, NO sort + T* d_dtaus_ = nullptr; // size: n_faces + n_verts, NO sort + T* d_dvs_ = nullptr; // size: n_faces + n_verts, NO sort + T* d_gradDvs_ = nullptr; // size: n_faces + n_verts, NO sort // NOTE (changyu): when particle data get sorted, // the index mapping from element/vertex index to particle index will be changed, // need to be updated in `compute_sorted_state_kernel`. @@ -194,6 +215,7 @@ private: T* d_velocities = nullptr; // size: n_faces + n_verts T* d_volumes = nullptr; // size: n_faces + n_verts T* d_affine_matrices = nullptr; // size: n_faces + n_verts + T* d_affine_matrices_star = nullptr; // size: n_faces + n_verts // used to work with index_mapping to get the original -> reordered mapping. 
  int* d_pids = nullptr;  // size: n_faces + n_verts
@@ -251,6 +273,9 @@ private:
   // Grid device ptrs for solving coordinate descent
   T* d_g_Hess_ = nullptr;
+  T* d_g_Kdv0_ = nullptr;
+  T* d_g_Kdir_ = nullptr;
+  T* d_g_Kdv_ = nullptr;
   T* d_g_Grad_ = nullptr;
   T* d_g_Dir_ = nullptr;
   T* d_g_alpha_ = nullptr;
diff --git a/multibody/gpu_mpm/cuda_mpm_solver.cu b/multibody/gpu_mpm/cuda_mpm_solver.cu
index e4b639f6c20a..805a0fe7a79e 100644
--- a/multibody/gpu_mpm/cuda_mpm_solver.cu
+++ b/multibody/gpu_mpm/cuda_mpm_solver.cu
@@ -70,21 +70,31 @@ void GpuMpmSolver<T>::RebuildMapping(GpuMpmState<T>* state, bool sort) const {
 }
 
 template <typename T>
-void GpuMpmSolver<T>::CalcFemStateAndForce(GpuMpmState<T>* state, const T& dt) const {
+void GpuMpmSolver<T>::CalcFemStateAndForce(GpuMpmState<T>* state, const T& dt, const bool post_contact) const {
   CUDA_SAFE_CALL(cudaMemset(state->forces(), 0, sizeof(Vec3<T>) * state->n_particles()));
   CUDA_SAFE_CALL(cudaMemset(state->taus(), 0, sizeof(Mat3<T>) * state->n_particles()));
-  CUDA_SAFE_CALL((
-    calc_fem_state_and_force_kernel<<<
-    (state->n_faces() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
-    (state->n_faces(), state->indices(), state->index_mappings(), state->current_volumes(), state->current_affine_matrices(), state->Dm_inverses(),
-     state->current_positions(), state->current_velocities(), state->deformation_gradients(),
-     state->forces(), state->taus(), dt)
-    ));
+  if (post_contact) {
+    CUDA_SAFE_CALL((
+      calc_fem_state_and_force_kernel<T, true><<<
+      (state->n_faces() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+      (state->n_faces(), state->indices(), state->index_mappings(), state->current_volumes(), state->current_affine_matrices(), state->current_affine_matrices_star(),
+       state->Dm_inverses(), state->current_positions(), state->current_velocities(), state->deformation_gradients(),
+       state->forces(), state->taus(), dt)
+      ));
+  } else {
+    CUDA_SAFE_CALL((
+      calc_fem_state_and_force_kernel<T, false><<<
+      (state->n_faces() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+      (state->n_faces(), state->indices(), state->index_mappings(), state->current_volumes(), state->current_affine_matrices(), nullptr,
+       state->Dm_inverses(), state->current_positions(), state->current_velocities(), state->deformation_gradients(),
+       state->forces(), state->taus(), dt)
+      ));
+  }
 }
 
 template <typename T>
-void GpuMpmSolver<T>::ParticleToGrid(GpuMpmState<T>* state, const T& dt) const {
+void GpuMpmSolver<T>::ParticleToGrid(GpuMpmState<T>* state, const T& dt, bool apply_gravity, bool apply_elasticity) const {
   const uint32_t &touched_blocks_cnt = state->grid_touched_cnt_host();
   const uint32_t &touched_cells_cnt = touched_blocks_cnt * config::G_BLOCK_VOLUME;
   if (touched_cells_cnt > 0) {
@@ -94,14 +104,50 @@ void GpuMpmSolver<T>::ParticleToGrid(GpuMpmState<T>* state, const T& dt) const {
       (touched_cells_cnt, state->grid_touched_ids(), state->grid_touched_flags(), state->grid_masses(), state->grid_momentum())
       ));
   }
-  CUDA_SAFE_CALL((
-    particle_to_grid_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T><<<
-    (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
-    (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_volumes(), state->current_affine_matrices(),
-     state->forces(), state->taus(),
-     state->current_sort_keys(),
-     state->grid_touched_flags(), state->grid_masses(), state->grid_momentum(), dt)
-    ));
+
+  if (!apply_gravity && !apply_elasticity) {
+    CUDA_SAFE_CALL((
+      particle_to_grid_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T, false, false, false><<<
+      (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+      (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_volumes(), state->current_affine_matrices(),
+       state->forces(), state->taus(),
+       state->current_sort_keys(),
+       state->grid_touched_flags(), state->grid_masses(), state->grid_momentum(), dt)
+      ));
+  }
+
+  if (apply_gravity && !apply_elasticity) {
+    CUDA_SAFE_CALL((
+      particle_to_grid_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T, true, false, false><<<
+      (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+      (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_volumes(), state->current_affine_matrices(),
+       state->forces(), state->taus(),
+       state->current_sort_keys(),
+       state->grid_touched_flags(), state->grid_masses(), state->grid_momentum(), dt)
+      ));
+  }
+
+  if (!apply_gravity && apply_elasticity) {
+    CUDA_SAFE_CALL((
+      particle_to_grid_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T, false, true, false><<<
+      (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+      (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_volumes(), state->current_affine_matrices(),
+       state->forces(), state->taus(),
+       state->current_sort_keys(),
+       state->grid_touched_flags(), state->grid_masses(), state->grid_momentum(), dt)
+      ));
+  }
+
+  if (apply_gravity && apply_elasticity) {
+    CUDA_SAFE_CALL((
+      particle_to_grid_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T, true, true, false><<<
+      (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+      (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_volumes(), state->current_affine_matrices(),
+       state->forces(), state->taus(),
+       state->current_sort_keys(),
+       state->grid_touched_flags(), state->grid_masses(), state->grid_momentum(), dt)
+      ));
+  }
 }
 
@@ -153,10 +199,20 @@ void GpuMpmSolver<T>::UpdateGrid(GpuMpmState<T>* state, int mpm_bc) const {
 template <typename T>
 void GpuMpmSolver<T>::GridToParticle(GpuMpmState<T>* state, const T& dt) const {
   CUDA_SAFE_CALL((
-    grid_to_particle_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T><<<
+    grid_to_particle_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T, false><<<
     (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
-    (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_affine_matrices(),
-     state->grid_masses(), state->grid_momentum(), dt)
+    (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_affine_matrices(), nullptr,
+     state->grid_masses(), state->grid_momentum(), state->grid_v_star(), dt)
     ));
 }
 
+template <typename T>
+void GpuMpmSolver<T>::ContactGridToParticle(GpuMpmState<T>* state, const T& dt) const {
+  CUDA_SAFE_CALL((
+    grid_to_particle_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T, true><<<
+    (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+    (state->n_particles(), state->current_positions(), state->current_velocities(), state->current_affine_matrices(), state->current_affine_matrices_star(),
+     state->grid_masses(), state->grid_momentum(), state->grid_v_star(), dt)
+    ));
+}
+
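// [Editorial aside] ParticleToGrid above enumerates all four
// (apply_gravity, apply_elasticity) combinations by hand because the kernel's
// bool parameters must be compile-time constants. An equivalent C++17 dispatch
// helper (editorial sketch, not part of the patch; assumes <type_traits> and
// <utility>) would collapse the four launches into one call site:
template <typename F>
void dispatch_bool2(bool a, bool b, F&& f) {
  // Maps two runtime bools onto the four compile-time instantiations.
  if (a) { b ? f(std::true_type{},  std::true_type{})
             : f(std::true_type{},  std::false_type{}); }
  else   { b ? f(std::false_type{}, std::true_type{})
             : f(std::false_type{}, std::false_type{}); }
}
// Hypothetical usage:
//   dispatch_bool2(apply_gravity, apply_elasticity, [&](auto g, auto e) {
//     particle_to_grid_kernel<config::DEFAULT_CUDA_BLOCK_SIZE, T,
//                             decltype(g)::value, decltype(e)::value, false>
//         <<<grid_dim, config::DEFAULT_CUDA_BLOCK_SIZE>>>(/* ...same args... */);
//   });
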
@@ -231,13 +287,18 @@ void GpuMpmSolver<T>::UpdateContact(GpuMpmState<T> *state, const int frame, cons
         (n_contacts, state->contact_pos(), state->contact_sort_keys(),
          state->contact_sort_ids())
         ));
 
-    const int max_newton_iterations = 2000;
+    const int max_newton_iterations = 500;
+    const int max_line_search_iterations = 100;
     constexpr bool use_jacobi = true;
     const T kTol = 1e-4;
     bool enable_line_search = true;
     const T jacobi_relax_coeff = 0.3;
     const bool global_line_search = use_jacobi;
+    if (!global_line_search) {
+        printf("Currently only global line search is supported!\n");
+        std::abort();  // a bare `throw;` here would reach std::terminate anyway; fail fast explicitly
+    }
 
     int count = 0;
     T norm_dir = 1e10;
@@ -264,11 +325,12 @@ void GpuMpmSolver<T>::UpdateContact(GpuMpmState<T> *state, const int frame, cons
 
     // NOTE (changyu): pre-compute contact particle velocity `contact_vel_star` after p2g2g before contact handling
     // then the dv changed by the implicit contact optimization problem can be extracted by `dv = contact_vel - contact_vel_star`.
+    // also, for the strong coupling scheme, we track `contact_vel_star` to handle the penetration distance estimation correctly.
     CUDA_SAFE_CALL((
-        grid_to_particle_kernel<T><<<
+        grid_to_particle_kernel<T, false><<<
         (n_contacts + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
-        (n_contacts, state->contact_pos(), state->contact_vel_star(), nullptr,
-         state->grid_masses(), state->grid_momentum(), dt)
+        (n_contacts, state->contact_pos(), state->contact_vel_star(), nullptr, nullptr,
+         state->grid_masses(), state->grid_momentum(), state->grid_v_star(), dt)
         ));
 
     while (norm_dir > kTol && count < max_newton_iterations) {
@@ -293,7 +355,7 @@
         (n_contacts,
          state->contact_pos(),
          state->contact_vel(),
-         state->current_velocities(),
+         state->contact_vel_star(),
         state->current_volumes(),
         state->contact_mpm_id(),
         state->contact_dist(),
@@ -318,6 +380,46 @@
         CUDA_SAFE_CALL(cudaDeviceSynchronize());
         CUDA_SAFE_CALL(cudaMemcpy(&total_grid_DoFs, total_grid_DoFs_d, sizeof(uint32_t), cudaMemcpyDeviceToHost));
         // printf("color(%u) total_grid_DoFs=%u\n", color_mask, total_grid_DoFs);
+
+        // NOTE (changyu): when the apply_dir flag is on,
+        // it computes the differential form Kdv with respect to the search direction,
+        // used by the exact line search to get the grad/hess of the energy with respect to the step size.
+        const auto &update_Kdv = [&](T *Kdv_ptr, const T alpha, bool apply_dir=false) {
+            // elasticity line search
+            // STEP 1. dv_i -> dv_p, gradDv_p
+            CUDA_SAFE_CALL((
+                grid_to_particle_dv_transfer_kernel<T><<<
+                (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+                (state->n_particles(), state->current_positions(), state->dvs(), state->gradDvs(),
+                 state->grid_momentum(), state->grid_Dir(), state->grid_v_star(), alpha, apply_dir)
+            ));
+
+            // STEP 2. dv_p -> dF_p -> dP_p -> dstress_p, dforce_p
+            CUDA_SAFE_CALL(cudaMemset(state->dforces(), 0, sizeof(Vec3<T>) * state->n_particles()));
+            CUDA_SAFE_CALL(cudaMemset(state->dtaus(), 0, sizeof(Mat3<T>) * state->n_particles()));
+            CUDA_SAFE_CALL((
+                calc_implicit_fem_state_and_differential_kernel<T><<<
+                (state->n_faces() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+                (state->n_faces(), state->indices(), state->index_mappings(), state->current_volumes(), state->Dm_inverses(), state->deformation_gradients(),
+                 state->dvs(), state->gradDvs(), state->dforces(), state->dtaus())
+            ));
+
+            // STEP 3. dstress_p, dforce_p -> Kdv
+            CUDA_SAFE_CALL((
+                clean_grid_Kdv_kernel<T><<<
+                (touched_cells_cnt + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+                (touched_cells_cnt, state->grid_touched_ids(), Kdv_ptr)
+            ));
+            CUDA_SAFE_CALL((
+                particle_to_grid_kernel<T, false, false><<<
+                (state->n_particles() + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
+                (state->n_particles(), state->current_positions(), nullptr, state->current_volumes(), nullptr,
+                 state->dforces(), state->dtaus(),
+                 state->current_sort_keys(),
+                 state->grid_touched_flags(), nullptr, Kdv_ptr, dt)
+            ));
+        };
 
         // line search
         const auto & line_search = [&](const T current_alpha) {
@@ -330,7 +432,7 @@
             (n_contacts,
              state->contact_pos(),
              state->contact_vel(),
-             state->current_velocities(),
+             state->contact_vel_star(),
             state->current_volumes(),
             state->contact_mpm_id(),
             state->contact_dist(),
@@ -348,13 +450,14 @@
                 global_line_search, current_alpha)
             ));
+            update_Kdv(state->grid_Kdv(), current_alpha, /*apply_dir=*/false);  // elasticity energy defined for E1
             CUDA_SAFE_CALL((
                 update_global_energy_grid_kernel<<<
                 (touched_cells_cnt + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
                 (touched_cells_cnt, state->grid_touched_ids(), state->grid_masses(),
-                 state->grid_v_star(), state->grid_momentum(), state->grid_Dir(),
+                 state->grid_v_star(), state->grid_momentum(), state->grid_Dir(), nullptr, state->grid_Kdv(), state->grid_Kdir(), nullptr,
                  global_E1_d, global_dE1_d, global_d2E1_d,
-                 /*color_mask*/ 0, current_alpha, /*eval_E0=*/ false)
+                 /*color_mask*/ 0, current_alpha, dt, /*eval_E0=*/ false)
             ));
             CUDA_SAFE_CALL(cudaDeviceSynchronize());
             T E, dE, d2E;
@@ -437,7 +540,7 @@ void GpuMpmSolver<T>::UpdateContact(GpuMpmState<T> *state, const int frame, cons
             }
 
             // Exit if f(root) is close to zero.
-            if (abs(std::get<1>(f_root)) < f_tolerance) {
+            if (abs(std::get<1>(f_root)) < f_tolerance || line_search_cnt >= max_line_search_iterations) {
                 global_line_search_satisfied = true;
             }
 
@@ -477,7 +580,7 @@
             (n_contacts,
              state->contact_pos(),
              state->contact_vel(),
-             state->current_velocities(),
+             state->contact_vel_star(),
             state->current_volumes(),
             state->contact_mpm_id(),
             state->contact_dist(),
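For reading the energy bookkeeping in these hunks: with A = M + dt^2 K, the exact line search probes E(alpha) = 1/2 ||dv* + alpha*dir||_A^2 plus the contact terms, and the grid products Kdv/Kdir are exactly what E' and E'' need. A host-side sketch of that reduction (illustrative names only; the real work happens inside update_global_energy_grid_kernel):

template <typename T>
void AccumulateEnergyTerms(int n_nodes, const T* mass, const T* dv, const T* dir,
                           const T* Kdv, const T* Kdir, T dt,
                           T* E, T* dE, T* d2E) {
    // dv is dv0 + alpha*dir at the probed step size; Kdv = K*dv, Kdir = K*dir.
    *E = *dE = *d2E = T(0);
    for (int i = 0; i < 3 * n_nodes; ++i) {
        const T m = mass[i / 3];  // one mass per grid node, three velocity components
        *E   += T(0.5) * m * dv[i] * dv[i] + T(0.5) * dt * dt * dv[i] * Kdv[i];
        *dE  += m * dir[i] * dv[i] + dt * dt * dir[i] * Kdv[i];
        *d2E += m * dir[i] * dir[i] + dt * dt * dir[i] * Kdir[i];
    }
}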
@@ -504,13 +607,20 @@ void GpuMpmSolver<T>::UpdateContact(GpuMpmState<T> *state, const int frame, cons
                 global_alpha = root;
             }
         } else {
+            if (line_search_cnt == 0) {
+                update_Kdv(state->grid_Kdv0(), 0, /*apply_dir=*/false);  // elasticity energy defined for E0
+                update_Kdv(state->grid_Kdir(), 0, /*apply_dir=*/true);   // elasticity energy defined for E0, with respect to the search direction
+            }
+            update_Kdv(state->grid_Kdv(), global_alpha, /*apply_dir=*/false);  // elasticity energy defined for E1
+
+            // STEP 4. 1/2 ||dv||^2_A = 1/2 ||dv||^2_(M + dt^2 K) = 1/2 ||dv||^2_M + 1/2 dt^2 ||dv||^2_K
             CUDA_SAFE_CALL((
                 update_global_energy_grid_kernel<<<
                 (touched_cells_cnt + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
                 (touched_cells_cnt, state->grid_touched_ids(), state->grid_masses(),
-                 state->grid_v_star(), state->grid_momentum(), state->grid_Dir(),
+                 state->grid_v_star(), state->grid_momentum(), state->grid_Dir(), state->grid_Kdv0(), state->grid_Kdv(), state->grid_Kdir(),
                  global_E0_d, global_E1_d, nullptr, nullptr,
-                 color_mask, global_alpha, /*eval_E0=*/line_search_cnt == 0)
+                 color_mask, global_alpha, dt, /*eval_E0=*/line_search_cnt == 0)
             ));
             CUDA_SAFE_CALL(cudaDeviceSynchronize());
             CUDA_SAFE_CALL(cudaMemcpy(&global_E0, global_E0_d, sizeof(T), cudaMemcpyDeviceToHost));
@@ -534,7 +644,7 @@
             (touched_cells_cnt + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
             (touched_cells_cnt, state->grid_touched_ids(), state->grid_masses(), state->grid_momentum(), state->grid_Dir(),
-             color_mask, global_alpha)
+             color_mask, global_alpha, dt)
             ));
             CUDA_SAFE_CALL(cudaDeviceSynchronize());
         }
@@ -555,10 +665,10 @@
     }
 
     CUDA_SAFE_CALL((
-        grid_to_particle_kernel<T><<<
+        grid_to_particle_kernel<T, false><<<
        (n_contacts + config::DEFAULT_CUDA_BLOCK_SIZE - 1) / config::DEFAULT_CUDA_BLOCK_SIZE, config::DEFAULT_CUDA_BLOCK_SIZE>>>
-        (n_contacts, state->contact_pos(), state->contact_vel(), nullptr,
-         state->grid_masses(), state->grid_momentum(), dt)
+        (n_contacts, state->contact_pos(), state->contact_vel(), nullptr, nullptr,
+         state->grid_masses(), state->grid_momentum(), state->grid_v_star(), dt)
         ));
     // throw;
     grid_DoFs += total_grid_DoFs;
diff --git a/multibody/gpu_mpm/cuda_mpm_solver.cuh b/multibody/gpu_mpm/cuda_mpm_solver.cuh
index 09d53d9490b6..a290009622c4 100644
--- a/multibody/gpu_mpm/cuda_mpm_solver.cuh
+++ b/multibody/gpu_mpm/cuda_mpm_solver.cuh
@@ -20,10 +20,11 @@ template <typename T>
 class GpuMpmSolver {
 public:
     void RebuildMapping(GpuMpmState<T> *state, bool sort) const;
-    void CalcFemStateAndForce(GpuMpmState<T> *state, const T& dt) const;
-    void ParticleToGrid(GpuMpmState<T> *state, const T& dt) const;
+    void CalcFemStateAndForce(GpuMpmState<T> *state, const T& dt, const bool post_contact=false) const;
+    void ParticleToGrid(GpuMpmState<T> *state, const T& dt, bool apply_gravity=true, bool apply_elasticity=true) const;
     void UpdateGrid(GpuMpmState<T> *state, int mpm_bc = -1) const;
     void GridToParticle(GpuMpmState<T> *state, const T& dt) const;
+    void ContactGridToParticle(GpuMpmState<T> *state, const T& dt) const;
     void GpuSync() const;
     void SyncParticleStateToCpu(GpuMpmState<T> *state) const;
     void Dump(const GpuMpmState<T> &state, std::string filename) const;
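Read together with the deformable_driver.h changes further below, the declarations above compose into the following per-time-step call sequence (a condensed sketch; `solver`, `state`, and the loop bounds/config values are illustrative stand-ins for the driver's members):

for (int substep = 0; substep < num_substeps; ++substep) {
    solver.SyncParticleStateToCpu(&state);
    solver.RebuildMapping(&state, false);
    solver.CalcFemStateAndForce(&state, ddt);  // pre-contact elasticity
    solver.ParticleToGrid(&state, ddt, /*apply_gravity=*/false, /*apply_elasticity=*/true);
    solver.UpdateGrid(&state, mpm_bc);
    solver.GridToParticle(&state, ddt);
}
// Gravity and contact are handled once per step under the strong coupling scheme:
solver.ParticleToGrid(&state, dt, /*apply_gravity=*/true, /*apply_elasticity=*/false);
solver.UpdateGrid(&state, mpm_bc);
// ... rebuild the contact pairs with the margin, then:
solver.UpdateContact(&state, frame, 0, dt, mu, stiffness, damping, write_files, exact_line_search);
solver.ContactGridToParticle(&state, dt);
solver.CalcFemStateAndForce(&state, dt, /*post_contact=*/true);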
diff --git a/multibody/gpu_mpm/math_tools.cuh b/multibody/gpu_mpm/math_tools.cuh
index ac8c1ebf392b..389e7e147d7c 100644
--- a/multibody/gpu_mpm/math_tools.cuh
+++ b/multibody/gpu_mpm/math_tools.cuh
@@ -509,6 +509,133 @@ inline __host__ __device__ void givens_QR(const T *A, T *Q, T *R) {
     }
 }
 
+template <typename T>
+inline __host__ __device__ void QR_differential_3x3(const T *Q, const T *R, const T *dF, T *dQ, T *dR) {
+    // TM QtdF = Q.transpose() * dF;
+    T QtdF[9], Qt[9];
+    transpose<3, 3, T>(Q, Qt);
+    matmul<3, 3, 3, T>(Qt, dF, QtdF);
+
+    // T w3 = QtdF(1, 0) / R(0, 0);
+    // T w2 = -QtdF(2, 0) / R(0, 0);
+    // T w1 = (QtdF(2, 1) + w2 * R(0, 1)) / R(1, 1);
+    const T w3 = QtdF[1 * 3 + 0] / R[0 * 3 + 0];
+    const T w2 = -QtdF[2 * 3 + 0] / R[0 * 3 + 0];
+    const T w1 = (QtdF[2 * 3 + 1] + w2 * R[0 * 3 + 1]) / R[1 * 3 + 1];
+
+    // TM QtdQ;
+    // QtdQ << 0, -w3, w2, w3, 0, -w1, -w2, w1, 0;
+    T QtdQ[9] = {
+        0, -w3, w2,
+        w3, 0, -w1,
+        -w2, w1, 0
+    };
+
+    // dQ = Q * QtdQ;
+    matmul<3, 3, 3, T>(Q, QtdQ, dQ);
+
+    // dR = Q.transpose() * dF - QtdQ * R;
+    T QtdQR[9];
+    matmul<3, 3, 3, T>(QtdQ, R, QtdQR);
+    for (int i = 0; i < 9; ++i) dR[i] = QtdF[i] - QtdQR[i];
+}
+
+template <typename T>
+inline __host__ __device__ void cofactor_matrix_2x2(const T *F, T *A) {
+    // A(0, 0) = F(1, 1);
+    A[0 * 2 + 0] = F[1 * 2 + 1];
+    // A(1, 0) = -F(0, 1);
+    A[1 * 2 + 0] = -F[0 * 2 + 1];
+    // A(0, 1) = -F(1, 0);
+    A[0 * 2 + 1] = -F[1 * 2 + 0];
+    // A(1, 1) = F(0, 0);
+    A[1 * 2 + 1] = F[0 * 2 + 0];
+}
+
+/**
+ \brief add the scaled 2x2 rotational derivative (dRdF)
+ \param[in] R Rotation matrix of F
+ \param[in] S Symmetric matrix of F
+ \param[in] scale The scale factor
+ \param[out] M The 4x4 matrix to add scale * dRdF to
+*/
+
+template <typename T>
+__device__ __host__
+void add_scaled_rotational_derivative_2x2(const T* R, const T* S, const T scale, T* M) {
+    // T trace_s = S.trace();
+    const T trace_s = S[0 * 2 + 0] + S[1 * 2 + 1];
+    const T scale_over_trace = scale / trace_s;
+    // Matrix RE;
+    // RE << -R(0, 1), R(0, 0), -R(1, 1), R(1, 0);
+    // Vector vec_RE = vec(RE);
+    const T vec_RE[4] = {
+        -R[0 * 2 + 1], -R[1 * 2 + 1], R[0 * 2 + 0], R[1 * 2 + 0]
+    };
+
+    // M.noalias() += scale_over_trace * (vec_RE * (vec_RE.transpose()));
+    #pragma unroll
+    for (int i = 0; i < 4; ++i)
+        #pragma unroll
+        for (int j = 0; j < 4; ++j)
+            M[i * 4 + j] += scale_over_trace * vec_RE[i] * vec_RE[j];
+}
+
+template <typename T>
+__device__ __host__
+void add_scaled_rotational_differential_2x2(const T* R, const T* S, const T* dF, const T scale, T* M) {
+    // T trace_s = S.trace();
+    const T trace_s = S[0 * 2 + 0] + S[1 * 2 + 1];
+
+    // Heavily exploits the 2x2 structure, but is basically derived from "rotationalDifferential":
+    // T omega = (R(0, 0) * dF(0, 1) + R(1, 0) * dF(1, 1) - R(0, 1) * dF(0, 0) - R(1, 1) * dF(1, 0)) / trace_s * scale;
+    const T omega = (R[0 * 2 + 0] * dF[0 * 2 + 1]
+                   + R[1 * 2 + 0] * dF[1 * 2 + 1]
+                   - R[0 * 2 + 1] * dF[0 * 2 + 0]
+                   - R[1 * 2 + 1] * dF[1 * 2 + 0]) / trace_s * scale;
+
+    // M(0, 0) -= omega * R(0, 1);
+    // M(1, 0) -= omega * R(1, 1);
+    // M(0, 1) += omega * R(0, 0);
+    // M(1, 1) += omega * R(1, 0);
+    M[0 * 2 + 0] -= omega * R[0 * 2 + 1];
+    M[1 * 2 + 0] -= omega * R[1 * 2 + 1];
+    M[0 * 2 + 1] += omega * R[0 * 2 + 0];
+    M[1 * 2 + 1] += omega * R[1 * 2 + 0];
+}
+
+/**
+ \brief add the scaled 2x2 derivative d( JF^(-T) )/dF
+ \param[in] F input matrix
+ \param[in] scale The scale factor
+ \param[in,out] result The 4x4 matrix that scale * d( JF^(-T) )/dF is added to
+*/
+template <typename T>
+__device__ __host__
+void add_scaled_cofactor_matrix_derivative_2x2(const T* F, const T scale, T* result) {
+    // result(3, 0) += scale;
+    result[3 * 4 + 0] += scale;
+    // result(2, 1) -= scale;
+    result[2 * 4 + 1] -= scale;
+    // result(1, 2) -= scale;
+    result[1 * 4 + 2] -= scale;
+    // result(0, 3) += scale;
+    result[0 * 4 + 3] += scale;
+}
+
+template <typename T>
+__device__ __host__
+void add_scaled_cofactor_matrix_differential_2x2(const T* F, const T* dF, const T scale, T* M) {
+    // M(0, 0) += scale * dF(1, 1);
+    M[0 * 2 + 0] += scale * dF[1 * 2 + 1];
+    // M(1, 0) -= scale * dF(0, 1);
+    M[1 * 2 + 0] -= scale * dF[0 * 2 + 1];
+    // M(0, 1) -= scale * dF(1, 0);
+    M[0 * 2 + 1] -= scale * dF[1 * 2 + 0];
+    // M(1, 1) += scale * dF(0, 0);
+    M[1 * 2 + 1] += scale * dF[0 * 2 + 0];
+}
+
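Since cofactor_matrix_2x2 is linear in F, its differential above can be checked exactly against a finite difference. A minimal host-side check, assuming math_tools.cuh is includable from a host translation unit so both __host__ __device__ helpers are visible:

#include <cstdio>

int main() {
    using T = double;
    const T F[4]  = {1.1, 0.2, -0.3, 0.9};
    const T dF[4] = {0.01, -0.02, 0.03, 0.005};
    const T eps = 1e-6;
    T Fp[4], C0[4], C1[4], M[4] = {0, 0, 0, 0};
    for (int i = 0; i < 4; ++i) Fp[i] = F[i] + eps * dF[i];
    cofactor_matrix_2x2(F, C0);    // cofactor at F
    cofactor_matrix_2x2(Fp, C1);   // cofactor at the perturbed F
    add_scaled_cofactor_matrix_differential_2x2(F, dF, T(1.), M);
    for (int i = 0; i < 4; ++i)    // the map is linear, so these agree to machine precision
        std::printf("analytic %.6e vs fd %.6e\n", M[i], (C1[i] - C0[i]) / eps);
    return 0;
}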
 template <typename T>
 inline __host__ __device__ void polar_decompose2x2(const T *A, T *U, T *P) {
     U[0] = T(1);
     U[1] = T(0);
@@ -677,6 +804,17 @@ inline __host__ __device__ bool cholesky_solve3(const T* H, const T* b, T* x) {
     return true;
 }
 
+template <int n, int m, typename T>
+__device__ __host__
+inline void watch_host(const char *name, const T *M) {
+    printf("%s\n", name);
+    for (int i = 0; i < n; ++i) {
+        for (int j = 0; j < m; j++) {
+            printf("%.5f ", M[i * m + j]);
+        }
+        printf("\n");
+    }
+}
 
 template <typename T>
 __device__ __host__
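Example use of the debug printer above (a hypothetical call site; the two integer template arguments are the row and column counts, reconstructed as n and m from the loop bounds):

double F[9] = {1, 0, 0, 0, 1, 0, 0, 0, 1};
watch_host<3, 3, double>("F", F);  // prints the 3x3 matrix row by row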
diff --git a/multibody/plant/deformable_driver.h b/multibody/plant/deformable_driver.h
index 7a541151a325..f485ca068d65 100644
--- a/multibody/plant/deformable_driver.h
+++ b/multibody/plant/deformable_driver.h
@@ -119,7 +119,7 @@ class DeformableDriver : public ScalarConvertibleComponent<T> {
   void CalcMpmContactPairs(
       const systems::Context<T>& context,
       gmpm::GpuMpmState<gmpm::config::GpuT> *mpm_state,
-      gmpm::MpmParticleContactPairs<gmpm::config::GpuT>* result) const {
+      gmpm::MpmParticleContactPairs<gmpm::config::GpuT>* result, const gmpm::config::GpuT margin) const {
     using GpuT = gmpm::config::GpuT;
     DRAKE_ASSERT(result != nullptr);
     long long before_ts = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
@@ -141,10 +141,11 @@
       // always remember it is type GpuT and should be casted to type T explicitly.
       std::vector<geometry::SignedDistanceToPoint<T>> p_to_geometries =
           query_object.geometry_state().ComputeSignedDistanceToPoint(
-              mpm_state->positions_host()[p].template cast<T>(), T(0));
-      // identify those that are in contact, i.e. signed_distance < 0
+              mpm_state->positions_host()[p].template cast<T>(), T(margin));
+      // identify those that are in contact
+      // NOTE: register a contact point whenever the signed distance is below `margin` (penetration phi0 > -margin) instead of only on actual penetration (signed distance < 0, phi0 > 0).
       for (const auto& p2geometry : p_to_geometries) {
-        if (p2geometry.distance < 0) {
+        if (p2geometry.distance < margin) {
          // if the particle is inside the rigid body or within the margin, i.e. in contact
          // note: normal direction
          // NOTE (changyu): we treat each collision pair as an individual collision particle,
@@ -243,21 +244,30 @@
       mpm_solver_.SyncParticleStateToCpu(&mutable_mpm_state);
       mpm_solver_.RebuildMapping(&mutable_mpm_state, false);
       mpm_solver_.CalcFemStateAndForce(&mutable_mpm_state, ddt);
-      mpm_solver_.ParticleToGrid(&mutable_mpm_state, ddt);
+      mpm_solver_.ParticleToGrid(&mutable_mpm_state, ddt, false, true);
       mpm_solver_.UpdateGrid(&mutable_mpm_state, deformable_model_->cpu_mpm_model().config.mpm_bc);
-      // NOTE (changyu): update contact information at each substep for weak coupling scheme
-      CalcMpmContactPairs(context, &mutable_mpm_state, &mpm_contact_pairs);
-      mpm_solver_.CopyContactPairs(&mutable_mpm_state, mpm_contact_pairs);
-      mpm_solver_.UpdateContact(&mutable_mpm_state, current_frame, substep, ddt,
-          deformable_model_->cpu_mpm_model().config.contact_friction_mu,
-          deformable_model_->cpu_mpm_model().config.contact_stiffness,
-          deformable_model_->cpu_mpm_model().config.contact_damping,
-          deformable_model_->cpu_mpm_model().config.write_files,
-          deformable_model_->cpu_mpm_model().config.exact_line_search);
       mpm_solver_.GridToParticle(&mutable_mpm_state, ddt);
       substep += 1;
     }
+
+    mpm_solver_.SyncParticleStateToCpu(&mutable_mpm_state);
+    mpm_solver_.RebuildMapping(&mutable_mpm_state, false);
+    mpm_solver_.ParticleToGrid(&mutable_mpm_state, dt, true, false);
+    mpm_solver_.UpdateGrid(&mutable_mpm_state, deformable_model_->cpu_mpm_model().config.mpm_bc);
+
+    // NOTE (changyu): update contact information once per time step for the strong coupling scheme
+    CalcMpmContactPairs(context, &mutable_mpm_state, &mpm_contact_pairs, GpuT(deformable_model_->cpu_mpm_model().config.margin));
+    mpm_solver_.CopyContactPairs(&mutable_mpm_state, mpm_contact_pairs);
+    mpm_solver_.UpdateContact(&mutable_mpm_state, current_frame, 0, dt,
+        deformable_model_->cpu_mpm_model().config.contact_friction_mu,
+        deformable_model_->cpu_mpm_model().config.contact_stiffness,
+        deformable_model_->cpu_mpm_model().config.contact_damping,
+        deformable_model_->cpu_mpm_model().config.write_files,
+        deformable_model_->cpu_mpm_model().config.exact_line_search
+    );
+    mpm_solver_.ContactGridToParticle(&mutable_mpm_state, dt);
+    mpm_solver_.CalcFemStateAndForce(&mutable_mpm_state, dt, true);
     FinalizeExternalContactForces(&mutable_mpm_state, dt);
     // logging
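A small reading aid for the margin predicate introduced above (a hypothetical helper, not part of the patch):

// signed_distance follows Drake's convention: negative when the query point is
// inside the rigid geometry. margin == 0 recovers the old behavior (register only
// on penetration); margin > 0 registers the pair while the particle is still up to
// `margin` away, so the implicit contact solve can act before penetration occurs.
bool RegistersMpmRigidContact(double signed_distance, double margin) {
    return signed_distance < margin;
}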