diff --git a/deepmd/pt/cxx_op.py b/deepmd/pt/cxx_op.py
index 7887b5722c..d46f20a0bc 100644
--- a/deepmd/pt/cxx_op.py
+++ b/deepmd/pt/cxx_op.py
@@ -2,8 +2,12 @@
 import platform
 
 import torch
+from packaging.version import (
+    Version,
+)
 
 from deepmd.env import (
+    GLOBAL_CONFIG,
     SHARED_LIB_DIR,
 )
 
@@ -31,7 +35,59 @@ def load_library(module_name: str) -> bool:
     module_file = (SHARED_LIB_DIR / (prefix + module_name)).with_suffix(ext).resolve()
 
     if module_file.is_file():
-        torch.ops.load_library(module_file)
+        try:
+            torch.ops.load_library(module_file)
+        except OSError as e:
+            # diagnose the load failure: check CXX11_ABI_FLAG first, then version
+            # values recorded in GLOBAL_CONFIG for our op library
+            PT_VERSION = GLOBAL_CONFIG["pt_version"]
+            PT_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["pt_cxx11_abi_flag"])
+            # values reported by the torch runtime that is currently imported
+            # strip the local version label (the part after "+")
+            pt_py_version = Version(torch.__version__).public
+            pt_cxx11_abi_flag = int(torch.compiled_with_cxx11_abi())
+
+            if PT_CXX11_ABI_FLAG != pt_cxx11_abi_flag:
+                raise RuntimeError(
+                    "This deepmd-kit package was compiled with "
+                    "CXX11_ABI_FLAG=%d, but PyTorch runtime was compiled "
+                    "with CXX11_ABI_FLAG=%d. These two library ABIs are "
+                    "incompatible and thus an error is raised when loading %s. "
+                    "You need to rebuild deepmd-kit against this PyTorch "
+                    "runtime."
+                    % (
+                        PT_CXX11_ABI_FLAG,
+                        pt_cxx11_abi_flag,
+                        module_name,
+                    )
+                ) from e
+
+            # different versions may cause incompatibility, see TF
+            if PT_VERSION != pt_py_version:
+                raise RuntimeError(
+                    "The version of PyTorch used to compile this "
+                    f"deepmd-kit package is {PT_VERSION}, but the version of PyTorch "
+                    f"runtime you are using is {pt_py_version}. These two versions are "
+                    f"incompatible and thus an error is raised when loading {module_name}. "
+                    f"You need to install PyTorch {PT_VERSION}, or rebuild deepmd-kit "
+                    f"against PyTorch {pt_py_version}.\nIf you are using a wheel from "
+                    "PyPI, you may consider to install deepmd-kit executing "
+                    "`DP_ENABLE_PYTORCH=1 pip install deepmd-kit --no-binary deepmd-kit` "
+                    "instead."
+                ) from e
+            error_message = (
+                "This deepmd-kit package is inconsistent with PyTorch "
+                f"Runtime, thus an error is raised when loading {module_name}. "
+                "You need to rebuild deepmd-kit against this PyTorch "
+                "runtime."
+            )
+            if PT_CXX11_ABI_FLAG == 1:
+                # #1791
+                error_message += (
+                    "\nWARNING: devtoolset on RHEL6 and RHEL7 does not support _GLIBCXX_USE_CXX11_ABI=1. "
+                    "See https://bugzilla.redhat.com/show_bug.cgi?id=1546704"
+                )
+            raise RuntimeError(error_message) from e
         return True
 
     return False
diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py
index ef192eab1f..038fc11994 100644
--- a/deepmd/pt/entrypoints/main.py
+++ b/deepmd/pt/entrypoints/main.py
@@ -23,6 +23,9 @@
 from deepmd import (
     __version__,
 )
+from deepmd.env import (
+    GLOBAL_CONFIG,
+)
 from deepmd.loggers.loggers import (
     set_log_handles,
 )
@@ -199,10 +202,19 @@ def get_ngpus(self) -> int:
 
     def get_backend_info(self) -> dict:
         """Get backend information."""
+        if ENABLE_CUSTOMIZED_OP:
+            op_info = {
+                "build with PT ver": GLOBAL_CONFIG["pt_version"],
+                "build with PT inc": GLOBAL_CONFIG["pt_include_dir"].replace(";", "\n"),
+                "build with PT lib": GLOBAL_CONFIG["pt_libs"].replace(";", "\n"),
+            }
+        else:
+            op_info = {}  # must be a mapping: it is **-unpacked into the result below
         return {
             "Backend": "PyTorch",
             "PT ver": f"v{torch.__version__}-g{torch.version.git_version[:11]}",
             "Enable custom OP": ENABLE_CUSTOMIZED_OP,
+            **op_info,
         }
 
 
diff --git a/source/config/run_config.ini b/source/config/run_config.ini
index 5cdaa35317..fb96ad224e 100644
--- a/source/config/run_config.ini
+++ b/source/config/run_config.ini
@@ -10,5 +10,9 @@ TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@
 TF_LIBS = @TensorFlow_LIBRARY_PATH@
 TF_VERSION = @TENSORFLOW_VERSION@
 TF_CXX11_ABI_FLAG = @OP_CXX_ABI@
+PT_INCLUDE_DIR = @TORCH_INCLUDE_DIRS@
+PT_LIBS = @PyTorch_LIBRARY_PATH@
+PT_VERSION = @Torch_VERSION@
+PT_CXX11_ABI_FLAG = @OP_CXX_ABI_PT@
 MODEL_VERSION=@MODEL_VERSION@
 DP_VARIANT=@DP_VARIANT@