Skip to content

Add smoke tests conv,linalg,compile. And better version check. #1333

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions .github/scripts/validate_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ else
conda env remove -n ${ENV_NAME}
else



# Special case Pypi installation package, only applicable to linux nightly CUDA 11.7 builds, wheel package
if [[ ${TARGET_OS} == 'linux' && ${MATRIX_CHANNEL} == 'nightly' && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' ]]; then
conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy
if [[ ${TARGET_OS} == 'linux' && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' ]]; then
conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
INSTALLATION_PYPI=${MATRIX_INSTALLATION/"cu117"/"cu117_pypi_cudnn"}
INSTALLATION_PYPI=${INSTALLATION_PYPI/"torchvision torchaudio"/""}
INSTALLATION_PYPI=${INSTALLATION_PYPI/"index-url"/"extra-index-url"}
Expand All @@ -29,9 +31,11 @@ else
conda env remove -p ${ENV_NAME}_pypi
fi

conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy pillow
# Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
conda activate ${ENV_NAME}
INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"}
INSTALLATION=${INSTALLATION/"extra-index-url"/"index-url"}
eval $INSTALLATION

if [[ ${TARGET_OS} == 'linux' ]]; then
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/validate-nightly-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ jobs:
nightly:
uses: ./.github/workflows/validate-binaries.yml
with:
channel: nightly
channel: test
os: all
limit-win-builds: enable
limit-win-builds: disable
81 changes: 70 additions & 11 deletions test/smoke_test/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

# Build-matrix settings, read from the MATRIX_* environment variables.
# Any variable that is not set yields None.
gpu_arch_ver = os.environ.get("MATRIX_GPU_ARCH_VERSION")
gpu_arch_type = os.environ.get("MATRIX_GPU_ARCH_TYPE")
# The installation string can be searched to tell whether this is the nightly channel.
installation_str = os.environ.get("MATRIX_INSTALLATION")
channel = os.environ.get("MATRIX_CHANNEL")
stable_version = os.environ.get("MATRIX_STABLE_VERSION")
package_type = os.environ.get("MATRIX_PACKAGE_TYPE")

# Directory that contains this script.
SCRIPT_DIR = Path(__file__).parent
# NOTE(review): presumably the tolerated age of a nightly build, in days - confirm.
NIGHTLY_ALLOWED_DELTA = 3
# True only when the GPU architecture type is exactly "cuda".
is_cuda_system = gpu_arch_type == "cuda"
Expand All @@ -31,6 +33,16 @@
},
]

def check_version(package: str) -> None:
    """Validate that the installed torch build matches the channel under test.

    For the ``nightly`` channel the check is delegated to
    ``check_nightly_binaries_date``. For every other channel
    ``torch.__version__`` must start with the module-level ``stable_version``.

    Args:
        package: Package selection forwarded to the nightly check.

    Raises:
        RuntimeError: If no stable version is configured for a non-nightly
            channel, or if ``torch.__version__`` does not start with it.
    """
    # only makes sense to check nightly package where dates are known
    if channel == "nightly":
        # Forward the argument; ``options`` only exists inside ``main`` and
        # referencing it here would raise NameError.
        check_nightly_binaries_date(package)
        return

    if stable_version is None:
        # Fail with a clear message instead of the TypeError that
        # ``str.startswith(None)`` would raise.
        raise RuntimeError(
            f"Expected stable version is not set for channel {channel}, "
            f"cannot validate torch version {torch.__version__}"
        )

    if not torch.__version__.startswith(stable_version):
        raise RuntimeError(
            f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
        )

def check_nightly_binaries_date(package: str) -> None:
from datetime import datetime, timedelta
format_dt = '%Y%m%d'
Expand Down Expand Up @@ -58,6 +70,7 @@ def check_nightly_binaries_date(package: str) -> None:
def test_cuda_runtime_errors_captured() -> None:
cuda_exception_missed=True
try:
print("Testing test_cuda_runtime_errors_captured")
torch._assert_async(torch.tensor(0, device="cuda"))
torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
except RuntimeError as e:
Expand Down Expand Up @@ -95,29 +108,77 @@ def smoke_test_cuda(package: str) -> None:
print(f"torch cudnn: {torch.backends.cudnn.version()}")
print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")

# This check has to be run last, since its messing up CUDA runtime
test_cuda_runtime_errors_captured()
# torch.compile is available only on Linux and python 3.8-3.10
if (sys.platform == "linux" or sys.platform == "linux2") and sys.version_info < (3, 11, 0):
smoke_test_compile()

# This check has to be run last, since it messes up the CUDA runtime.
# Restrict it to conda builds only, since wheel builds seem to crash with
# a segmentation fault and do not recover.
if(package_type == 'conda'):
test_cuda_runtime_errors_captured()


def smoke_test_conv2d() -> None:
    """Smoke-test ``nn.Conv2d`` construction and forward passes.

    Builds several ``Conv2d`` variants on CPU and runs one forward pass. When
    ``is_cuda_system`` is true it also runs a forward pass under CUDA autocast
    and one forward pass per supported dtype on the GPU. Results are not
    inspected; the test passes if nothing raises.
    """
    import torch.nn as nn

    print("Testing smoke_test_conv2d")
    # With square kernels and equal stride
    nn.Conv2d(16, 33, 3, stride=2)
    # non-square kernels and unequal stride and with padding
    nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
    # non-square kernels and unequal stride and with padding and dilation
    basic_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
    cpu_input = torch.randn(20, 16, 50, 100)
    basic_conv(cpu_input)

    if is_cuda_system:
        print("Testing smoke_test_conv2d with cuda")
        conv = nn.Conv2d(3, 3, 3).cuda()
        x = torch.randn(1, 3, 24, 24).cuda()
        with torch.cuda.amp.autocast():
            conv(x)

        supported_dtypes = [torch.float16, torch.float32, torch.float64]
        for dtype in supported_dtypes:
            print(f"Testing smoke_test_conv2d with cuda for {dtype}")
            # The same module is re-cast and reused for every dtype.
            conv = basic_conv.to(dtype).cuda()
            cuda_input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
            conv(cuda_input)

def smoke_test_linalg() -> None:
    """Smoke-test ``torch.linalg.svd`` on CPU and, on CUDA systems, on the GPU.

    On CPU the reduced, full and batched decompositions are checked for the
    expected factor shapes and for reconstructing the input. When
    ``is_cuda_system`` is true, one decomposition per supported dtype is also
    run on the GPU; those results are not inspected.

    Raises:
        RuntimeError: If a factor has an unexpected shape or the factors do
            not reconstruct the input within tolerance.
    """
    print("Testing smoke_test_linalg")
    # Generous bound for float32 round-off on these small matrices.
    tolerance = 1e-3

    def expect_shapes(label, factors, expected):
        # Compare the shapes of (U, S, Vh) against the expected tuples.
        actual = tuple(tuple(factor.shape) for factor in factors)
        if actual != expected:
            raise RuntimeError(
                f"smoke_test_linalg {label}: unexpected SVD factor shapes {actual}, expected {expected}"
            )

    def expect_reconstruction(label, original, rebuilt):
        # ``not error <= tolerance`` also rejects NaN.
        error = torch.dist(original, rebuilt).item()
        if not error <= tolerance:
            raise RuntimeError(
                f"smoke_test_linalg {label}: SVD reconstruction error {error} exceeds {tolerance}"
            )

    A = torch.randn(5, 3)
    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
    expect_shapes("reduced", (U, S, Vh), ((5, 3), (3,), (3, 3)))
    expect_reconstruction("reduced", A, U @ torch.diag(S) @ Vh)

    U, S, Vh = torch.linalg.svd(A)
    expect_shapes("full", (U, S, Vh), ((5, 5), (3,), (3, 3)))
    # Only the first three columns of the full U contribute to the product.
    expect_reconstruction("full", A, U[:, :3] @ torch.diag(S) @ Vh)

    A = torch.randn(7, 5, 3)
    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
    expect_shapes("batched", (U, S, Vh), ((7, 5, 3), (7, 3), (7, 3, 3)))
    expect_reconstruction("batched", A, U @ torch.diag_embed(S) @ Vh)

    if is_cuda_system:
        supported_dtypes = [torch.float32, torch.float64]
        for dtype in supported_dtypes:
            print(f"Testing smoke_test_linalg with cuda for {dtype}")
            A = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
            torch.linalg.svd(A)

def smoke_test_compile() -> None:
    """Smoke-test ``torch.compile`` against eager execution on a CUDA device.

    For each supported dtype a small function is run eagerly and through
    ``torch.compile``, and the two results are compared with
    ``torch.allclose``.

    Raises:
        RuntimeError: If the compiled result does not match the eager result.
    """
    supported_dtypes = [torch.float32]

    def foo(x: torch.Tensor) -> torch.Tensor:
        return torch.sin(x) + torch.cos(x)

    for dtype in supported_dtypes:
        print(f"Testing smoke_test_compile for {dtype}")
        x = torch.rand(3, 3, device="cuda").type(dtype)
        x_eager = foo(x)
        x_pt2 = torch.compile(foo)(x)
        results_match = torch.allclose(x_eager, x_pt2)
        print(results_match)
        # Printing alone would let a mismatch go unnoticed; fail the run.
        if not results_match:
            raise RuntimeError(
                f"smoke_test_compile: torch.compile result differs from eager result for {dtype}"
            )


def smoke_test_modules():
for module in MODULES:
Expand Down Expand Up @@ -146,15 +207,13 @@ def main() -> None:
)
options = parser.parse_args()
print(f"torch: {torch.__version__}")
check_version(options.package)
smoke_test_conv2d()
smoke_test_linalg()

if options.package == "all":
smoke_test_modules()

# only makes sense to check nightly package where dates are known
if installation_str.find("nightly") != -1:
check_nightly_binaries_date(options.package)

smoke_test_cuda(options.package)


Expand Down