Open
Description
Adding a time-step wizard to a simulation that runs on a GPU leads to a CUDA kernel launch failure in which the kernel requests more resources than are available (see the error below: 1024 threads per block are requested, but the kernel limit is 896). Tested with Oceananigans v0.95.11 and Julia 1.11.1. The bug persists on a different system with the same versions installed, so it does not appear to be an installation issue.
Minimal working example (MWE):
using Oceananigans
grid = RectilinearGrid(GPU(), size=(100, 100), extent=(10, 10), topology=(Periodic, Periodic, Flat))
model = NonhydrostaticModel(; grid)
simulation = Simulation(model, Δt=1, stop_time=10)
conjure_time_step_wizard!(simulation)
run!(simulation)
Seems to be related to this.
[ Info: Initializing simulation...
ERROR: LoadError: Number of threads per block exceeds kernel limit (1024 > 896).
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] diagnose_launch_failure(f::CUDA.CuFunction, err::CUDA.CuError; blockdim::CUDA.CuDim3, threaddim::CUDA.CuDim3, shmem::Int64)
@ CUDA ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:120
[3] launch(::CUDA.CuFunction, ::CUDA.KernelState, ::CartesianIndices{…}, ::CartesianIndices{…}, ::CUDA.CuDeviceArray{…}, ::KernelFunctionOperation{…}; blocks::Int64, threads::Int64, cooperative::Bool, shmem::Int64, stream::CUDA.CuStream)
@ CUDA ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:73
[4] launch
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:52 [inlined]
[5] #972
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:189 [inlined]
[6] macro expansion
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:149 [inlined]
[7] macro expansion
@ ./none:0 [inlined]
[8] convert_arguments
@ ./none:0 [inlined]
[9] #cudacall#971
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:191 [inlined]
[10] cudacall
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:187 [inlined]
[11] macro expansion
@ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:279 [inlined]
[12] macro expansion
@ ./none:0 [inlined]
[13] (::CUDA.HostKernel{…})(::typeof(identity), ::typeof(min), ::Nothing, ::CartesianIndices{…}, ::CartesianIndices{…}, ::Val{…}, ::CUDA.CuDeviceArray{…}, ::KernelFunctionOperation{…}; convert::Val{…}, call_kwargs::@Kwargs{…})
@ CUDA ./none:0
[14] AbstractKernel
@ ./none:0 [inlined]
[15] macro expansion
@ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:114 [inlined]
[16] mapreducedim!(f::typeof(identity), op::typeof(min), R::SubArray{…}, A::KernelFunctionOperation{…}; init::Nothing)
@ CUDA ~/.julia/packages/CUDA/1kIOw/src/mapreduce.jl:271
[17] mapreducedim!(f::typeof(identity), op::typeof(min), R::SubArray{…}, A::KernelFunctionOperation{…})
@ CUDA ~/.julia/packages/CUDA/1kIOw/src/mapreduce.jl:169
[18] mapreducedim!(f::Function, op::Function, R::SubArray{…}, A::KernelFunctionOperation{…})
@ GPUArrays ~/.julia/packages/GPUArrays/uiVyU/src/host/mapreduce.jl:10
[19] #minimum!#969
@ ./reducedim.jl:1006 [inlined]
[20] minimum!(f::Function, r::Field{…}, a::KernelFunctionOperation{…}; condition::Nothing, mask::Float64, kwargs::@Kwargs{…})
@ Oceananigans.Fields ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:677
[21] minimum(f::Function, c::KernelFunctionOperation{…}; condition::Nothing, mask::Float64, dims::Function)
@ Oceananigans.Fields ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:707
[22] minimum
@ ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:696 [inlined]
[23] minimum
@ ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:716 [inlined]
[24] cell_advection_timescale
@ ~/.julia/packages/Oceananigans/rp0vf/src/Advection/cell_advection_timescale.jl:16 [inlined]
[25] cell_advection_timescale
@ ~/.julia/packages/Oceananigans/rp0vf/src/Models/NonhydrostaticModels/NonhydrostaticModels.jl:80 [inlined]
[26] new_time_step
@ ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/time_step_wizard.jl:103 [inlined]
[27] TimeStepWizard
@ ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/time_step_wizard.jl:117 [inlined]
[28] (::Callback{…})(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/callback.jl:15
[29] initialize!(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:226
[30] time_step!(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:118
[31] run!(sim::Simulation{…}; pickup::Bool)
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:102
[32] run!(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:91
[33] top-level scope
@ /reserved/local/mcrowe/Julia_scripts/SI_transport/MWE.jl:7
[34] include(fname::String)
@ Main ./sysimg.jl:38
[35] top-level scope
@ REPL[2]:1
in expression starting at /reserved/local/mcrowe/Julia_scripts/SI_transport/MWE.jl:7
caused by: CUDA error: too many resources requested for launch (code 701, ERROR_LAUNCH_OUT_OF_RESOURCES)
Stacktrace:
[1] throw_api_error(res::CUDA.cudaError_enum)
@ CUDA ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/libcuda.jl:30
[2] check
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/libcuda.jl:37 [inlined]
[3] cuLaunchKernel
@ ~/.julia/packages/CUDA/1kIOw/lib/utils/call.jl:34 [inlined]
[4] (::CUDA.var"#966#967"{Bool, Int64, CUDA.CuStream, CUDA.CuFunction, CUDA.CuDim3, CUDA.CuDim3})(kernelParams::Vector{Ptr{…}})
@ CUDA ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:66
[5] macro expansion
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:33 [inlined]
[6] macro expansion
@ ./none:0 [inlined]
[7] pack_arguments(::CUDA.var"#966#967"{…}, ::CUDA.KernelState, ::CartesianIndices{…}, ::CartesianIndices{…}, ::CUDA.CuDeviceArray{…}, ::KernelFunctionOperation{…})
@ CUDA ./none:0
[8] launch(::CUDA.CuFunction, ::CUDA.KernelState, ::CartesianIndices{…}, ::CartesianIndices{…}, ::CUDA.CuDeviceArray{…}, ::KernelFunctionOperation{…}; blocks::Int64, threads::Int64, cooperative::Bool, shmem::Int64, stream::CUDA.CuStream)
@ CUDA ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:59
[9] launch
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:52 [inlined]
[10] #972
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:189 [inlined]
[11] macro expansion
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:149 [inlined]
[12] macro expansion
@ ./none:0 [inlined]
[13] convert_arguments
@ ./none:0 [inlined]
[14] #cudacall#971
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:191 [inlined]
[15] cudacall
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/execution.jl:187 [inlined]
[16] macro expansion
@ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:279 [inlined]
[17] macro expansion
@ ./none:0 [inlined]
[18] (::CUDA.HostKernel{…})(::typeof(identity), ::typeof(min), ::Nothing, ::CartesianIndices{…}, ::CartesianIndices{…}, ::Val{…}, ::CUDA.CuDeviceArray{…}, ::KernelFunctionOperation{…}; convert::Val{…}, call_kwargs::@Kwargs{…})
@ CUDA ./none:0
[19] AbstractKernel
@ ./none:0 [inlined]
[20] macro expansion
@ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:114 [inlined]
[21] mapreducedim!(f::typeof(identity), op::typeof(min), R::SubArray{…}, A::KernelFunctionOperation{…}; init::Nothing)
@ CUDA ~/.julia/packages/CUDA/1kIOw/src/mapreduce.jl:271
[22] mapreducedim!(f::typeof(identity), op::typeof(min), R::SubArray{…}, A::KernelFunctionOperation{…})
@ CUDA ~/.julia/packages/CUDA/1kIOw/src/mapreduce.jl:169
[23] mapreducedim!(f::Function, op::Function, R::SubArray{…}, A::KernelFunctionOperation{…})
@ GPUArrays ~/.julia/packages/GPUArrays/uiVyU/src/host/mapreduce.jl:10
[24] #minimum!#969
@ ./reducedim.jl:1006 [inlined]
[25] minimum!(f::Function, r::Field{…}, a::KernelFunctionOperation{…}; condition::Nothing, mask::Float64, kwargs::@Kwargs{…})
@ Oceananigans.Fields ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:677
[26] minimum(f::Function, c::KernelFunctionOperation{…}; condition::Nothing, mask::Float64, dims::Function)
@ Oceananigans.Fields ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:707
[27] minimum
@ ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:696 [inlined]
[28] minimum
@ ~/.julia/packages/Oceananigans/rp0vf/src/Fields/field.jl:716 [inlined]
[29] cell_advection_timescale
@ ~/.julia/packages/Oceananigans/rp0vf/src/Advection/cell_advection_timescale.jl:16 [inlined]
[30] cell_advection_timescale
@ ~/.julia/packages/Oceananigans/rp0vf/src/Models/NonhydrostaticModels/NonhydrostaticModels.jl:80 [inlined]
[31] new_time_step
@ ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/time_step_wizard.jl:103 [inlined]
[32] TimeStepWizard
@ ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/time_step_wizard.jl:117 [inlined]
[33] (::Callback{…})(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/callback.jl:15
[34] initialize!(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:226
[35] time_step!(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:118
[36] run!(sim::Simulation{…}; pickup::Bool)
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:102
[37] run!(sim::Simulation{…})
@ Oceananigans.Simulations ~/.julia/packages/Oceananigans/rp0vf/src/Simulations/run.jl:91
[38] top-level scope
@ /reserved/local/mcrowe/Julia_scripts/SI_transport/MWE.jl:7
[39] include(fname::String)
@ Main ./sysimg.jl:38
[40] top-level scope
@ REPL[2]:1
Some type information was truncated. Use `show(err)` to see complete types.