vulkan validation layer enables NCNN_LOGE in shader source (#5963)

nihui · web-flow · commit 84970eed4d73 · 2025-03-31T16:17:16.000+08:00
* NCNN_LOGE in glsl

* Update glsl-extension.md
diff --git a/docs/developer-guide/glsl-extension.md b/docs/developer-guide/glsl-extension.md
@@ -76,13 +76,6 @@ You can write shader code with ncnn glsl extension, compiled to spir-v using ncn
 static const char my_glsl_data[] = R"(
 #version 450
 
-#if NCNN_fp16_storage
-#extension GL_EXT_shader_16bit_storage: require
-#endif
-#if NCNN_fp16_arithmetic
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
-#endif
-
 layout (binding = 0) readonly buffer a_blob { sfpvec4 a_blob_data[]; };
 layout (binding = 1) writeonly buffer b_blob { sfpvec4 b_blob_data[]; };
 
@@ -336,17 +329,106 @@ judge if the current platform is moltenvk, for enabling some platform-specific w
 #endif
 ```
 
-# option macros
+ncnn additionally defines most of the vulkan device-related features as macros, which we can use to distinguish different platforms, device extensions, features, and properties
+
+### extension macros
+
+When the device supports an extension, `ncnn_<extension_name>` is defined as the extension version
+
+```c
+void main()
+{
+#if ncnn_VK_KHR_16bit_storage
+    // here is the code for any device that supports VK_KHR_16bit_storage
+#endif
+
+#if ncnn_VK_KHR_sampler_ycbcr_conversion >= 10
+    // here is the code for any device that supports VK_KHR_sampler_ycbcr_conversion and version >= 10
+#endif
+}
+```
+
+### device feature and property macros
+
+ncnn will query device features and properties and then define them as macros.
+
+The macro name is `ncnn_<feature_name>` or `ncnn_<property_name>`
+
+The `GL_EXT_shader_explicit_arithmetic_types_int64` extension is enabled automatically, without an explicit `#extension` directive in your shader, when the device supports `shaderInt64`
+
+```c
+void main()
+{
+#if ncnn_robustBufferAccess
+    // here is the code for any device that supports robustBufferAccess feature
+#endif
+
+#if ncnn_vendorID == 4318
+    // here is the vendor specific code, 4318 is nvidia graphics
+#endif
+
+#if ncnn_subgroupSize == 32
+    // here is the code path optimized for subgroup_size == 32
+#endif
+
+    // use macro definitions
+    uint size; // dynamic value from some previous routines
+    if (size < ncnn_subgroupSize)
+    {
+#if ncnn_supportedOperations & 4
+        // the subgroup supports arithmetic operations
+#endif
+
+#if ncnn_subgroup_arithmetic
+        // shorthand style for checking subgroup arithmetic :P
+#endif
+    }
+}
+```
+
+### validation layer macros
+
+ncnn will define some additional convenience macros when the vulkan validation layer is enabled
+
+* `ncnn_enable_validataion_layer`
+* `NCNN_LOGE`
+
+Currently, you have to change the `ENABLE_VALIDATION_LAYER` definition at the beginning of `src/gpu.cpp` to `1` to enable these macros; they are only defined when the device also supports `VK_KHR_shader_non_semantic_info`.
+
+The `GL_EXT_debug_printf` extension will be enabled automatically without explicitly specifying it in your code.
+
+```c
+void main()
+{
+    int gx = int(gl_GlobalInvocationID.x);
+
+#if ncnn_enable_validataion_layer
+    NCNN_LOGE("gx = %d\n", gx);
+#endif
+}
+```
+
+At runtime, `NCNN_LOGE` will print out the value of `gx`
+
+### option macros
 
 enable glsl extension only if user enable some options
 
+The `GL_EXT_shader_16bit_storage` extension is enabled automatically, without an explicit `#extension` directive in your shader, when the device supports 16-bit storage and the user turns on `opt.use_fp16_storage`
+
+The `GL_EXT_shader_explicit_arithmetic_types_float16` extension is enabled automatically, without an explicit `#extension` directive in your shader, when the device supports 16-bit arithmetic and the user turns on `opt.use_fp16_arithmetic`
+
 ```c
+void main()
+{
 #if NCNN_fp16_storage
-#extension GL_EXT_shader_16bit_storage: require
+    // the user enabled the fp16 storage option and the device supports fp16 storage
 #endif
+
 #if NCNN_fp16_arithmetic
-#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
+    // the user enabled the fp16 arithmetic option and the device supports fp16 arithmetic
 #endif
+}
 ```
 
 declare descriptor binding for image or buffer
diff --git a/src/gpu.cpp b/src/gpu.cpp
@@ -4925,6 +4925,14 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option
             DD_APPEND_PROPERTY(requiredSubgroupSizeStages)
         }
 
+#if ENABLE_VALIDATION_LAYER
+        if (info.support_VK_KHR_shader_non_semantic_info())
+        {
+            device_defines.append("enable_validataion_layer", VK_TRUE);
+            custom_defines.append("NCNN_LOGE", "debugPrintfEXT");
+        }
+#endif
+
 #undef DD_APPEND_PROPERTY
     }
     else
@@ -5038,6 +5046,11 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option
     {
         custom_exts += "#extension GL_EXT_shader_explicit_arithmetic_types_float16: require\n";
     }
+#if ENABLE_VALIDATION_LAYER
+    {
+        custom_exts += "#extension GL_EXT_debug_printf : require\n";
+    }
+#endif
 
     // debug
     // NCNN_LOGE("%s", define_macro_data.c_str());
@@ -5048,16 +5061,37 @@ int compile_spirv_module(const char* comp_data, int comp_data_size, const Option
         glslang::TShader s(EShLangCompute);
 
         // split shader source by token "#version 450\n"
-        int nversion = 0;
-        sscanf(comp_data, "#version %*d\n%n", &nversion);
-        if (nversion == 0)
+        int version_end_pos = -1;
         {
-            NCNN_LOGE("shader source has no #version token");
-            return -1;
+            for (int i = 0; i < comp_data_size - 8; i++)
+            {
+                if (strncmp(comp_data + i, "#version", 8) != 0)
+                    continue;
+
+                // #version shall be the very beginning or after newline
+                if (i != 0 && comp_data[i - 1] != '\n')
+                    continue;
+
+                int nversion = 0;
+                sscanf(comp_data + i, "#version %*d\n%n", &nversion);
+                if (nversion == 0)
+                    continue;
+
+                version_end_pos = i + nversion;
+                break;
+            }
+
+            if (version_end_pos == -1)
+            {
+                NCNN_LOGE("shader source has no #version token");
+                return -1;
+            }
+
+            // NCNN_LOGE("version_end_pos = %d", version_end_pos);
         }
 
-        const char* comp_data_2 = comp_data + nversion;
-        int comp_data_size_1 = nversion;
+        const char* comp_data_2 = comp_data + version_end_pos;
+        int comp_data_size_1 = version_end_pos;
         int comp_data_size_2 = comp_data_size - comp_data_size_1;
 
         const char* comp_datas[4] = {comp_data, custom_exts.c_str(), define_macro_data.c_str(), comp_data_2};