Skip to content

Commit b284dbd

Browse files
authored
discover VK_KHR_shader_non_semantic_info, checked convolution imagestore (#5955)
1 parent eed257d commit b284dbd

12 files changed

+102
-64
lines changed

.github/workflows/test-coverage.yml

-1
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ jobs:
252252
cd build
253253
lcov -d ./src -c -o lcov.info
254254
lcov -r lcov.info '/usr/*' -o lcov.info
255-
lcov -r lcov.info '*/install/*' -o lcov.info
256255
lcov -r lcov.info '*/build/*' -o lcov.info
257256
lcov --list lcov.info
258257

src/gpu.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ class GpuInfoPrivate
334334
int support_VK_KHR_sampler_ycbcr_conversion;
335335
int support_VK_KHR_shader_float16_int8;
336336
int support_VK_KHR_shader_float_controls;
337+
int support_VK_KHR_shader_non_semantic_info;
337338
int support_VK_KHR_shader_subgroup_extended_types;
338339
int support_VK_KHR_shader_subgroup_rotate;
339340
int support_VK_KHR_storage_buffer_storage_class;
@@ -670,6 +671,7 @@ int GpuInfoPrivate::query_extensions()
670671
support_VK_KHR_sampler_ycbcr_conversion = 0;
671672
support_VK_KHR_shader_float16_int8 = 0;
672673
support_VK_KHR_shader_float_controls = 0;
674+
support_VK_KHR_shader_non_semantic_info = 0;
673675
support_VK_KHR_shader_subgroup_extended_types = 0;
674676
support_VK_KHR_shader_subgroup_rotate = 0;
675677
support_VK_KHR_storage_buffer_storage_class = 0;
@@ -733,6 +735,8 @@ int GpuInfoPrivate::query_extensions()
733735
support_VK_KHR_shader_float16_int8 = exp.specVersion;
734736
else if (strcmp(exp.extensionName, "VK_KHR_shader_float_controls") == 0)
735737
support_VK_KHR_shader_float_controls = exp.specVersion;
738+
else if (strcmp(exp.extensionName, "VK_KHR_shader_non_semantic_info") == 0)
739+
support_VK_KHR_shader_non_semantic_info = exp.specVersion;
736740
else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_extended_types") == 0)
737741
support_VK_KHR_shader_subgroup_extended_types = exp.specVersion;
738742
else if (strcmp(exp.extensionName, "VK_KHR_shader_subgroup_rotate") == 0)
@@ -1552,6 +1556,11 @@ int GpuInfo::support_VK_KHR_shader_float_controls() const
15521556
return d->support_VK_KHR_shader_float_controls;
15531557
}
15541558

1559+
int GpuInfo::support_VK_KHR_shader_non_semantic_info() const
1560+
{
1561+
return d->support_VK_KHR_shader_non_semantic_info;
1562+
}
1563+
15551564
int GpuInfo::support_VK_KHR_shader_subgroup_extended_types() const
15561565
{
15571566
return d->support_VK_KHR_shader_subgroup_extended_types;
@@ -2716,6 +2725,8 @@ VulkanDevice::VulkanDevice(int device_index)
27162725
enabledExtensions.push_back("VK_KHR_shader_float16_int8");
27172726
if (info.support_VK_KHR_shader_float_controls())
27182727
enabledExtensions.push_back("VK_KHR_shader_float_controls");
2728+
if (info.support_VK_KHR_shader_non_semantic_info())
2729+
enabledExtensions.push_back("VK_KHR_shader_non_semantic_info");
27192730
if (info.support_VK_KHR_shader_subgroup_extended_types())
27202731
enabledExtensions.push_back("VK_KHR_shader_subgroup_extended_types");
27212732
if (info.support_VK_KHR_shader_subgroup_rotate())

src/gpu.h

+1
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ class NCNN_EXPORT GpuInfo
317317
int support_VK_KHR_sampler_ycbcr_conversion() const;
318318
int support_VK_KHR_shader_float16_int8() const;
319319
int support_VK_KHR_shader_float_controls() const;
320+
int support_VK_KHR_shader_non_semantic_info() const;
320321
int support_VK_KHR_shader_subgroup_extended_types() const;
321322
int support_VK_KHR_shader_subgroup_rotate() const;
322323
int support_VK_KHR_storage_buffer_storage_class() const;

src/layer/vulkan/shader/convolution.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,16 @@ void main()
202202

203203
#if NCNN_image_shader
204204
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
205-
image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
206-
image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
207-
image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
208-
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
209-
image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
210-
image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
211-
image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
205+
if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
206+
if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
207+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
208+
if (gz2.y < psc(outc))
209+
{
210+
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
211+
if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
212+
if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
213+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
214+
}
212215
#else
213216
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
214217

src/layer/vulkan/shader/convolution_pack1to4.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,16 @@ void main()
202202

203203
#if NCNN_image_shader
204204
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
205-
image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
206-
image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
207-
image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
208-
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
209-
image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
210-
image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
211-
image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
205+
if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
206+
if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
207+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
208+
if (gz2.y < psc(outc))
209+
{
210+
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
211+
if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
212+
if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
213+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
214+
}
212215
#else
213216
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
214217

src/layer/vulkan/shader/convolution_pack1to8.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -220,13 +220,16 @@ void main()
220220

221221
#if NCNN_image_shader
222222
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
223-
image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
224-
image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
225-
image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
226-
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
227-
image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
228-
image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
229-
image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
223+
if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
224+
if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
225+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
226+
if (gz2.y < psc(outc))
227+
{
228+
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
229+
if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
230+
if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
231+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
232+
}
230233
#else
231234
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
232235

src/layer/vulkan/shader/convolution_pack4.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -233,13 +233,16 @@ void main()
233233

234234
#if NCNN_image_shader
235235
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
236-
image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
237-
image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
238-
image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
239-
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
240-
image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
241-
image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
242-
image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
236+
if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
237+
if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
238+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
239+
if (gz2.y < psc(outc))
240+
{
241+
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
242+
if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
243+
if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
244+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
245+
}
243246
#else
244247
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
245248

src/layer/vulkan/shader/convolution_pack4to1.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,16 @@ void main()
202202

203203
#if NCNN_image_shader
204204
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
205-
image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
206-
image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
207-
image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
208-
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
209-
image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
210-
image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
211-
image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
205+
if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
206+
if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
207+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
208+
if (gz2.y < psc(outc))
209+
{
210+
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
211+
if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
212+
if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
213+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
214+
}
212215
#else
213216
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
214217

src/layer/vulkan/shader/convolution_pack4to8.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -348,13 +348,16 @@ void main()
348348

349349
#if NCNN_image_shader
350350
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
351-
image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
352-
image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
353-
image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
354-
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
355-
image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
356-
image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
357-
image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
351+
if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
352+
if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
353+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
354+
if (gz2.y < psc(outc))
355+
{
356+
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
357+
if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
358+
if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
359+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
360+
}
358361
#else
359362
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
360363

src/layer/vulkan/shader/convolution_pack8.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -348,13 +348,16 @@ void main()
348348

349349
#if NCNN_image_shader
350350
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
351-
image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
352-
image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
353-
image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
354-
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
355-
image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
356-
image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
357-
image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
351+
if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
352+
if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
353+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
354+
if (gz2.y < psc(outc))
355+
{
356+
image3d_st8(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
357+
if (gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
358+
if (gy2.y < psc(outh)) image3d_st8(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
359+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st8(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
360+
}
358361
#else
359362
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
360363

src/layer/vulkan/shader/convolution_pack8to1.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,16 @@ void main()
204204

205205
#if NCNN_image_shader
206206
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
207-
image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
208-
image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
209-
image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
210-
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
211-
image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
212-
image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
213-
image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
207+
if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
208+
if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
209+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
210+
if (gz2.y < psc(outc))
211+
{
212+
image3d_st1(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
213+
if (gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
214+
if (gy2.y < psc(outh)) image3d_st1(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
215+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st1(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
216+
}
214217
#else
215218
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
216219

src/layer/vulkan/shader/convolution_pack8to4.comp

+10-7
Original file line numberDiff line numberDiff line change
@@ -264,13 +264,16 @@ void main()
264264

265265
#if NCNN_image_shader
266266
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.x), sum0);
267-
image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
268-
image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
269-
image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
270-
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
271-
image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
272-
image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
273-
image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
267+
if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.x), sum1);
268+
if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.x), sum2);
269+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.x), sum3);
270+
if (gz2.y < psc(outc))
271+
{
272+
image3d_st4(top_blob, ivec3(gx2.x, gy2.x, gz2.y), sum4);
273+
if (gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.x, gz2.y), sum5);
274+
if (gy2.y < psc(outh)) image3d_st4(top_blob, ivec3(gx2.x, gy2.y, gz2.y), sum6);
275+
if (gy2.y < psc(outh) && gx2.y < psc(outw)) image3d_st4(top_blob, ivec3(gx2.y, gy2.y, gz2.y), sum7);
276+
}
274277
#else
275278
const ivec2 gi = gz2 * psc(outcstep) + gy * psc(outw) + gx;
276279

0 commit comments

Comments
 (0)