Skip to content

Commit bf13c30

Browse files
authored
define device feature macros for glslang, discover VK_EXT_shader_atomic_float and VK_EXT_shader_atomic_float2 (#5949)
1 parent 1d1ad06 commit bf13c30

File tree

12 files changed: +2015 -1706 lines changed

python/src/main.cpp

+1 -66
Original file line numberDiff line numberDiff line change
@@ -214,72 +214,7 @@ PYBIND11_MODULE(ncnn, m)
214214
.def_readwrite("use_int8_arithmetic", &Option::use_int8_arithmetic)
215215
.def_readwrite("use_packing_layout", &Option::use_packing_layout)
216216
.def_readwrite("use_shader_pack8", &Option::use_shader_pack8)
217-
.def_property(
218-
"use_subgroup_basic", [](const Option& opt) {
219-
return opt.use_subgroup_basic;
220-
}, [](Option& opt, bool v) {
221-
opt.use_subgroup_basic = v;
222-
})
223-
.def_property(
224-
"use_subgroup_vote", [](const Option& opt) {
225-
return opt.use_subgroup_vote;
226-
}, [](Option& opt, bool v) {
227-
opt.use_subgroup_vote = v;
228-
})
229-
.def_property(
230-
"use_subgroup_arithmetic", [](const Option& opt) {
231-
return opt.use_subgroup_arithmetic;
232-
}, [](Option& opt, bool v) {
233-
opt.use_subgroup_arithmetic = v;
234-
})
235-
.def_property(
236-
"use_subgroup_ballot", [](const Option& opt) {
237-
return opt.use_subgroup_ballot;
238-
}, [](Option& opt, bool v) {
239-
opt.use_subgroup_ballot = v;
240-
})
241-
.def_property(
242-
"use_subgroup_shuffle", [](const Option& opt) {
243-
return opt.use_subgroup_shuffle;
244-
}, [](Option& opt, bool v) {
245-
opt.use_subgroup_shuffle = v;
246-
})
247-
.def_property(
248-
"use_subgroup_shuffle_relative", [](const Option& opt) {
249-
return opt.use_subgroup_shuffle_relative;
250-
}, [](Option& opt, bool v) {
251-
opt.use_subgroup_shuffle_relative = v;
252-
})
253-
.def_property(
254-
"use_subgroup_clustered", [](const Option& opt) {
255-
return opt.use_subgroup_clustered;
256-
}, [](Option& opt, bool v) {
257-
opt.use_subgroup_clustered = v;
258-
})
259-
.def_property(
260-
"use_subgroup_quad", [](const Option& opt) {
261-
return opt.use_subgroup_quad;
262-
}, [](Option& opt, bool v) {
263-
opt.use_subgroup_quad = v;
264-
})
265-
.def_property(
266-
"use_subgroup_rotate", [](const Option& opt) {
267-
return opt.use_subgroup_rotate;
268-
}, [](Option& opt, bool v) {
269-
opt.use_subgroup_rotate = v;
270-
})
271-
.def_property(
272-
"use_subgroup_rotate_clustered", [](const Option& opt) {
273-
return opt.use_subgroup_rotate_clustered;
274-
}, [](Option& opt, bool v) {
275-
opt.use_subgroup_rotate_clustered = v;
276-
})
277-
.def_property(
278-
"use_subgroup_extended_types", [](const Option& opt) {
279-
return opt.use_subgroup_extended_types;
280-
}, [](Option& opt, bool v) {
281-
opt.use_subgroup_extended_types = v;
282-
})
217+
.def_readwrite("use_subgroup_ops", &Option::use_subgroup_ops)
283218
.def_readwrite("use_image_storage", &Option::use_image_storage)
284219
.def_readwrite("use_tensor_storage", &Option::use_tensor_storage);
285220

python/tests/test_option.py

+4 -19
Original file line numberDiff line numberDiff line change
@@ -108,25 +108,10 @@ def test_option():
108108
opt.use_shader_pack8 = False
109109
assert opt.use_shader_pack8 == False
110110

111-
opt.use_subgroup_basic = True
112-
assert opt.use_subgroup_basic == True
113-
opt.use_subgroup_basic = False
114-
assert opt.use_subgroup_basic == False
115-
116-
opt.use_subgroup_vote = True
117-
assert opt.use_subgroup_vote == True
118-
opt.use_subgroup_vote = False
119-
assert opt.use_subgroup_vote == False
120-
121-
opt.use_subgroup_ballot = True
122-
assert opt.use_subgroup_ballot == True
123-
opt.use_subgroup_ballot = False
124-
assert opt.use_subgroup_ballot == False
125-
126-
opt.use_subgroup_shuffle = True
127-
assert opt.use_subgroup_shuffle == True
128-
opt.use_subgroup_shuffle = False
129-
assert opt.use_subgroup_shuffle == False
111+
opt.use_subgroup_ops = True
112+
assert opt.use_subgroup_ops == True
113+
opt.use_subgroup_ops = False
114+
assert opt.use_subgroup_ops == False
130115

131116
opt.use_image_storage = True
132117
assert opt.use_image_storage == True

src/allocator.cpp

+6 -6
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ VkBufferMemory* VkBlobAllocator::fastMalloc(size_t size)
741741

742742
// on amd integrated gpu, there is a faster and larger device-only heap
743743
uint32_t device_local_memory_type_index = vkdev->find_memory_index(memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
744-
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physical_device_memory_properties();
744+
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physicalDeviceMemoryProperties();
745745
uint32_t buffer_heap_index = memory_properties.memoryTypes[buffer_memory_type_index].heapIndex;
746746
uint32_t device_local_heap_index = memory_properties.memoryTypes[device_local_memory_type_index].heapIndex;
747747
if (device_local_heap_index < buffer_heap_index && memory_properties.memoryHeaps[device_local_heap_index].size > memory_properties.memoryHeaps[buffer_heap_index].size)
@@ -1003,7 +1003,7 @@ VkImageMemory* VkBlobAllocator::fastMalloc(int w, int h, int c, size_t elemsize,
10031003

10041004
// on amd integrated gpu, there is a faster and larger device-only heap
10051005
uint32_t device_local_memory_type_index = vkdev->find_memory_index(memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
1006-
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physical_device_memory_properties();
1006+
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physicalDeviceMemoryProperties();
10071007
uint32_t buffer_heap_index = memory_properties.memoryTypes[image_memory_type_index].heapIndex;
10081008
uint32_t device_local_heap_index = memory_properties.memoryTypes[device_local_memory_type_index].heapIndex;
10091009
if (device_local_heap_index < buffer_heap_index && memory_properties.memoryHeaps[device_local_heap_index].size > memory_properties.memoryHeaps[buffer_heap_index].size)
@@ -1322,7 +1322,7 @@ VkBufferMemory* VkWeightAllocator::fastMalloc(size_t size)
13221322

13231323
// on amd integrated gpu, there is a faster and larger device-only heap
13241324
uint32_t device_local_memory_type_index = vkdev->find_memory_index(memoryRequirements2.memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
1325-
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physical_device_memory_properties();
1325+
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physicalDeviceMemoryProperties();
13261326
uint32_t buffer_heap_index = memory_properties.memoryTypes[buffer_memory_type_index].heapIndex;
13271327
uint32_t device_local_heap_index = memory_properties.memoryTypes[device_local_memory_type_index].heapIndex;
13281328
if (device_local_heap_index < buffer_heap_index && memory_properties.memoryHeaps[device_local_heap_index].size > memory_properties.memoryHeaps[buffer_heap_index].size)
@@ -1381,7 +1381,7 @@ VkBufferMemory* VkWeightAllocator::fastMalloc(size_t size)
13811381

13821382
// on amd integrated gpu, there is a faster and larger device-only heap
13831383
uint32_t device_local_memory_type_index = vkdev->find_memory_index(memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
1384-
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physical_device_memory_properties();
1384+
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physicalDeviceMemoryProperties();
13851385
uint32_t buffer_heap_index = memory_properties.memoryTypes[buffer_memory_type_index].heapIndex;
13861386
uint32_t device_local_heap_index = memory_properties.memoryTypes[device_local_memory_type_index].heapIndex;
13871387
if (device_local_heap_index < buffer_heap_index && memory_properties.memoryHeaps[device_local_heap_index].size > memory_properties.memoryHeaps[buffer_heap_index].size)
@@ -1527,7 +1527,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int w, int h, int c, size_t elemsiz
15271527

15281528
// on amd integrated gpu, there is a faster and larger device-only heap
15291529
uint32_t device_local_memory_type_index = vkdev->find_memory_index(memoryRequirements2.memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
1530-
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physical_device_memory_properties();
1530+
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physicalDeviceMemoryProperties();
15311531
uint32_t buffer_heap_index = memory_properties.memoryTypes[image_memory_type_index].heapIndex;
15321532
uint32_t device_local_heap_index = memory_properties.memoryTypes[device_local_memory_type_index].heapIndex;
15331533
if (device_local_heap_index < buffer_heap_index && memory_properties.memoryHeaps[device_local_heap_index].size > memory_properties.memoryHeaps[buffer_heap_index].size)
@@ -1631,7 +1631,7 @@ VkImageMemory* VkWeightAllocator::fastMalloc(int w, int h, int c, size_t elemsiz
16311631

16321632
// on amd integrated gpu, there is a faster and larger device-only heap
16331633
uint32_t device_local_memory_type_index = vkdev->find_memory_index(memoryRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 0, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
1634-
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physical_device_memory_properties();
1634+
const VkPhysicalDeviceMemoryProperties& memory_properties = vkdev->info.physicalDeviceMemoryProperties();
16351635
uint32_t buffer_heap_index = memory_properties.memoryTypes[image_memory_type_index].heapIndex;
16361636
uint32_t device_local_heap_index = memory_properties.memoryTypes[device_local_memory_type_index].heapIndex;
16371637
if (device_local_heap_index < buffer_heap_index && memory_properties.memoryHeaps[device_local_heap_index].size > memory_properties.memoryHeaps[buffer_heap_index].size)

0 commit comments

Comments (0)