Skip to content

Commit 725846b

Browse files
committed
Add amx option in CMakeLists; add amx detection and register header file
Signed-off-by: Molly Sophia <[email protected]>
1 parent 25a22e0 commit 725846b

File tree

5 files changed

+72
-0
lines changed

5 files changed

+72
-0
lines changed

CMakeLists.txt

+4
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,10 @@ if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "
150150
endif()
151151
endif()
152152

153+
# Declare the NCNN_APPLE_AMX option (default ON) only when MACOS is set and
# CMAKE_OSX_ARCHITECTURES matches "arm".
# NOTE(review): MACOS is not one of CMake's predefined platform variables (APPLE is) --
# confirm the project or toolchain file defines it, otherwise this option is never declared.
if(MACOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
154+
option(NCNN_APPLE_AMX "optimize apple silicon platforms with apple amx" ON)
155+
endif()
156+
153157
if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
154158
OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
155159
OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")

src/cpu.cpp

+15
Original file line number | Diff line number | Diff line change
@@ -2378,6 +2378,21 @@ int cpu_support_arm_svef32mm()
23782378
#endif
23792379
}
23802380

2381+
// Reports whether the detected CPU family is one of the Apple families listed below.
// Returns nonzero on a match and 0 otherwise; on builds where
// `__aarch64__ && __APPLE__` is not true it always returns 0.
int cpu_support_arm_amx()
2382+
{
2383+
try_initialize_global_cpu_info(); // presumably populates g_hw_cpufamily before it is read -- confirm
2384+
#if __aarch64__ && __APPLE__
2385+
return g_hw_cpufamily == CPUFAMILY_ARM_FIRESTORM_ICESTORM
2386+
|| g_hw_cpufamily == CPUFAMILY_ARM_AVALANCHE_BLIZZARD
2387+
|| g_hw_cpufamily == CPUFAMILY_ARM_IBIZA
2388+
|| g_hw_cpufamily == CPUFAMILY_ARM_LOBOS
2389+
|| g_hw_cpufamily == CPUFAMILY_ARM_PALMA; // allow-list: any family not named here yields 0
2390+
2391+
#else
2392+
return 0;
2393+
#endif
2394+
}
2395+
23812396
int cpu_support_x86_avx()
23822397
{
23832398
try_initialize_global_cpu_info();

src/cpu.h

+2
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,8 @@ NCNN_EXPORT int cpu_support_arm_svebf16();
8080
NCNN_EXPORT int cpu_support_arm_svei8mm();
8181
// svef32mm = aarch64 svef32mm
8282
NCNN_EXPORT int cpu_support_arm_svef32mm();
83+
// amx = aarch64 apple amx (always 0 unless built for aarch64 Apple targets)
84+
NCNN_EXPORT int cpu_support_arm_amx();
8385

8486
// avx = x86 avx
8587
NCNN_EXPORT int cpu_support_x86_avx();

src/layer/arm/amx_usability.h

+50
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
// Tencent is pleased to support the open source community by making ncnn available.
2+
//
3+
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
4+
//
5+
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6+
// in compliance with the License. You may obtain a copy of the License at
7+
//
8+
// https://opensource.org/licenses/BSD-3-Clause
9+
//
10+
// Unless required by applicable law or agreed to in writing, software distributed
11+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
// specific language governing permissions and limitations under the License.
14+
15+
#ifndef AMX_USABILITY_H
16+
#define AMX_USABILITY_H
17+
18+
// From https://github.com/corsix/amx/blob/main/aarch64.h
// Inline-asm helpers that emit raw instruction words of the form
// 0x201000 + (op << 5) + operand through the assembler's .word directive.
// Every helper lists "memory" as a clobber, so the compiler must assume the
// statement may read or write memory.
// NOTE(review): uint64_t is used below but this header includes nothing itself;
// includers must already have <stdint.h>/<cstdint> in scope -- consider including it here.
19+
// Emits three nops followed by the word for `op`, using `imm5` as the operand.
// Both arguments use the "i" constraint, so they must be compile-time constants.
#define AMX_NOP_OP_IMM5(op, imm5) \
20+
__asm("nop\nnop\nnop\n.word (0x201000 + (%0 << 5) + %1)" : : "i"(op), "i"(imm5) : "memory")
21+
22+
// Emits the word for `op` with a general-purpose register as the operand; `gpr` is
// cast to uint64_t and handed to the asm in a register ("r" constraint).
// The `0%1` trick: presumably %1 expands to a register name such as x12, so `0%1`
// is read by the assembler as the hex literal 0x12; subtracting ((0x12 >> 4) * 6)
// turns that back into the decimal register number 12 -- confirm against upstream.
#define AMX_OP_GPR(op, gpr) \
23+
__asm(".word (0x201000 + (%0 << 5) + 0%1 - ((0%1 >> 4) * 6))" : : "i"(op), "r"((uint64_t)(gpr)) : "memory")
24+
25+
// Named wrappers, one per opcode number (0-22). AMX_SET and AMX_CLR share
// opcode 17 and differ only in the immediate (0 and 1 respectively).
#define AMX_LDX(gpr) AMX_OP_GPR( 0, gpr)
26+
#define AMX_LDY(gpr) AMX_OP_GPR( 1, gpr)
27+
#define AMX_STX(gpr) AMX_OP_GPR( 2, gpr)
28+
#define AMX_STY(gpr) AMX_OP_GPR( 3, gpr)
29+
#define AMX_LDZ(gpr) AMX_OP_GPR( 4, gpr)
30+
#define AMX_STZ(gpr) AMX_OP_GPR( 5, gpr)
31+
#define AMX_LDZI(gpr) AMX_OP_GPR( 6, gpr)
32+
#define AMX_STZI(gpr) AMX_OP_GPR( 7, gpr)
33+
#define AMX_EXTRX(gpr) AMX_OP_GPR( 8, gpr)
34+
#define AMX_EXTRY(gpr) AMX_OP_GPR( 9, gpr)
35+
#define AMX_FMA64(gpr) AMX_OP_GPR(10, gpr)
36+
#define AMX_FMS64(gpr) AMX_OP_GPR(11, gpr)
37+
#define AMX_FMA32(gpr) AMX_OP_GPR(12, gpr)
38+
#define AMX_FMS32(gpr) AMX_OP_GPR(13, gpr)
39+
#define AMX_MAC16(gpr) AMX_OP_GPR(14, gpr)
40+
#define AMX_FMA16(gpr) AMX_OP_GPR(15, gpr)
41+
#define AMX_FMS16(gpr) AMX_OP_GPR(16, gpr)
42+
#define AMX_SET() AMX_NOP_OP_IMM5(17, 0)
43+
#define AMX_CLR() AMX_NOP_OP_IMM5(17, 1)
44+
#define AMX_VECINT(gpr) AMX_OP_GPR(18, gpr)
45+
#define AMX_VECFP(gpr) AMX_OP_GPR(19, gpr)
46+
#define AMX_MATINT(gpr) AMX_OP_GPR(20, gpr)
47+
#define AMX_MATFP(gpr) AMX_OP_GPR(21, gpr)
48+
#define AMX_GENLUT(gpr) AMX_OP_GPR(22, gpr)
49+
50+
#endif // AMX_USABILITY_H

src/platform.h.in

+1
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
#cmakedefine01 NCNN_AVX512BF16
4646
#cmakedefine01 NCNN_AVX512FP16
4747
#cmakedefine01 NCNN_VFPV4
48+
#cmakedefine01 NCNN_APPLE_AMX
4849
#cmakedefine01 NCNN_ARM82
4950
#cmakedefine01 NCNN_ARM82DOT
5051
#cmakedefine01 NCNN_ARM82FP16FML

0 commit comments

Comments
 (0)