Skip to content

Commit 4369ca6

Browse files
authored
Fix endianness issues in macho module (#2041)
This is a backward-incompatible change that affects the `magic` field. After this change, the value in the `magic` field looks exactly as it does in the file, regardless of the endianness of the current platform: if the file starts with `CA FE BA BE`, the value in `magic` is `0xCAFEBABE`, not `0xBEBAFECA` as it used to be on little-endian architectures.
1 parent f3b3027 commit 4369ca6

File tree

3 files changed

+69
-51
lines changed

3 files changed

+69
-51
lines changed

.github/workflows/build.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -145,5 +145,6 @@ jobs:
145145
./bootstrap.sh &&
146146
./configure --disable-proc-scan --enable-macho &&
147147
make &&
148-
make check
148+
make check &&
149+
cat test-suite.log
149150
"

libyara/modules/macho/macho.c

+65-48
Original file line numberDiff line numberDiff line change
@@ -35,49 +35,43 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3535
#define MODULE_NAME macho
3636

3737
// Check for Mach-O binary magic constant.
38-
3938
int is_macho_file_block(const uint32_t* magic)
4039
{
4140
return *magic == MH_MAGIC || *magic == MH_CIGAM || *magic == MH_MAGIC_64 ||
4241
*magic == MH_CIGAM_64;
4342
}
4443

4544
// Check if file is for 32-bit architecture.
46-
47-
int macho_is_32(const uint8_t* magic)
48-
{
49-
// Magic must be [CE]FAEDFE or FEEDFA[CE].
50-
return magic[0] == 0xce || magic[3] == 0xce;
51-
}
52-
53-
// Check if file is for big-endian architecture.
54-
55-
int macho_is_big(const uint8_t* magic)
45+
int macho_is_32(uint32_t magic)
5646
{
57-
// Magic must be [FE]EDFACE or [FE]EDFACF.
58-
return magic[0] == 0xfe;
47+
return magic == MH_MAGIC || magic == MH_CIGAM;
5948
}
6049

6150
// Check for Mach-O fat binary magic constant.
62-
6351
int is_fat_macho_file_block(const uint32_t* magic)
6452
{
6553
return *magic == FAT_MAGIC || *magic == FAT_CIGAM || *magic == FAT_MAGIC_64 ||
6654
*magic == FAT_CIGAM_64;
6755
}
6856

6957
// Check if file is 32-bit fat file.
70-
71-
int macho_fat_is_32(const uint8_t* magic)
58+
int macho_fat_is_32(const uint32_t* magic)
7259
{
73-
// Magic must be CAFEBA[BE].
74-
return magic[3] == 0xbe;
60+
return yr_be32toh(*magic) == FAT_MAGIC;
7561
}
7662

7763
static int should_swap_bytes(const uint32_t magic)
7864
{
65+
// In big-endian platforms byte swapping is needed for little-endian files
66+
// but in little-endian platforms the files that need swapping are the
67+
// big-endian ones.
68+
#if defined(WORDS_BIGENDIAN)
7969
return magic == MH_CIGAM || magic == MH_CIGAM_64 || magic == FAT_CIGAM ||
8070
magic == FAT_CIGAM_64;
71+
#else
72+
return magic == MH_MAGIC || magic == MH_MAGIC_64 || magic == FAT_MAGIC ||
73+
magic == FAT_MAGIC_64;
74+
#endif
8175
}
8276

8377
static void swap_mach_header(yr_mach_header_64_t* mh)
@@ -90,7 +84,7 @@ static void swap_mach_header(yr_mach_header_64_t* mh)
9084
mh->sizeofcmds = yr_bswap32(mh->sizeofcmds);
9185
mh->flags = yr_bswap32(mh->flags);
9286

93-
if (!macho_is_32((const uint8_t*) &mh->magic))
87+
if (!macho_is_32(mh->magic))
9488
mh->reserved = yr_bswap32(mh->reserved);
9589
}
9690

@@ -222,8 +216,8 @@ void macho_handle_unixthread(
222216
return;
223217

224218
// command_size is the size indicated in yr_thread_command_t structure, but
225-
// limited to the data's size because we can't rely on the structure having a
226-
// valid size.
219+
// limited to the data's size because we can't rely on the structure having
220+
// a valid size.
227221
uint32_t command_size = yr_min(size, ((yr_thread_command_t*) data)->cmdsize);
228222

229223
// command_size should be at least the size of yr_thread_command_t.
@@ -441,13 +435,16 @@ void macho_handle_segment(
441435

442436
yr_set_integer(sec.size, object, "segments[%i].sections[%i].size", i, j);
443437

444-
yr_set_integer(sec.offset, object, "segments[%i].sections[%i].offset", i, j);
438+
yr_set_integer(
439+
sec.offset, object, "segments[%i].sections[%i].offset", i, j);
445440

446441
yr_set_integer(sec.align, object, "segments[%i].sections[%i].align", i, j);
447442

448-
yr_set_integer(sec.reloff, object, "segments[%i].sections[%i].reloff", i, j);
443+
yr_set_integer(
444+
sec.reloff, object, "segments[%i].sections[%i].reloff", i, j);
449445

450-
yr_set_integer(sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j);
446+
yr_set_integer(
447+
sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j);
451448

452449
yr_set_integer(sec.flags, object, "segments[%i].sections[%i].flags", i, j);
453450

@@ -528,13 +525,16 @@ void macho_handle_segment_64(
528525

529526
yr_set_integer(sec.size, object, "segments[%i].sections[%i].size", i, j);
530527

531-
yr_set_integer(sec.offset, object, "segments[%i].sections[%i].offset", i, j);
528+
yr_set_integer(
529+
sec.offset, object, "segments[%i].sections[%i].offset", i, j);
532530

533531
yr_set_integer(sec.align, object, "segments[%i].sections[%i].align", i, j);
534532

535-
yr_set_integer(sec.reloff, object, "segments[%i].sections[%i].reloff", i, j);
533+
yr_set_integer(
534+
sec.reloff, object, "segments[%i].sections[%i].reloff", i, j);
536535

537-
yr_set_integer(sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j);
536+
yr_set_integer(
537+
sec.nreloc, object, "segments[%i].sections[%i].nreloc", i, j);
538538

539539
yr_set_integer(sec.flags, object, "segments[%i].sections[%i].flags", i, j);
540540

@@ -563,15 +563,20 @@ void macho_parse_file(
563563
if (size < sizeof(yr_mach_header_64_t))
564564
return;
565565

566-
size_t header_size = macho_is_32(data) ? sizeof(yr_mach_header_32_t)
567-
: sizeof(yr_mach_header_64_t);
568-
569-
// yr_mach_header_64_t is used for storing the header for both for 32-bits and
570-
// 64-bits files. yr_mach_header_64_t is exactly like yr_mach_header_32_t
571-
// but with an extra "reserved" field at the end.
566+
// yr_mach_header_64_t is used for storing the header for both 32-bits
567+
// and 64-bits files. yr_mach_header_64_t is exactly like
568+
// yr_mach_header_32_t but with an extra "reserved" field at the end.
572569
yr_mach_header_64_t header;
573570

574-
memcpy(&header, data, header_size);
571+
memcpy(&header, data, sizeof(yr_mach_header_64_t));
572+
573+
// The magic number is always handled as big-endian. If the magic bytes are
574+
// CA FE BA BE, then header.magic is 0xCAFEBABE.
575+
header.magic = yr_be32toh(header.magic);
576+
577+
size_t header_size = (header.magic == MH_MAGIC || header.magic == MH_CIGAM)
578+
? sizeof(yr_mach_header_32_t)
579+
: sizeof(yr_mach_header_64_t);
575580

576581
int should_swap = should_swap_bytes(header.magic);
577582

@@ -587,7 +592,7 @@ void macho_parse_file(
587592
yr_set_integer(header.flags, object, "flags");
588593

589594
// The "reserved" field exists only in 64 bits files.
590-
if (!macho_is_32(data))
595+
if (!macho_is_32(header.magic))
591596
yr_set_integer(header.reserved, object, "reserved");
592597

593598
// The first command parsing pass handles only segments.
@@ -652,7 +657,8 @@ void macho_parse_file(
652657
switch (command_struct.cmd)
653658
{
654659
case LC_UNIXTHREAD:
655-
macho_handle_unixthread(command, size - parsed_size, base_address, object, context);
660+
macho_handle_unixthread(
661+
command, size - parsed_size, base_address, object, context);
656662
break;
657663
case LC_MAIN:
658664
macho_handle_main(command, size - parsed_size, object, context);
@@ -672,10 +678,11 @@ void macho_load_fat_arch_header(
672678
uint32_t num,
673679
yr_fat_arch_64_t* arch)
674680
{
675-
if (macho_fat_is_32(data))
681+
if (macho_fat_is_32((uint32_t*) data))
676682
{
677683
yr_fat_arch_32_t* arch32 =
678-
(yr_fat_arch_32_t*) (data + sizeof(yr_fat_header_t) + (num * sizeof(yr_fat_arch_32_t)));
684+
(yr_fat_arch_32_t*) (data + sizeof(yr_fat_header_t) +
685+
(num * sizeof(yr_fat_arch_32_t)));
679686

680687
arch->cputype = yr_be32toh(arch32->cputype);
681688
arch->cpusubtype = yr_be32toh(arch32->cpusubtype);
@@ -687,7 +694,8 @@ void macho_load_fat_arch_header(
687694
else
688695
{
689696
yr_fat_arch_64_t* arch64 =
690-
(yr_fat_arch_64_t*) (data + sizeof(yr_fat_header_t) + (num * sizeof(yr_fat_arch_64_t)));
697+
(yr_fat_arch_64_t*) (data + sizeof(yr_fat_header_t) +
698+
(num * sizeof(yr_fat_arch_64_t)));
691699

692700
arch->cputype = yr_be32toh(arch64->cputype);
693701
arch->cpusubtype = yr_be32toh(arch64->cpusubtype);
@@ -707,7 +715,7 @@ void macho_parse_fat_file(
707715
{
708716
size_t fat_arch_sz = sizeof(yr_fat_arch_64_t);
709717

710-
if (macho_fat_is_32(data))
718+
if (macho_fat_is_32((uint32_t*) data))
711719
fat_arch_sz = sizeof(yr_fat_arch_32_t);
712720

713721
if (size < sizeof(yr_fat_header_t))
@@ -810,10 +818,12 @@ void macho_set_definitions(YR_OBJECT* object)
810818
yr_set_integer(CPU_SUBTYPE_PENTII_M3, object, "CPU_SUBTYPE_PENTII_M3");
811819
yr_set_integer(CPU_SUBTYPE_PENTII_M5, object, "CPU_SUBTYPE_PENTII_M5");
812820
yr_set_integer(CPU_SUBTYPE_CELERON, object, "CPU_SUBTYPE_CELERON");
813-
yr_set_integer(CPU_SUBTYPE_CELERON_MOBILE, object, "CPU_SUBTYPE_CELERON_MOBILE");
821+
yr_set_integer(
822+
CPU_SUBTYPE_CELERON_MOBILE, object, "CPU_SUBTYPE_CELERON_MOBILE");
814823
yr_set_integer(CPU_SUBTYPE_PENTIUM_3, object, "CPU_SUBTYPE_PENTIUM_3");
815824
yr_set_integer(CPU_SUBTYPE_PENTIUM_3_M, object, "CPU_SUBTYPE_PENTIUM_3_M");
816-
yr_set_integer(CPU_SUBTYPE_PENTIUM_3_XEON, object, "CPU_SUBTYPE_PENTIUM_3_XEON");
825+
yr_set_integer(
826+
CPU_SUBTYPE_PENTIUM_3_XEON, object, "CPU_SUBTYPE_PENTIUM_3_XEON");
817827
yr_set_integer(CPU_SUBTYPE_PENTIUM_M, object, "CPU_SUBTYPE_PENTIUM_M");
818828
yr_set_integer(CPU_SUBTYPE_PENTIUM_4, object, "CPU_SUBTYPE_PENTIUM_4");
819829
yr_set_integer(CPU_SUBTYPE_PENTIUM_4_M, object, "CPU_SUBTYPE_PENTIUM_4_M");
@@ -843,7 +853,8 @@ void macho_set_definitions(YR_OBJECT* object)
843853
yr_set_integer(CPU_SUBTYPE_POWERPC_602, object, "CPU_SUBTYPE_POWERPC_602");
844854
yr_set_integer(CPU_SUBTYPE_POWERPC_603, object, "CPU_SUBTYPE_POWERPC_603");
845855
yr_set_integer(CPU_SUBTYPE_POWERPC_603e, object, "CPU_SUBTYPE_POWERPC_603e");
846-
yr_set_integer(CPU_SUBTYPE_POWERPC_603ev, object, "CPU_SUBTYPE_POWERPC_603ev");
856+
yr_set_integer(
857+
CPU_SUBTYPE_POWERPC_603ev, object, "CPU_SUBTYPE_POWERPC_603ev");
847858
yr_set_integer(CPU_SUBTYPE_POWERPC_604, object, "CPU_SUBTYPE_POWERPC_604");
848859
yr_set_integer(CPU_SUBTYPE_POWERPC_604e, object, "CPU_SUBTYPE_POWERPC_604e");
849860
yr_set_integer(CPU_SUBTYPE_POWERPC_620, object, "CPU_SUBTYPE_POWERPC_620");
@@ -881,7 +892,8 @@ void macho_set_definitions(YR_OBJECT* object)
881892
yr_set_integer(MH_NOFIXPREBINDING, object, "MH_NOFIXPREBINDING");
882893
yr_set_integer(MH_PREBINDABLE, object, "MH_PREBINDABLE");
883894
yr_set_integer(MH_ALLMODSBOUND, object, "MH_ALLMODSBOUND");
884-
yr_set_integer(MH_SUBSECTIONS_VIA_SYMBOLS, object, "MH_SUBSECTIONS_VIA_SYMBOLS");
895+
yr_set_integer(
896+
MH_SUBSECTIONS_VIA_SYMBOLS, object, "MH_SUBSECTIONS_VIA_SYMBOLS");
885897
yr_set_integer(MH_CANONICAL, object, "MH_CANONICAL");
886898
yr_set_integer(MH_WEAK_DEFINES, object, "MH_WEAK_DEFINES");
887899
yr_set_integer(MH_BINDS_TO_WEAK, object, "MH_BINDS_TO_WEAK");
@@ -914,7 +926,8 @@ void macho_set_definitions(YR_OBJECT* object)
914926
yr_set_integer(S_CSTRING_LITERALS, object, "S_CSTRING_LITERALS");
915927
yr_set_integer(S_4BYTE_LITERALS, object, "S_4BYTE_LITERALS");
916928
yr_set_integer(S_8BYTE_LITERALS, object, "S_8BYTE_LITERALS");
917-
yr_set_integer(S_NON_LAZY_SYMBOL_POINTERS, object, "S_NON_LAZY_SYMBOL_POINTERS");
929+
yr_set_integer(
930+
S_NON_LAZY_SYMBOL_POINTERS, object, "S_NON_LAZY_SYMBOL_POINTERS");
918931
yr_set_integer(S_LAZY_SYMBOL_POINTERS, object, "S_LAZY_SYMBOL_POINTERS");
919932
yr_set_integer(S_LITERAL_POINTERS, object, "S_LITERAL_POINTERS");
920933
yr_set_integer(S_SYMBOL_STUBS, object, "S_SYMBOL_STUBS");
@@ -946,7 +959,8 @@ void macho_set_definitions(YR_OBJECT* object)
946959
yr_set_integer(S_ATTR_STRIP_STATIC_SYMS, object, "S_ATTR_STRIP_STATIC_SYMS");
947960
yr_set_integer(S_ATTR_NO_DEAD_STRIP, object, "S_ATTR_NO_DEAD_STRIP");
948961
yr_set_integer(S_ATTR_LIVE_SUPPORT, object, "S_ATTR_LIVE_SUPPORT");
949-
yr_set_integer(S_ATTR_SELF_MODIFYING_CODE, object, "S_ATTR_SELF_MODIFYING_CODE");
962+
yr_set_integer(
963+
S_ATTR_SELF_MODIFYING_CODE, object, "S_ATTR_SELF_MODIFYING_CODE");
950964
yr_set_integer(S_ATTR_DEBUG, object, "S_ATTR_DEBUG");
951965
yr_set_integer(S_ATTR_SOME_INSTRUCTIONS, object, "S_ATTR_SOME_INSTRUCTIONS");
952966
yr_set_integer(S_ATTR_EXT_RELOC, object, "S_ATTR_EXT_RELOC");
@@ -1048,9 +1062,12 @@ define_function(ep_for_arch_subtype)
10481062
uint64_t entry_point = yr_get_integer(module, "file[%i].entry_point", i);
10491063
uint64_t file_offset = yr_get_integer(module, "fat_arch[%i].offset", i);
10501064

1051-
if (entry_point == YR_UNDEFINED) {
1065+
if (entry_point == YR_UNDEFINED)
1066+
{
10521067
return_integer(YR_UNDEFINED);
1053-
} else {
1068+
}
1069+
else
1070+
{
10541071
return_integer(file_offset + entry_point);
10551072
}
10561073
}

tests/test-macho.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,8 @@ int main(int argc, char** argv)
236236

237237
assert_true_rule_file(
238238
"import \"macho\" rule test { condition: \
239-
macho.file[0].magic == 0xfeedface and \
240-
macho.file[1].magic == 0xfeedfacf }",
239+
macho.file[0].magic == 0xcefaedfe and \
240+
macho.file[1].magic == 0xcffaedfe }",
241241
"tests/data/tiny-universal");
242242

243243
// Entry points for files (LC_MAIN)

0 commit comments

Comments
 (0)