Skip to content

Commit 1dbd111

Browse files
committed
Address comments from Ellis
1 parent a8f5e1a commit 1dbd111

File tree

11 files changed

+201
-148
lines changed

11 files changed

+201
-148
lines changed

llvm/include/llvm/CodeGenData/CodeGenData.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ class CodeGenData {
107107
/// Global outlined hash tree that has outlined hash sequences across modules.
108108
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
109109

110-
/// This flag is set when -fcgdata-generate is passed.
111-
/// Or, it can be mutated with -ftwo-codegen-rounds during two codegen runs.
110+
/// This flag is set when -fcodegen-data-generate is passed.
111+
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
112112
bool EmitCGData;
113113

114114
/// This is a singleton instance which is thread-safe. Unlike profile data
@@ -174,7 +174,7 @@ namespace IndexedCGData {
174174
const uint64_t Magic = 0x81617461646763ff; // "\xffcgdata\x81"
175175

176176
enum CGDataVersion {
177-
// Version 1 is the first version. This version support the outlined
177+
// Version 1 is the first version. This version supports the outlined
178178
// hash tree.
179179
Version1 = 1,
180180
CurrentVersion = CG_DATA_INDEX_VERSION

llvm/lib/CodeGenData/CodeGenDataReader.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -144,23 +144,23 @@ Error TextCodeGenDataReader::read() {
144144

145145
// Parse the custom header line by line.
146146
while (Line->starts_with(":")) {
147-
StringRef Str = Line->substr(1);
147+
StringRef Str = Line->drop_front().rtrim();
148148
if (Str.equals_insensitive("outlined_hash_tree"))
149149
DataKind |= CGDataKind::FunctionOutlinedHashTree;
150150
else
151151
return error(cgdata_error::bad_header);
152152
++Line;
153153
}
154154

155-
// We treat an empty header (that as a comment # only) as a valid header.
155+
// We treat an empty header (that is a comment # only) as a valid header.
156156
if (Line.is_at_eof()) {
157157
if (DataKind != CGDataKind::Unknown)
158158
return error(cgdata_error::bad_header);
159159
return Error::success();
160160
}
161161

162162
// The YAML docs follow after the header.
163-
const char *Pos = (*Line).data();
163+
const char *Pos = Line->data();
164164
size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
165165
reinterpret_cast<size_t>(Pos);
166166
yaml::Input YOS(StringRef(Pos, Size));

llvm/lib/CodeGenData/CodeGenDataWriter.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
108108

109109
Header.OutlinedHashTreeOffset = 0;
110110

111-
// Only write out up to the CGDataKind. We need to remember the offest of the
112-
// remaing fields to allow back patching later.
111+
// Only write up to the CGDataKind. We need to remember the offset of the
112+
// remaining fields to allow back-patching later.
113113
COS.write(Header.Magic);
114114
COS.write32(Header.Version);
115115
COS.write32(Header.DataKind);

llvm/test/tools/llvm-cgdata/dump.test

+2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ RUN: split-file %s %t
55
RUN: llvm-cgdata dump -binary %t/dump.cgtext -o %t/dump.cgdata
66
RUN: llvm-cgdata dump -text %t/dump.cgdata -o %t/dump-round.cgtext
77
RUN: llvm-cgdata dump -binary %t/dump-round.cgtext -o %t/dump-round.cgdata
8+
RUN: llvm-cgdata dump -text %t/dump-round.cgtext -o %t/dump-round-round.cgtext
89
RUN: diff %t/dump.cgdata %t/dump-round.cgdata
10+
RUN: diff %t/dump-round.cgtext %t/dump-round-round.cgtext
911

1012
;--- dump.cgtext
1113
# Outlined stable hash tree

llvm/test/tools/llvm-cgdata/empty.test

+10-7
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,25 @@
1+
# Test no input file
2+
RUN: not llvm-cgdata dump -o - 2>&1 | FileCheck %s --check-prefix=NOFILE --ignore-case
3+
NOFILE: error: No such file or directory
4+
15
# Test for empty cgdata file, which is invalid.
26
RUN: touch %t_emptyfile.cgtext
3-
RUN: not llvm-cgdata dump %t_emptyfile.cgtext -text -o - 2>&1 | FileCheck %s --check-prefix ERROR
4-
ERROR: {{.}}emptyfile.cgtext: empty codegen data
7+
RUN: not llvm-cgdata dump %t_emptyfile.cgtext -text 2>&1 | FileCheck %s --check-prefix=EMPTY
8+
EMPTY: {{.}}emptyfile.cgtext: empty codegen data
59

610
# Test for empty header in the text format. It can be converted to a valid binary file.
711
RUN: printf '#' > %t_emptyheader.cgtext
812
RUN: llvm-cgdata dump %t_emptyheader.cgtext -binary -o %t_emptyheader.cgdata
913

1014
# Without any cgdata other than the header, no data shows by default.
11-
RUN: llvm-cgdata show %t_emptyheader.cgdata | FileCheck %s --allow-empty --check-prefix EMPTY
12-
EMPTY-NOT: any
15+
RUN: llvm-cgdata show %t_emptyheader.cgdata | count 0
1316

1417
# The version number appears when asked, as it's in the header
15-
RUN: llvm-cgdata show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix VERSION
16-
VERSION: Version: {{.}}
18+
RUN: llvm-cgdata show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
19+
VERSION: Version: 1
1720

1821
# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
19-
RUN: llvm-cgdata dump %t_emptyheader.cgdata -text -o - | FileCheck %s --allow-empty --check-prefix EMPTY
22+
RUN: llvm-cgdata dump %t_emptyheader.cgdata -text | count 0
2023

2124
# Synthesize a header only cgdata.
2225
# struct Header {

llvm/test/tools/llvm-cgdata/error.test

+5-5
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,31 @@
88
# uint64_t OutlinedHashTreeOffset;
99
# }
1010
RUN: touch %t_empty.cgdata
11-
RUN: not llvm-cgdata show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix EMPTY
11+
RUN: not llvm-cgdata show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY
1212
EMPTY: {{.}}cgdata: empty codegen data
1313

1414
# Not a magic.
1515
RUN: printf '\xff' > %t_malformed.cgdata
16-
RUN: not llvm-cgdata show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix MALFORMED
16+
RUN: not llvm-cgdata show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix=MALFORMED
1717
MALFORMED: {{.}}cgdata: malformed codegen data
1818

1919
# The minimum header size is 24.
2020
RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
21-
RUN: not llvm-cgdata show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix CORRUPT
21+
RUN: not llvm-cgdata show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT
2222
CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
2323

2424
# The current version is 1 while the header says 2.
2525
RUN: printf '\xffcgdata\x81' > %t_version.cgdata
2626
RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata
2727
RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
2828
RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
29-
RUN: not llvm-cgdata show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix BAD_VERSION
29+
RUN: not llvm-cgdata show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix=BAD_VERSION
3030
BAD_VERSION: {{.}}cgdata: unsupported codegen data version
3131

3232
# Header says an outlined hash tree, but the file ends after the header.
3333
RUN: printf '\xffcgdata\x81' > %t_eof.cgdata
3434
RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
3535
RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
3636
RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
37-
RUN: not llvm-cgdata show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix EOF
37+
RUN: not llvm-cgdata show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix=EOF
3838
EOF: {{.}}cgdata: end of File
+50-35
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,34 @@
1+
# REQUIRES: shell
2+
# UNSUPPORTED: system-windows
3+
14
# Merge an archive that has two object files having cgdata (__llvm_outline)
25

36
RUN: split-file %s %t
47

8+
# Synthesize raw cgdata without the header (24 bytes) from the indexed cgdata.
9+
RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
10+
RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-1-bytes.txt
11+
RUN: sed -i "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll
512
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
13+
14+
# Synthesize raw cgdata without the header (24 bytes) from the indexed cgdata.
15+
RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
16+
RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-2-bytes.txt
17+
RUN: sed -i "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll
618
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
19+
20+
# Make an archive from two object files
721
RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
22+
23+
# Merge the archive into the codegen data file.
824
RUN: llvm-cgdata merge %t/merge-archive.a -o %t/merge-archive.cgdata
925
RUN: llvm-cgdata show %t/merge-archive.cgdata | FileCheck %s
1026
CHECK: Outlined hash tree:
1127
CHECK-NEXT: Total Node Count: 4
1228
CHECK-NEXT: Terminal Node Count: 2
1329
CHECK-NEXT: Depth: 2
1430

15-
RUN: llvm-cgdata dump %t/merge-archive.cgdata | FileCheck %s --check-prefix TREE
31+
RUN: llvm-cgdata dump %t/merge-archive.cgdata | FileCheck %s --check-prefix=TREE
1632
TREE: # Outlined stable hash tree
1733
TREE-NEXT: :outlined_hash_tree
1834
TREE-NEXT: ---
@@ -34,42 +50,41 @@ TREE-NEXT: Terminals: 4
3450
TREE-NEXT: SuccessorIds: [ ]
3551
TREE-NEXT: ...
3652

53+
;--- raw-1.cgtext
54+
:outlined_hash_tree
55+
0:
56+
Hash: 0x0
57+
Terminals: 0
58+
SuccessorIds: [ 1 ]
59+
1:
60+
Hash: 0x1
61+
Terminals: 0
62+
SuccessorIds: [ 2 ]
63+
2:
64+
Hash: 0x2
65+
Terminals: 4
66+
SuccessorIds: [ ]
67+
...
68+
3769
;--- merge-1.ll
70+
@.data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
3871

39-
; The .data is encoded in a binary form based on the following yaml form. See serialize() in OutlinedHashTreeRecord.cpp
40-
;---
41-
;0:
42-
; Hash: 0x0
43-
; Terminals: 0
44-
; SuccessorIds: [ 1 ]
45-
;1:
46-
; Hash: 0x1
47-
; Terminals: 0
48-
; SuccessorIds: [ 2 ]
49-
;2:
50-
; Hash: 0x2
51-
; Terminals: 4
52-
; SuccessorIds: [ ]
53-
;...
5472

55-
@.data = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\02\00\00\00\00\00\00\00\04\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
73+
;--- raw-2.cgtext
74+
:outlined_hash_tree
75+
0:
76+
Hash: 0x0
77+
Terminals: 0
78+
SuccessorIds: [ 1 ]
79+
1:
80+
Hash: 0x1
81+
Terminals: 0
82+
SuccessorIds: [ 2 ]
83+
2:
84+
Hash: 0x3
85+
Terminals: 5
86+
SuccessorIds: [ ]
87+
...
5688

5789
;--- merge-2.ll
58-
59-
; The .data is encoded in a binary form based on the following yaml form. See serialize() in OutlinedHashTreeRecord.cpp
60-
;---
61-
;0:
62-
; Hash: 0x0
63-
; Terminals: 0
64-
; SuccessorIds: [ 1 ]
65-
;1:
66-
; Hash: 0x1
67-
; Terminals: 0
68-
; SuccessorIds: [ 2 ]
69-
;2:
70-
; Hash: 0x3
71-
; Terminals: 5
72-
; SuccessorIds: [ ]
73-
;...
74-
75-
@.data = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\03\00\00\00\00\00\00\00\05\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
90+
@.data = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"
+48-33
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
1+
# REQUIRES: shell
2+
# UNSUPPORTED: system-windows
3+
14
# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_outline)
25

36
RUN: split-file %s %t
47

8+
# Synthesize two sets of raw cgdata without the header (24 bytes) from the indexed cgdata.
9+
# Concatenate them in merge-concat.ll
10+
RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
11+
RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-1-bytes.txt
12+
RUN: sed -i "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat.ll
13+
RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
14+
RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-2-bytes.txt
15+
RUN: sed -i "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat.ll
16+
517
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
618
RUN: llvm-cgdata merge %t/merge-concat.o -o %t/merge-concat.cgdata
719
RUN: llvm-cgdata show %t/merge-concat.cgdata | FileCheck %s
@@ -10,7 +22,7 @@ CHECK-NEXT: Total Node Count: 4
1022
CHECK-NEXT: Terminal Node Count: 2
1123
CHECK-NEXT: Depth: 2
1224

13-
RUN: llvm-cgdata dump %t/merge-concat.cgdata | FileCheck %s --check-prefix TREE
25+
RUN: llvm-cgdata dump %t/merge-concat.cgdata | FileCheck %s --check-prefix=TREE
1426
TREE: # Outlined stable hash tree
1527
TREE-NEXT: :outlined_hash_tree
1628
TREE-NEXT: ---
@@ -32,37 +44,40 @@ TREE-NEXT: Terminals: 4
3244
TREE-NEXT: SuccessorIds: [ ]
3345
TREE-NEXT: ...
3446

35-
;--- merge-concat.ll
47+
;--- raw-1.cgtext
48+
:outlined_hash_tree
49+
0:
50+
Hash: 0x0
51+
Terminals: 0
52+
SuccessorIds: [ 1 ]
53+
1:
54+
Hash: 0x1
55+
Terminals: 0
56+
SuccessorIds: [ 2 ]
57+
2:
58+
Hash: 0x2
59+
Terminals: 4
60+
SuccessorIds: [ ]
61+
...
3662

37-
; In an linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other word, the following two trees are encoded back-to-back in a binary format.
38-
;---
39-
;0:
40-
; Hash: 0x0
41-
; Terminals: 0
42-
; SuccessorIds: [ 1 ]
43-
;1:
44-
; Hash: 0x1
45-
; Terminals: 0
46-
; SuccessorIds: [ 2 ]
47-
;2:
48-
; Hash: 0x2
49-
; Terminals: 4
50-
; SuccessorIds: [ ]
51-
;...
52-
;---
53-
;0:
54-
; Hash: 0x0
55-
; Terminals: 0
56-
; SuccessorIds: [ 1 ]
57-
;1:
58-
; Hash: 0x1
59-
; Terminals: 0
60-
; SuccessorIds: [ 2 ]
61-
;2:
62-
; Hash: 0x3
63-
; Terminals: 5
64-
; SuccessorIds: [ ]
65-
;...
63+
;--- raw-2.cgtext
64+
:outlined_hash_tree
65+
0:
66+
Hash: 0x0
67+
Terminals: 0
68+
SuccessorIds: [ 1 ]
69+
1:
70+
Hash: 0x1
71+
Terminals: 0
72+
SuccessorIds: [ 2 ]
73+
2:
74+
Hash: 0x3
75+
Terminals: 5
76+
SuccessorIds: [ ]
77+
...
78+
79+
;--- merge-concat.ll
6680

67-
@.data1 = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\02\00\00\00\00\00\00\00\04\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
68-
@.data2 = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\03\00\00\00\00\00\00\00\05\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
81+
; In a linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other words, the following two trees are encoded back-to-back in a binary format.
82+
@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
83+
@.data2 = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"

0 commit comments

Comments
 (0)