Skip to content

Commit d3fb41d

Browse files
kyulee-comKyungwoo Lee
and
Kyungwoo Lee
authored
[CGData] llvm-cgdata (#89884)
The llvm-cgdata tool has been introduced to handle reading and writing of codegen data. This data includes an optimistic codegen summary that can be utilized to enhance subsequent codegen. Currently, the tool supports saving and restoring the outlined hash tree, facilitating machine function outlining across modules. Additional codegen summaries can be incorporated into separate sections as required. This patch primarily establishes basic support for the reader and writer, similar to llvm-profdata. The high-level operations of llvm-cgdata are as follows: 1. It reads local raw codegen data from a custom section (for example, __llvm_outline) embedded in native binary files. 2. It merges the local raw codegen data into indexed codegen data, complete with a suitable header. 3. It handles reading and writing of the indexed codegen data from and to a standalone file. This depends on #89792. This is a patch for https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753. --------- Co-authored-by: Kyungwoo Lee <[email protected]>
1 parent c473e75 commit d3fb41d

20 files changed

+1736
-0
lines changed
+204
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
//===- CodeGenData.h --------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains support for codegen data that has a stable summary which
10+
// can be used to optimize the code in the subsequent codegen.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_CODEGENDATA_CODEGENDATA_H
15+
#define LLVM_CODEGENDATA_CODEGENDATA_H
16+
17+
#include "llvm/ADT/BitmaskEnum.h"
18+
#include "llvm/Bitcode/BitcodeReader.h"
19+
#include "llvm/CodeGenData/OutlinedHashTree.h"
20+
#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
21+
#include "llvm/IR/Module.h"
22+
#include "llvm/Object/ObjectFile.h"
23+
#include "llvm/Support/ErrorHandling.h"
24+
#include "llvm/TargetParser/Triple.h"
25+
#include <mutex>
26+
27+
namespace llvm {
28+
29+
/// Kinds of codegen data sections. The enumerators are generated from the
/// CG_DATA_SECT_ENTRY entries in CodeGenData.inc: the macro defined below keeps
/// only the `Kind` argument of each entry and drops the section-name arguments.
enum CGDataSectKind {
30+
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
31+
#include "llvm/CodeGenData/CodeGenData.inc"
32+
};
33+
34+
/// Declared here, defined elsewhere.
/// NOTE(review): presumably returns the section name for the section kind
/// \p CGSK in the object file format \p OF, with \p AddSegmentInfo (default
/// true) controlling whether a segment prefix such as "__DATA," is included --
/// confirm against the implementation.
std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
35+
Triple::ObjectFormatType OF,
36+
bool AddSegmentInfo = true);
37+
38+
/// Kinds of codegen data. The enum is marked as a bitmask enum whose largest
/// value is FunctionOutlinedHashTree, so values can be combined as bit flags.
enum class CGDataKind {
39+
Unknown = 0x0,
40+
// A function outlining info.
41+
FunctionOutlinedHashTree = 0x1,
42+
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree)
43+
};
44+
45+
/// Returns the std::error_category that make_error_code() pairs with
/// cgdata_error values. Defined out of line.
const std::error_category &cgdata_category();
46+
47+
/// Error codes for codegen data handling. `success` (0) is the non-error
/// value: CGDataError asserts that it is never constructed with it, and
/// CGDataError::take() returns it when no CGDataError was handled.
enum class cgdata_error {
48+
success = 0,
49+
eof,
50+
bad_magic,
51+
bad_header,
52+
empty_cgdata,
53+
malformed,
54+
unsupported_version,
55+
};
56+
57+
/// Wraps \p E in a std::error_code: the enum value is converted to int and
/// paired with cgdata_category().
inline std::error_code make_error_code(cgdata_error E) {
58+
return std::error_code(static_cast<int>(E), cgdata_category());
59+
}
60+
61+
/// An ErrorInfo subclass that carries a cgdata_error code together with an
/// optional message string.
class CGDataError : public ErrorInfo<CGDataError> {
62+
public:
63+
/// Constructs an error from \p Err and an optional message \p ErrStr, which is
/// copied into an owned std::string. \p Err must not be
/// cgdata_error::success (checked by assert).
CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
64+
: Err(Err), Msg(ErrStr.str()) {
65+
assert(Err != cgdata_error::success && "Not an error");
66+
}
67+
68+
/// Returns the textual description of this error. Defined out of line.
std::string message() const override;
69+
70+
/// Writes message() to \p OS.
void log(raw_ostream &OS) const override { OS << message(); }
71+
72+
/// Converts the stored error code to a std::error_code via make_error_code().
std::error_code convertToErrorCode() const override {
73+
return make_error_code(Err);
74+
}
75+
76+
/// Returns the stored error enum value.
cgdata_error get() const { return Err; }
77+
/// Returns the message string captured at construction (may be empty).
const std::string &getMessage() const { return Msg; }
78+
79+
/// Consume an Error and return the raw enum value contained within it, and
80+
/// the optional error message. The Error must either be a success value, or
81+
/// contain a single CGDataError.
82+
static std::pair<cgdata_error, std::string> take(Error E) {
83+
auto Err = cgdata_error::success;
84+
std::string Msg;
85+
// The handler runs once per contained CGDataError; the assert below enforces
// that at most one is present.
handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) {
86+
assert(Err == cgdata_error::success && "Multiple errors encountered");
87+
Err = IPE.get();
88+
Msg = IPE.getMessage();
89+
});
90+
return {Err, Msg};
91+
}
92+
93+
// NOTE(review): presumably the type-identification anchor expected by the
// ErrorInfo base; defined out of line -- confirm.
static char ID;
94+
95+
private:
96+
cgdata_error Err;
97+
std::string Msg;
98+
};
99+
100+
/// Codegen data mode: none, read, or write.
/// NOTE(review): this enum is not referenced anywhere else in this header;
/// confirm its users before relying on the numeric values (None == 0).
enum CGDataMode {
101+
None,
102+
Read,
103+
Write,
104+
};
105+
106+
/// Holder for codegen data shared across modules: the published outlined hash
/// tree and the flag that says whether codegen data should be emitted. The
/// constructor is private; the object is reached through getInstance().
class CodeGenData {
107+
/// Global outlined hash tree that has outlined hash sequences across modules.
108+
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
109+
110+
/// This flag is set when -fcodegen-data-generate is passed.
111+
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
/// It is initialized in-class so that its value never depends on whether the
/// instance happens to be value-initialized or default-initialized.
112+
bool EmitCGData = false;
113+
114+
/// This is a singleton instance which is thread-safe. Unlike profile data
115+
/// which is largely function-based, codegen data describes the whole module.
116+
/// Therefore, this can be initialized once, and can be used across modules
117+
/// instead of constructing the same one for each codegen backend.
118+
static std::unique_ptr<CodeGenData> Instance;
119+
static std::once_flag OnceFlag;
120+
121+
CodeGenData() = default;
122+
123+
public:
124+
~CodeGenData() = default;
125+
126+
/// Returns the singleton instance. Defined out of line.
static CodeGenData &getInstance();
127+
128+
/// Returns true if we have a valid outlined hash tree, i.e. a tree has been
/// published and it is not empty.
129+
bool hasOutlinedHashTree() const {
130+
return PublishedHashTree && !PublishedHashTree->empty();
131+
}
132+
133+
/// Returns the outlined hash tree. This can be globally used in a read-only
134+
/// manner. Returns nullptr if no tree has been published.
135+
const OutlinedHashTree *getOutlinedHashTree() const {
136+
return PublishedHashTree.get();
137+
}
138+
139+
/// Returns true if we should write codegen data.
140+
bool emitCGData() const { return EmitCGData; }
141+
142+
/// Publish the (globally) merged or read outlined hash tree. Takes ownership
/// of \p HashTree, replacing any previously published tree.
143+
void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
144+
PublishedHashTree = std::move(HashTree);
145+
// Ensure we disable emitCGData as we do not want to read and write both.
146+
EmitCGData = false;
147+
}
148+
};
149+
150+
/// Free-function conveniences that forward to the CodeGenData singleton, plus
/// warning helpers declared here and defined elsewhere.
namespace cgdata {
151+
152+
/// Forwards to CodeGenData::getInstance().hasOutlinedHashTree().
inline bool hasOutlinedHashTree() {
153+
return CodeGenData::getInstance().hasOutlinedHashTree();
154+
}
155+
156+
/// Forwards to CodeGenData::getInstance().getOutlinedHashTree(); the result is
/// null when no tree has been published.
inline const OutlinedHashTree *getOutlinedHashTree() {
157+
return CodeGenData::getInstance().getOutlinedHashTree();
158+
}
159+
160+
/// Forwards to CodeGenData::getInstance().emitCGData().
inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
161+
162+
/// Hands ownership of \p HashTree to the CodeGenData singleton, which also
/// clears its emit flag.
inline void
163+
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
164+
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
165+
}
166+
167+
// NOTE(review): both overloads are only declared here. Presumably they report
// a warning attributed to \p Whence, with an optional \p Hint -- confirm
// against the definitions.
void warn(Error E, StringRef Whence = "");
168+
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
169+
170+
} // end namespace cgdata
171+
172+
/// Constants and the header layout for the indexed codegen data format.
namespace IndexedCGData {
173+
174+
// A signature for data validation, representing "\xffcgdata\x81" in
175+
// little-endian order
176+
const uint64_t Magic = 0x81617461646763ff;
177+
178+
/// Format versions. CurrentVersion takes its value from the
/// CG_DATA_INDEX_VERSION macro.
enum CGDataVersion {
179+
// Version 1 is the first version. This version supports the outlined
180+
// hash tree.
181+
Version1 = 1,
182+
CurrentVersion = CG_DATA_INDEX_VERSION
183+
};
184+
/// The current format version as a 64-bit integer.
const uint64_t Version = CGDataVersion::CurrentVersion;
185+
186+
/// Header of the indexed codegen data.
/// NOTE(review): Magic and Version presumably hold the constants above and
/// DataKind presumably holds CGDataKind bits; OutlinedHashTreeOffset is
/// presumably an offset to the outlined hash tree payload -- confirm against
/// the reader/writer.
struct Header {
187+
uint64_t Magic;
188+
uint32_t Version;
189+
uint32_t DataKind;
190+
uint64_t OutlinedHashTreeOffset;
191+
192+
// New fields should only be added at the end to ensure that the size
193+
// computation is correct. The methods below need to be updated to ensure that
194+
// the new field is read correctly.
195+
196+
// Reads a header struct from the buffer.
// Returns the parsed Header or an Error; defined out of line. No buffer length
// is passed, so bounds must be guaranteed by the caller.
197+
static Expected<Header> readFromBuffer(const unsigned char *Curr);
198+
};
199+
200+
} // end namespace IndexedCGData
201+
202+
} // end namespace llvm
203+
204+
#endif // LLVM_CODEGENDATA_CODEGENDATA_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\
2+
|*
3+
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
|* See https://llvm.org/LICENSE.txt for license information.
5+
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
|*
7+
\*===----------------------------------------------------------------------===*/
8+
/*
9+
* This is the main file that defines all the data structures, signatures, and
10+
* constant literals that are shared across the compiler and host tools
11+
* (reader/writer) to support codegen data.
12+
*
13+
\*===----------------------------------------------------------------------===*/
14+
15+
/* Helper macros. */
16+
/* CG_DATA_SIMPLE_QUOTE stringifies its argument exactly as written. */
#define CG_DATA_SIMPLE_QUOTE(x) #x
17+
/* CG_DATA_QUOTE macro-expands its argument first and then stringifies the
 * result, thanks to the extra level of indirection. */
#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x)
18+
19+
/* Section table. An includer defines CG_DATA_SECT_ENTRY with four parameters
 * (kind, common section name, COFF section name, prefix) before including this
 * file; each entry below is expanded with that definition, and the macro is
 * undefined again afterwards. CG_DATA_DEFINED records that the table was
 * expanded.
 * NOTE(review): CG_DATA_OUTLINE_COMMON and CG_DATA_OUTLINE_COFF are defined
 * further down in this file, so the section-name arguments expand to the
 * intended names only if this file was already included once before the
 * expansion -- confirm this holds for every includer that uses them. */
#ifdef CG_DATA_SECT_ENTRY
20+
#define CG_DATA_DEFINED
21+
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
22+
CG_DATA_OUTLINE_COFF, "__DATA,")
23+
24+
#undef CG_DATA_SECT_ENTRY
25+
#endif
26+
27+
/* section name strings common to all targets other
28+
than WIN32 */
29+
/* Bare token, not a string literal: wrap it in CG_DATA_QUOTE to obtain
 * "__llvm_outline". */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
30+
/* Since cg data sections are not allocated, we don't need to
31+
* access them at runtime.
32+
*/
33+
/* COFF section name; unlike the common name this is already a string
 * literal. */
#define CG_DATA_OUTLINE_COFF ".loutline"
34+
35+
/* CG_DATA_SECT_NAME is the COFF name when _WIN32 is defined and the quoted
 * common name otherwise. */
#ifdef _WIN32
36+
/* Runtime section names and name strings. */
37+
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF
38+
39+
#else
40+
/* Runtime section names and name strings. */
41+
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)
42+
43+
#endif
44+
45+
/* Indexed codegen data format version (start from 1). */
46+
#define CG_DATA_INDEX_VERSION 1

0 commit comments

Comments
 (0)