-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[CGData] llvm-cgdata #89884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
[CGData] llvm-cgdata #89884
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
94c2659
[CGData] llvm-cgdata
kyulee-com ea8714f
Address comments from Ellis
kyulee-com d8f8b23
test fix on macosx
59952c7
Address feedbacks from Alex
kyulee-com 09fd9d4
address comments from Vincent
kyulee-com 3949783
Address comments from Ellis #2
kyulee-com File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
//===- CodeGenData.h --------------------------------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This file contains support for codegen data that has a stable summary, which | ||
// can be used to optimize the code in the subsequent codegen. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_CODEGENDATA_CODEGENDATA_H | ||
#define LLVM_CODEGENDATA_CODEGENDATA_H | ||
|
||
#include "llvm/ADT/BitmaskEnum.h" | ||
#include "llvm/Bitcode/BitcodeReader.h" | ||
#include "llvm/CodeGenData/OutlinedHashTree.h" | ||
#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" | ||
#include "llvm/IR/Module.h" | ||
#include "llvm/Object/ObjectFile.h" | ||
#include "llvm/Support/ErrorHandling.h" | ||
#include "llvm/TargetParser/Triple.h" | ||
#include <mutex> | ||
|
||
namespace llvm { | ||
|
||
/// Kinds of codegen data sections. One enumerator is produced for every
/// CG_DATA_SECT_ENTRY listed in CodeGenData.inc; only the first macro argument
/// (the kind name) is used here.
enum CGDataSectKind {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/CodeGenData/CodeGenData.inc"
};
|
||
std::string getCodeGenDataSectionName(CGDataSectKind CGSK, | ||
Triple::ObjectFormatType OF, | ||
bool AddSegmentInfo = true); | ||
|
||
enum class CGDataKind { | ||
Unknown = 0x0, | ||
// A function outlining info. | ||
FunctionOutlinedHashTree = 0x1, | ||
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree) | ||
}; | ||
|
||
const std::error_category &cgdata_category(); | ||
|
||
/// Error codes reported for codegen data. `success` (0) is the only value that
/// does not denote an error; the remaining enumerators are numbered
/// consecutively from 1.
enum class cgdata_error {
  success = 0,
  eof = 1,
  bad_magic = 2,
  bad_header = 3,
  empty_cgdata = 4,
  malformed = 5,
  unsupported_version = 6,
};
|
||
/// Wraps \p E in a std::error_code that belongs to cgdata_category().
inline std::error_code make_error_code(cgdata_error E) {
  const int Code = static_cast<int>(E);
  return std::error_code(Code, cgdata_category());
}
|
||
/// Error payload that pairs a cgdata_error code with an optional message.
class CGDataError : public ErrorInfo<CGDataError> {
public:
  /// Creates an error for \p Err with the optional description \p ErrStr.
  /// \p Err must not be cgdata_error::success (checked by an assertion).
  CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
      : Err(Err), Msg(ErrStr.str()) {
    assert(Err != cgdata_error::success && "Not an error");
  }

  /// Returns the text describing this error (defined out of line).
  std::string message() const override;

  /// Prints message() to \p OS.
  void log(raw_ostream &OS) const override { OS << message(); }

  /// Converts the stored code to a std::error_code via make_error_code().
  std::error_code convertToErrorCode() const override {
    return make_error_code(Err);
  }

  /// Returns the stored error code.
  cgdata_error get() const { return Err; }

  /// Returns the message passed at construction (possibly empty).
  const std::string &getMessage() const { return Msg; }

  /// Consumes \p E and returns the error code and message it carried. \p E
  /// must be either a success value, in which case
  /// {cgdata_error::success, ""} is returned, or hold a single CGDataError.
  static std::pair<cgdata_error, std::string> take(Error E) {
    std::pair<cgdata_error, std::string> Result(cgdata_error::success,
                                                std::string());
    handleAllErrors(std::move(E), [&Result](const CGDataError &Info) {
      // The handler must fire at most once; a second hit means that E held
      // more than one CGDataError.
      assert(Result.first == cgdata_error::success &&
             "Multiple errors encountered");
      Result.first = Info.get();
      Result.second = Info.getMessage();
    });
    return Result;
  }

  static char ID;

private:
  cgdata_error Err;
  std::string Msg;
};
|
||
/// Mode selector for codegen data: no use (None), Read, or Write.
enum CGDataMode {
  None = 0,
  Read = 1,
  Write = 2,
};
|
||
class CodeGenData { | ||
/// Global outlined hash tree that has oulined hash sequences across modules. | ||
std::unique_ptr<OutlinedHashTree> PublishedHashTree; | ||
|
||
/// This flag is set when -fcodegen-data-generate is passed. | ||
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds. | ||
bool EmitCGData; | ||
|
||
/// This is a singleton instance which is thread-safe. Unlike profile data | ||
/// which is largely function-based, codegen data describes the whole module. | ||
/// Therefore, this can be initialized once, and can be used across modules | ||
/// instead of constructing the same one for each codegen backend. | ||
static std::unique_ptr<CodeGenData> Instance; | ||
static std::once_flag OnceFlag; | ||
|
||
CodeGenData() = default; | ||
|
||
public: | ||
~CodeGenData() = default; | ||
|
||
static CodeGenData &getInstance(); | ||
|
||
/// Returns true if we have a valid outlined hash tree. | ||
bool hasOutlinedHashTree() { | ||
return PublishedHashTree && !PublishedHashTree->empty(); | ||
} | ||
|
||
/// Returns the outlined hash tree. This can be globally used in a read-only | ||
/// manner. | ||
const OutlinedHashTree *getOutlinedHashTree() { | ||
return PublishedHashTree.get(); | ||
} | ||
|
||
/// Returns true if we should write codegen data. | ||
bool emitCGData() { return EmitCGData; } | ||
|
||
/// Publish the (globally) merged or read outlined hash tree. | ||
void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) { | ||
PublishedHashTree = std::move(HashTree); | ||
// Ensure we disable emitCGData as we do not want to read and write both. | ||
EmitCGData = false; | ||
} | ||
}; | ||
|
||
namespace cgdata { | ||
|
||
inline bool hasOutlinedHashTree() { | ||
return CodeGenData::getInstance().hasOutlinedHashTree(); | ||
} | ||
|
||
inline const OutlinedHashTree *getOutlinedHashTree() { | ||
return CodeGenData::getInstance().getOutlinedHashTree(); | ||
} | ||
|
||
inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); } | ||
|
||
inline void | ||
publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) { | ||
CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); | ||
} | ||
|
||
void warn(Error E, StringRef Whence = ""); | ||
void warn(Twine Message, std::string Whence = "", std::string Hint = ""); | ||
|
||
} // end namespace cgdata | ||
|
||
namespace IndexedCGData { | ||
|
||
// A signature for data validation, representing "\xffcgdata\x81" in | ||
// little-endian order | ||
const uint64_t Magic = 0x81617461646763ff; | ||
|
||
enum CGDataVersion { | ||
// Version 1 is the first version. This version supports the outlined | ||
// hash tree. | ||
Version1 = 1, | ||
CurrentVersion = CG_DATA_INDEX_VERSION | ||
}; | ||
const uint64_t Version = CGDataVersion::CurrentVersion; | ||
|
||
struct Header { | ||
uint64_t Magic; | ||
uint32_t Version; | ||
uint32_t DataKind; | ||
uint64_t OutlinedHashTreeOffset; | ||
|
||
// New fields should only be added at the end to ensure that the size | ||
// computation is correct. The methods below need to be updated to ensure that | ||
// the new field is read correctly. | ||
|
||
// Reads a header struct from the buffer. | ||
static Expected<Header> readFromBuffer(const unsigned char *Curr); | ||
}; | ||
|
||
} // end namespace IndexedCGData | ||
|
||
} // end namespace llvm | ||
|
||
#endif // LLVM_CODEGENDATA_CODEGENDATA_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/*===-- CodeGenData.inc ----------------------------------------*- C++ -*-=== *\ | ||
|* | ||
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
|* See https://llvm.org/LICENSE.txt for license information. | ||
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|* | ||
\*===----------------------------------------------------------------------===*/ | ||
/* | ||
* This is the main file that defines all the data structures, signatures, and | ||
* constant literals that are shared across the compiler and host tools | ||
* (reader/writer) to support codegen data. | ||
* | ||
\*===----------------------------------------------------------------------===*/ | ||
|
||
/* Helper macros: CG_DATA_QUOTE macro-expands its argument before turning the
 * result into a string literal. */
#define CG_DATA_SIMPLE_QUOTE(x) #x
#define CG_DATA_QUOTE(x) CG_DATA_SIMPLE_QUOTE(x)

/* Section table. Expanded only when the includer has defined
 * CG_DATA_SECT_ENTRY; the macro is undefined again after use. */
#ifdef CG_DATA_SECT_ENTRY
#define CG_DATA_DEFINED
CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON),
                   CG_DATA_OUTLINE_COFF, "__DATA,")

#undef CG_DATA_SECT_ENTRY
#endif

/* Section name string common to all targets other than WIN32. */
#define CG_DATA_OUTLINE_COMMON __llvm_outline
/* Since cg data sections are not allocated, we don't need to
 * access them at runtime.
 */
#define CG_DATA_OUTLINE_COFF ".loutline"

#ifdef _WIN32
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF

#else
/* Runtime section names and name strings. */
#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON)

#endif

/* Indexed codegen data format version (start from 1). */
#define CG_DATA_INDEX_VERSION 1
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it make sense to also have a SubVersion - which is compatible within the same Version but data might be logically different (i.e. minor fixes, etc) ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't anticipate many changes of this format in the future, and aim for simplicity. The code is largely modeled after the IRPGO profile -- https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/ProfileData/InstrProf.h#L1071