Skip to content

Commit e643061

Browse files
Incorporate clang thread-safety static analysis into core (#4727)
Introduce clang thread safety analysis https://clang.llvm.org/docs/ThreadSafetyAnalysis.html, and perform it when building core with thread sanitizer. - For now, I've only annotated `FlowControl` and `BucketSnapshotManager` classes, but we should make it a standard for all classes doing concurrency. - This is useful to identify data races and deadlocks at compile time. This change was able to identify two unprotected shared data bugs we had in the past: #4702 and #4458 - It also identified undefined behavior, where [shared_mutex](https://en.cppreference.com/w/cpp/thread/shared_mutex/lock_shared) is locked twice by the same thread. This PR includes a fix as well.
2 parents c5b46a8 + 80cc1a9 commit e643061

13 files changed

+446
-118
lines changed

configure.ac

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,14 @@ AS_IF([test "x$enable_threadsanitizer" = "xyes"], [
127127
sanitizeopts="thread"
128128
])
129129

130+
AC_ARG_ENABLE([threadsafety],
131+
AS_HELP_STRING([--enable-threadsafety],
132+
[build with thread safety static analysis instrumentation (Clang only)]))
133+
AS_IF([test "x$enable_threadsafety" = "xyes"], [
134+
AC_MSG_NOTICE([enabling thread safety static analysis, see https://clang.llvm.org/docs/ThreadSafetyAnalysis.html])
135+
CXXFLAGS="$CXXFLAGS -Wthread-safety -Werror=thread-safety"
136+
])
137+
130138
AC_ARG_ENABLE([memcheck],
131139
AS_HELP_STRING([--enable-memcheck],
132140
[build with memcheck (memory-sanitizer) instrumentation]))

src/bucket/BucketSnapshotManager.cpp

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,24 @@ BucketSnapshotManager::BucketSnapshotManager(
5555
SearchableSnapshotConstPtr
5656
BucketSnapshotManager::copySearchableLiveBucketListSnapshot() const
5757
{
58-
std::shared_lock<std::shared_mutex> lock(mSnapshotMutex);
58+
SharedLockShared guard(mSnapshotMutex);
59+
// Can't use std::make_shared due to private constructor
60+
return copySearchableLiveBucketListSnapshot(guard);
61+
}
62+
63+
SearchableHotArchiveSnapshotConstPtr
64+
BucketSnapshotManager::copySearchableHotArchiveBucketListSnapshot() const
65+
{
66+
SharedLockShared guard(mSnapshotMutex);
67+
releaseAssert(mCurrHotArchiveSnapshot);
68+
// Can't use std::make_shared due to private constructor
69+
return copySearchableHotArchiveBucketListSnapshot(guard);
70+
}
71+
72+
SearchableSnapshotConstPtr
73+
BucketSnapshotManager::copySearchableLiveBucketListSnapshot(
74+
SharedLockShared const& guard) const
75+
{
5976
// Can't use std::make_shared due to private constructor
6077
return std::shared_ptr<SearchableLiveBucketListSnapshot>(
6178
new SearchableLiveBucketListSnapshot(
@@ -66,9 +83,9 @@ BucketSnapshotManager::copySearchableLiveBucketListSnapshot() const
6683
}
6784

6885
SearchableHotArchiveSnapshotConstPtr
69-
BucketSnapshotManager::copySearchableHotArchiveBucketListSnapshot() const
86+
BucketSnapshotManager::copySearchableHotArchiveBucketListSnapshot(
87+
SharedLockShared const& guard) const
7088
{
71-
std::shared_lock<std::shared_mutex> lock(mSnapshotMutex);
7289
releaseAssert(mCurrHotArchiveSnapshot);
7390
// Can't use std::make_shared due to private constructor
7491
return std::shared_ptr<SearchableHotArchiveBucketListSnapshot>(
@@ -86,12 +103,11 @@ BucketSnapshotManager::maybeCopySearchableBucketListSnapshot(
86103
// The canonical snapshot held by the BucketSnapshotManager is not being
87104
// modified. Rather, a thread is checking it's copy against the canonical
88105
// snapshot, so use a shared lock.
89-
std::shared_lock<std::shared_mutex> lock(mSnapshotMutex);
90-
106+
SharedLockShared guard(mSnapshotMutex);
91107
if (!snapshot ||
92108
snapshot->getLedgerSeq() < mCurrLiveSnapshot->getLedgerSeq())
93109
{
94-
snapshot = copySearchableLiveBucketListSnapshot();
110+
snapshot = copySearchableLiveBucketListSnapshot(guard);
95111
}
96112
}
97113

@@ -102,12 +118,12 @@ BucketSnapshotManager::maybeCopySearchableHotArchiveBucketListSnapshot(
102118
// The canonical snapshot held by the BucketSnapshotManager is not being
103119
// modified. Rather, a thread is checking it's copy against the canonical
104120
// snapshot, so use a shared lock.
105-
std::shared_lock<std::shared_mutex> lock(mSnapshotMutex);
121+
SharedLockShared guard(mSnapshotMutex);
106122

107123
if (!snapshot ||
108124
snapshot->getLedgerSeq() < mCurrHotArchiveSnapshot->getLedgerSeq())
109125
{
110-
snapshot = copySearchableHotArchiveBucketListSnapshot();
126+
snapshot = copySearchableHotArchiveBucketListSnapshot(guard);
111127
}
112128
}
113129

@@ -142,7 +158,7 @@ BucketSnapshotManager::updateCurrentSnapshot(
142158

143159
// Updating the BucketSnapshotManager canonical snapshot, must lock
144160
// exclusively for write access.
145-
std::unique_lock<std::shared_mutex> lock(mSnapshotMutex);
161+
SharedLockExclusive guard(mSnapshotMutex);
146162
updateSnapshot(mCurrLiveSnapshot, mLiveHistoricalSnapshots, liveSnapshot);
147163
updateSnapshot(mCurrHotArchiveSnapshot, mHotArchiveHistoricalSnapshots,
148164
hotArchiveSnapshot);

src/bucket/BucketSnapshotManager.h

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "bucket/LiveBucket.h"
1010
#include "main/AppConnector.h"
1111
#include "util/NonCopyable.h"
12+
#include "util/ThreadAnnotations.h"
1213

1314
#include <map>
1415
#include <memory>
@@ -45,28 +46,31 @@ class BucketSnapshotManager : NonMovableOrCopyable
4546
private:
4647
AppConnector mAppConnector;
4748

49+
// Lock must be held when accessing any member variables holding snapshots
50+
mutable SharedMutex mSnapshotMutex;
51+
4852
// Snapshot that is maintained and periodically updated by BucketManager on
4953
// the main thread. When background threads need to generate or refresh a
5054
// snapshot, they will copy this snapshot.
51-
SnapshotPtrT<LiveBucket> mCurrLiveSnapshot{};
52-
SnapshotPtrT<HotArchiveBucket> mCurrHotArchiveSnapshot{};
55+
SnapshotPtrT<LiveBucket> mCurrLiveSnapshot GUARDED_BY(mSnapshotMutex){};
56+
SnapshotPtrT<HotArchiveBucket>
57+
mCurrHotArchiveSnapshot GUARDED_BY(mSnapshotMutex){};
5358

5459
// ledgerSeq that the snapshot is based on -> snapshot
55-
std::map<uint32_t, SnapshotPtrT<LiveBucket>> mLiveHistoricalSnapshots;
60+
std::map<uint32_t, SnapshotPtrT<LiveBucket>>
61+
mLiveHistoricalSnapshots GUARDED_BY(mSnapshotMutex);
5662
std::map<uint32_t, SnapshotPtrT<HotArchiveBucket>>
57-
mHotArchiveHistoricalSnapshots;
63+
mHotArchiveHistoricalSnapshots GUARDED_BY(mSnapshotMutex);
5864

5965
uint32_t const mNumHistoricalSnapshots;
6066

61-
// Lock must be held when accessing any member variables holding snapshots
62-
mutable std::shared_mutex mSnapshotMutex;
63-
6467
public:
6568
// Called by main thread to update snapshots whenever the BucketList
6669
// is updated
6770
void
6871
updateCurrentSnapshot(SnapshotPtrT<LiveBucket>&& liveSnapshot,
69-
SnapshotPtrT<HotArchiveBucket>&& hotArchiveSnapshot);
72+
SnapshotPtrT<HotArchiveBucket>&& hotArchiveSnapshot)
73+
LOCKS_EXCLUDED(mSnapshotMutex);
7074

7175
// numHistoricalLedgers is the number of historical snapshots that the
7276
// snapshot manager will maintain. If numHistoricalLedgers is 5, snapshots
@@ -76,18 +80,34 @@ class BucketSnapshotManager : NonMovableOrCopyable
7680
uint32_t numHistoricalLedgers);
7781

7882
// Copy the most recent snapshot for the live bucket list
79-
SearchableSnapshotConstPtr copySearchableLiveBucketListSnapshot() const;
83+
SearchableSnapshotConstPtr copySearchableLiveBucketListSnapshot() const
84+
LOCKS_EXCLUDED(mSnapshotMutex);
8085

8186
// Copy the most recent snapshot for the hot archive bucket list
8287
SearchableHotArchiveSnapshotConstPtr
83-
copySearchableHotArchiveBucketListSnapshot() const;
88+
copySearchableHotArchiveBucketListSnapshot() const
89+
LOCKS_EXCLUDED(mSnapshotMutex);
90+
91+
// Copy the most recent snapshot for the live bucket list, while holding the
92+
// lock
93+
SearchableSnapshotConstPtr
94+
copySearchableLiveBucketListSnapshot(SharedLockShared const& guard) const
95+
REQUIRES_SHARED(mSnapshotMutex);
96+
97+
// Copy the most recent snapshot for the hot archive bucket list, while
98+
// holding the lock
99+
SearchableHotArchiveSnapshotConstPtr
100+
copySearchableHotArchiveBucketListSnapshot(
101+
SharedLockShared const& guard) const REQUIRES_SHARED(mSnapshotMutex);
84102

85103
// `maybeCopy` interface refreshes `snapshot` if a newer snapshot is
86104
// available. It's a no-op otherwise. This is useful to avoid unnecessary
87105
// copying.
88106
void
89-
maybeCopySearchableBucketListSnapshot(SearchableSnapshotConstPtr& snapshot);
107+
maybeCopySearchableBucketListSnapshot(SearchableSnapshotConstPtr& snapshot)
108+
LOCKS_EXCLUDED(mSnapshotMutex);
90109
void maybeCopySearchableHotArchiveBucketListSnapshot(
91-
SearchableHotArchiveSnapshotConstPtr& snapshot);
110+
SearchableHotArchiveSnapshotConstPtr& snapshot)
111+
LOCKS_EXCLUDED(mSnapshotMutex);
92112
};
93113
}

src/bucket/SearchableBucketList.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ class SearchableLiveBucketListSnapshot
3636
StateArchivalSettings const& sas, uint32_t ledgerVers) const;
3737

3838
friend SearchableSnapshotConstPtr
39-
BucketSnapshotManager::copySearchableLiveBucketListSnapshot() const;
39+
BucketSnapshotManager::copySearchableLiveBucketListSnapshot(
40+
SharedLockShared const& guard) const;
4041
};
4142

4243
class SearchableHotArchiveBucketListSnapshot
@@ -54,6 +55,7 @@ class SearchableHotArchiveBucketListSnapshot
5455
loadKeys(std::set<LedgerKey, LedgerEntryIdCmp> const& inKeys) const;
5556

5657
friend SearchableHotArchiveSnapshotConstPtr
57-
BucketSnapshotManager::copySearchableHotArchiveBucketListSnapshot() const;
58+
BucketSnapshotManager::copySearchableHotArchiveBucketListSnapshot(
59+
SharedLockShared const& guard) const;
5860
};
5961
}

src/overlay/FlowControl.cpp

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ namespace stellar
1616
{
1717

1818
size_t
19-
FlowControl::getOutboundQueueByteLimit(
20-
std::lock_guard<std::mutex>& lockGuard) const
19+
FlowControl::getOutboundQueueByteLimit(MutexLocker& lockGuard) const
2120
{
2221
#ifdef BUILD_TESTS
2322
if (mOutboundQueueLimit)
@@ -44,7 +43,7 @@ FlowControl::FlowControl(AppConnector& connector, bool useBackgroundThread)
4443

4544
bool
4645
FlowControl::hasOutboundCapacity(StellarMessage const& msg,
47-
std::lock_guard<std::mutex>& lockGuard) const
46+
MutexLocker& lockGuard) const
4847
{
4948
releaseAssert(!threadIsMain() || !mUseBackgroundThread);
5049
return mFlowControlCapacity.hasOutboundCapacity(msg) &&
@@ -55,15 +54,15 @@ bool
5554
FlowControl::noOutboundCapacityTimeout(VirtualClock::time_point now,
5655
std::chrono::seconds timeout) const
5756
{
58-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
57+
MutexLocker guard(mFlowControlMutex);
5958
return mNoOutboundCapacity && now - *mNoOutboundCapacity >= timeout;
6059
}
6160

6261
void
6362
FlowControl::setPeerID(NodeID const& peerID)
6463
{
6564
releaseAssert(threadIsMain());
66-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
65+
MutexLocker guard(mFlowControlMutex);
6766
mNodeID = peerID;
6867
}
6968

@@ -72,7 +71,7 @@ FlowControl::maybeReleaseCapacity(StellarMessage const& msg)
7271
{
7372
ZoneScoped;
7473
releaseAssert(threadIsMain());
75-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
74+
MutexLocker guard(mFlowControlMutex);
7675

7776
if (msg.type() == SEND_MORE_EXTENDED)
7877
{
@@ -103,7 +102,7 @@ FlowControl::processSentMessages(
103102
ZoneScoped;
104103
releaseAssert(!threadIsMain() || !mUseBackgroundThread);
105104

106-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
105+
MutexLocker guard(mFlowControlMutex);
107106
for (int i = 0; i < sentMessages.size(); i++)
108107
{
109108
auto const& sentMsgs = sentMessages[i];
@@ -162,7 +161,7 @@ FlowControl::getNextBatchToSend()
162161
ZoneScoped;
163162
releaseAssert(!threadIsMain() || !mUseBackgroundThread);
164163

165-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
164+
MutexLocker guard(mFlowControlMutex);
166165
std::vector<QueuedOutboundMessage> batchToSend;
167166

168167
int sent = 0;
@@ -217,7 +216,7 @@ FlowControl::updateMsgMetrics(std::shared_ptr<StellarMessage const> msg,
217216
{
218217
// The lock isn't strictly needed here, but is added for consistency and
219218
// future-proofing this function
220-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
219+
MutexLocker guard(mFlowControlMutex);
221220
auto diff = mAppConnector.now() - timePlaced;
222221

223222
auto updateQueueDelay = [&](auto& queue, auto& metrics) {
@@ -258,7 +257,7 @@ FlowControl::handleTxSizeIncrease(uint32_t increase)
258257
{
259258
ZoneScoped;
260259
releaseAssert(threadIsMain());
261-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
260+
MutexLocker guard(mFlowControlMutex);
262261
releaseAssert(increase > 0);
263262
// Bump flood capacity to accommodate the upgrade
264263
mFlowControlBytesCapacity.handleTxSizeIncrease(increase);
@@ -269,7 +268,7 @@ FlowControl::beginMessageProcessing(StellarMessage const& msg)
269268
{
270269
ZoneScoped;
271270
releaseAssert(!threadIsMain() || !mUseBackgroundThread);
272-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
271+
MutexLocker guard(mFlowControlMutex);
273272

274273
return mFlowControlCapacity.lockLocalCapacity(msg) &&
275274
mFlowControlBytesCapacity.lockLocalCapacity(msg);
@@ -279,7 +278,7 @@ SendMoreCapacity
279278
FlowControl::endMessageProcessing(StellarMessage const& msg)
280279
{
281280
ZoneScoped;
282-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
281+
MutexLocker guard(mFlowControlMutex);
283282

284283
mFloodDataProcessed += mFlowControlCapacity.releaseLocalCapacity(msg);
285284
mFloodDataProcessedBytes +=
@@ -318,7 +317,7 @@ FlowControl::endMessageProcessing(StellarMessage const& msg)
318317
}
319318

320319
bool
321-
FlowControl::canRead(std::lock_guard<std::mutex> const& guard) const
320+
FlowControl::canRead(MutexLocker const& guard) const
322321
{
323322
return mFlowControlBytesCapacity.canRead() &&
324323
mFlowControlCapacity.canRead();
@@ -327,7 +326,7 @@ FlowControl::canRead(std::lock_guard<std::mutex> const& guard) const
327326
bool
328327
FlowControl::canRead() const
329328
{
330-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
329+
MutexLocker guard(mFlowControlMutex);
331330
return canRead(guard);
332331
}
333332

@@ -365,7 +364,7 @@ FlowControl::isSendMoreValid(StellarMessage const& msg,
365364
std::string& errorMsg) const
366365
{
367366
releaseAssert(threadIsMain());
368-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
367+
MutexLocker guard(mFlowControlMutex);
369368

370369
if (msg.type() != SEND_MORE_EXTENDED)
371370
{
@@ -404,7 +403,7 @@ FlowControl::addMsgAndMaybeTrimQueue(std::shared_ptr<StellarMessage const> msg)
404403
{
405404
ZoneScoped;
406405
releaseAssert(threadIsMain());
407-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
406+
MutexLocker guard(mFlowControlMutex);
408407
releaseAssert(msg);
409408
auto type = msg->type();
410409
size_t msgQInd = 0;
@@ -461,16 +460,12 @@ FlowControl::addMsgAndMaybeTrimQueue(std::shared_ptr<StellarMessage const> msg)
461460
auto& om = mOverlayMetrics;
462461
if (type == TRANSACTION)
463462
{
464-
auto isOverLimit = [&](auto const& queue) {
465-
bool overLimit =
466-
queue.size() > limit ||
467-
mTxQueueByteCount > getOutboundQueueByteLimit(guard);
468-
return overLimit;
469-
};
463+
bool isOverLimit = queue.size() > limit ||
464+
mTxQueueByteCount > getOutboundQueueByteLimit(guard);
470465

471466
// If we are at limit, we're probably really behind, so drop the entire
472467
// queue
473-
if (isOverLimit(queue))
468+
if (isOverLimit)
474469
{
475470
dropped = queue.size();
476471
mTxQueueByteCount = 0;
@@ -559,7 +554,7 @@ Json::Value
559554
FlowControl::getFlowControlJsonInfo(bool compact) const
560555
{
561556
releaseAssert(threadIsMain());
562-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
557+
MutexLocker guard(mFlowControlMutex);
563558

564559
Json::Value res;
565560
if (mFlowControlCapacity.getCapacity().mTotalCapacity)
@@ -601,7 +596,7 @@ FlowControl::getFlowControlJsonInfo(bool compact) const
601596
bool
602597
FlowControl::maybeThrottleRead()
603598
{
604-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
599+
MutexLocker guard(mFlowControlMutex);
605600
if (!canRead(guard))
606601
{
607602
CLOG_DEBUG(Overlay, "Throttle reading from peer {}",
@@ -615,7 +610,7 @@ FlowControl::maybeThrottleRead()
615610
void
616611
FlowControl::stopThrottling()
617612
{
618-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
613+
MutexLocker guard(mFlowControlMutex);
619614
releaseAssert(mLastThrottle);
620615
CLOG_DEBUG(Overlay, "Stop throttling reading from peer {}",
621616
mAppConnector.getConfig().toShortString(mNodeID));
@@ -627,7 +622,7 @@ FlowControl::stopThrottling()
627622
bool
628623
FlowControl::isThrottled() const
629624
{
630-
std::lock_guard<std::mutex> guard(mFlowControlMutex);
625+
MutexLocker guard(mFlowControlMutex);
631626
return static_cast<bool>(mLastThrottle);
632627
}
633628

0 commit comments

Comments
 (0)