Skip to content

Commit f436bfd

Browse files
committed
Merge bitcoin#22953: refactor: introduce single-separator split helper (boost::split replacement)
a62e844 fuzz: add `SplitString` fuzz target (MarcoFalke) 4fad7e4 test: add unit tests for `SplitString` helper (Kiminuo) 9cc8e87 refactor: introduce single-separator split helper `SplitString` (Sebastian Falbesoner) Pull request description: This PR adds a simple string split helper `SplitString` that takes use of the spanparsing `Split` function that was first introduced in bitcoin#13697 (commit fe8a7dc). This enables to replace most calls to `boost::split`, in the cases where only a single separator character is used. Note that while previous attempts to replace `boost::split` were controversial (e.g. bitcoin#13751), this one has a trivial implementation: it merely uses an internal helper (that is unit tested and in regular use with output descriptiors) and converts its result from spans to strings. As a drawback though, not all `boost::split` instances can be tackled. As a possible optimization, one could return a vector of `std::string_view`s (available since C++17) instead of strings, to avoid copies. This would need more carefulness on the caller sites though, to avoid potential lifetime issues, and it's probably not worth it, considering that none of the places where strings are split are really performance-critical. ACKs for top commit: martinus: Code review ACK a62e844. Ran all tests. I also like that with `boost::split` it was not obvious that the resulting container was cleared, and with `SplitString` API that's obvious. Tree-SHA512: 10cb22619ebe46831b1f8e83584a89381a036b54c88701484ac00743e2a62cfe52c9f3ecdbb2d0815e536c99034558277cc263600ec3f3588b291c07eef8ed24
2 parents a19f641 + a62e844 commit f436bfd

14 files changed

+97
-71
lines changed

src/bitcoin-tx.cpp

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
#include <memory>
3232
#include <stdio.h>
3333

34-
#include <boost/algorithm/string.hpp>
35-
3634
static bool fCreateBlank;
3735
static std::map<std::string,UniValue> registers;
3836
static const int CONTINUE_EXECUTION=-1;
@@ -251,8 +249,7 @@ static T TrimAndParse(const std::string& int_str, const std::string& err)
251249

252250
static void MutateTxAddInput(CMutableTransaction& tx, const std::string& strInput)
253251
{
254-
std::vector<std::string> vStrInputParts;
255-
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
252+
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
256253

257254
// separate TXID:VOUT in string
258255
if (vStrInputParts.size()<2)
@@ -287,8 +284,7 @@ static void MutateTxAddInput(CMutableTransaction& tx, const std::string& strInpu
287284
static void MutateTxAddOutAddr(CMutableTransaction& tx, const std::string& strInput)
288285
{
289286
// Separate into VALUE:ADDRESS
290-
std::vector<std::string> vStrInputParts;
291-
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
287+
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
292288

293289
if (vStrInputParts.size() != 2)
294290
throw std::runtime_error("TX output missing or too many separators");
@@ -312,8 +308,7 @@ static void MutateTxAddOutAddr(CMutableTransaction& tx, const std::string& strIn
312308
static void MutateTxAddOutPubKey(CMutableTransaction& tx, const std::string& strInput)
313309
{
314310
// Separate into VALUE:PUBKEY[:FLAGS]
315-
std::vector<std::string> vStrInputParts;
316-
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
311+
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
317312

318313
if (vStrInputParts.size() < 2 || vStrInputParts.size() > 3)
319314
throw std::runtime_error("TX output missing or too many separators");
@@ -356,8 +351,7 @@ static void MutateTxAddOutPubKey(CMutableTransaction& tx, const std::string& str
356351
static void MutateTxAddOutMultiSig(CMutableTransaction& tx, const std::string& strInput)
357352
{
358353
// Separate into VALUE:REQUIRED:NUMKEYS:PUBKEY1:PUBKEY2:....[:FLAGS]
359-
std::vector<std::string> vStrInputParts;
360-
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
354+
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
361355

362356
// Check that there are enough parameters
363357
if (vStrInputParts.size()<3)
@@ -460,8 +454,7 @@ static void MutateTxAddOutData(CMutableTransaction& tx, const std::string& strIn
460454
static void MutateTxAddOutScript(CMutableTransaction& tx, const std::string& strInput)
461455
{
462456
// separate VALUE:SCRIPT[:FLAGS]
463-
std::vector<std::string> vStrInputParts;
464-
boost::split(vStrInputParts, strInput, boost::is_any_of(":"));
457+
std::vector<std::string> vStrInputParts = SplitString(strInput, ':');
465458
if (vStrInputParts.size() < 2)
466459
throw std::runtime_error("TX output missing separator");
467460

src/chainparams.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,11 @@
1010
#include <deploymentinfo.h>
1111
#include <hash.h> // for signet block challenge hash
1212
#include <script/interpreter.h>
13+
#include <util/string.h>
1314
#include <util/system.h>
1415

1516
#include <assert.h>
1617

17-
#include <boost/algorithm/string/classification.hpp>
18-
#include <boost/algorithm/string/split.hpp>
19-
2018
static CBlock CreateGenesisBlock(const char* pszTimestamp, const CScript& genesisOutputScript, uint32_t nTime, uint32_t nNonce, uint32_t nBits, int32_t nVersion, const CAmount& genesisReward)
2119
{
2220
CMutableTransaction txNew;
@@ -528,8 +526,7 @@ void CRegTestParams::UpdateActivationParametersFromArgs(const ArgsManager& args)
528526
if (!args.IsArgSet("-vbparams")) return;
529527

530528
for (const std::string& strDeployment : args.GetArgs("-vbparams")) {
531-
std::vector<std::string> vDeploymentParams;
532-
boost::split(vDeploymentParams, strDeployment, boost::is_any_of(":"));
529+
std::vector<std::string> vDeploymentParams = SplitString(strDeployment, ':');
533530
if (vDeploymentParams.size() < 3 || 4 < vDeploymentParams.size()) {
534531
throw std::runtime_error("Version bits parameters malformed, expecting deployment:start:end[:min_activation_height]");
535532
}

src/rest.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@
3232
#include <any>
3333
#include <string>
3434

35-
#include <boost/algorithm/string.hpp>
36-
3735
#include <univalue.h>
3836

3937
using node::GetTransaction;
@@ -191,8 +189,7 @@ static bool rest_headers(const std::any& context,
191189
return false;
192190
std::string param;
193191
const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
194-
std::vector<std::string> path;
195-
boost::split(path, param, boost::is_any_of("/"));
192+
std::vector<std::string> path = SplitString(param, '/');
196193

197194
std::string raw_count;
198195
std::string hashStr;
@@ -362,8 +359,7 @@ static bool rest_filter_header(const std::any& context, HTTPRequest* req, const
362359
std::string param;
363360
const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
364361

365-
std::vector<std::string> uri_parts;
366-
boost::split(uri_parts, param, boost::is_any_of("/"));
362+
std::vector<std::string> uri_parts = SplitString(param, '/');
367363
std::string raw_count;
368364
std::string raw_blockhash;
369365
if (uri_parts.size() == 3) {
@@ -483,8 +479,7 @@ static bool rest_block_filter(const std::any& context, HTTPRequest* req, const s
483479
const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
484480

485481
// request is sent over URI scheme /rest/blockfilter/filtertype/blockhash
486-
std::vector<std::string> uri_parts;
487-
boost::split(uri_parts, param, boost::is_any_of("/"));
482+
std::vector<std::string> uri_parts = SplitString(param, '/');
488483
if (uri_parts.size() != 2) {
489484
return RESTERR(req, HTTP_BAD_REQUEST, "Invalid URI format. Expected /rest/blockfilter/<filtertype>/<blockhash>");
490485
}
@@ -712,7 +707,7 @@ static bool rest_getutxos(const std::any& context, HTTPRequest* req, const std::
712707
if (param.length() > 1)
713708
{
714709
std::string strUriParams = param.substr(1);
715-
boost::split(uriParts, strUriParams, boost::is_any_of("/"));
710+
uriParts = SplitString(strUriParams, '/');
716711
}
717712

718713
// throw exception in case of an empty request

src/rpc/server.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,9 @@
99
#include <shutdown.h>
1010
#include <sync.h>
1111
#include <util/strencodings.h>
12+
#include <util/string.h>
1213
#include <util/system.h>
1314

14-
#include <boost/algorithm/string/classification.hpp>
15-
#include <boost/algorithm/string/split.hpp>
1615
#include <boost/signals2/signal.hpp>
1716

1817
#include <cassert>
@@ -407,8 +406,7 @@ static inline JSONRPCRequest transformNamedArguments(const JSONRPCRequest& in, c
407406
// Process expected parameters.
408407
int hole = 0;
409408
for (const std::string &argNamePattern: argNames) {
410-
std::vector<std::string> vargNames;
411-
boost::algorithm::split(vargNames, argNamePattern, boost::algorithm::is_any_of("|"));
409+
std::vector<std::string> vargNames = SplitString(argNamePattern, '|');
412410
auto fr = argsIn.end();
413411
for (const std::string & argName : vargNames) {
414412
fr = argsIn.find(argName);

src/rpc/util.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@
1616

1717
#include <tuple>
1818

19-
#include <boost/algorithm/string/classification.hpp>
20-
#include <boost/algorithm/string/split.hpp>
21-
2219
const std::string UNIX_EPOCH_TIME = "UNIX epoch time";
2320
const std::string EXAMPLE_ADDRESS[2] = {"bc1q09vm5lfy0j5reeulh4x5752q25uqqvz34hufdl", "bc1q02ad21edsxd23d32dfgqqsz4vv4nmtfzuklhy3"};
2421

@@ -514,8 +511,7 @@ RPCHelpMan::RPCHelpMan(std::string name, std::string description, std::vector<RP
514511
{
515512
std::set<std::string> named_args;
516513
for (const auto& arg : m_args) {
517-
std::vector<std::string> names;
518-
boost::split(names, arg.m_names, boost::is_any_of("|"));
514+
std::vector<std::string> names = SplitString(arg.m_names, '|');
519515
// Should have unique named arguments
520516
for (const std::string& name : names) {
521517
CHECK_NONFATAL(named_args.insert(name).second);
@@ -666,8 +662,7 @@ UniValue RPCHelpMan::GetArgMap() const
666662
UniValue arr{UniValue::VARR};
667663
for (int i{0}; i < int(m_args.size()); ++i) {
668664
const auto& arg = m_args.at(i);
669-
std::vector<std::string> arg_names;
670-
boost::split(arg_names, arg.m_names, boost::is_any_of("|"));
665+
std::vector<std::string> arg_names = SplitString(arg.m_names, '|');
671666
for (const auto& arg_name : arg_names) {
672667
UniValue map{UniValue::VARR};
673668
map.push_back(m_name);

src/test/fuzz/script_assets_test_minimizer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
#include <streams.h>
1212
#include <univalue.h>
1313
#include <util/strencodings.h>
14+
#include <util/string.h>
1415

15-
#include <boost/algorithm/string.hpp>
1616
#include <cstdint>
1717
#include <string>
1818
#include <vector>
@@ -130,8 +130,7 @@ unsigned int ParseScriptFlags(const std::string& str)
130130
if (str.empty()) return 0;
131131

132132
unsigned int flags = 0;
133-
std::vector<std::string> words;
134-
boost::algorithm::split(words, str, boost::algorithm::is_any_of(","));
133+
std::vector<std::string> words = SplitString(str, ',');
135134

136135
for (const std::string& word : words) {
137136
auto it = FLAG_NAMES.find(word);

src/test/fuzz/string.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ FUZZ_TARGET(string)
224224
int64_t amount_out;
225225
(void)ParseFixedPoint(random_string_1, fuzzed_data_provider.ConsumeIntegralInRange<int>(0, 1024), &amount_out);
226226
}
227+
(void)SplitString(random_string_1, fuzzed_data_provider.ConsumeIntegral<char>());
227228
{
228229
(void)Untranslated(random_string_1);
229230
const bilingual_str bs1{random_string_1, random_string_2};

src/test/transaction_tests.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
#include <map>
3232
#include <string>
3333

34-
#include <boost/algorithm/string/classification.hpp>
35-
#include <boost/algorithm/string/split.hpp>
3634
#include <boost/test/unit_test.hpp>
3735

3836
#include <univalue.h>
@@ -70,8 +68,7 @@ unsigned int ParseScriptFlags(std::string strFlags)
7068
{
7169
if (strFlags.empty() || strFlags == "NONE") return 0;
7270
unsigned int flags = 0;
73-
std::vector<std::string> words;
74-
boost::algorithm::split(words, strFlags, boost::algorithm::is_any_of(","));
71+
std::vector<std::string> words = SplitString(strFlags, ',');
7572

7673
for (const std::string& word : words)
7774
{

src/test/util_tests.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,6 +2349,55 @@ BOOST_AUTO_TEST_CASE(test_spanparsing)
23492349
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
23502350
}
23512351

2352+
BOOST_AUTO_TEST_CASE(test_SplitString)
2353+
{
2354+
// Empty string.
2355+
{
2356+
std::vector<std::string> result = SplitString("", '-');
2357+
BOOST_CHECK_EQUAL(result.size(), 1);
2358+
BOOST_CHECK_EQUAL(result[0], "");
2359+
}
2360+
2361+
// Empty items.
2362+
{
2363+
std::vector<std::string> result = SplitString("-", '-');
2364+
BOOST_CHECK_EQUAL(result.size(), 2);
2365+
BOOST_CHECK_EQUAL(result[0], "");
2366+
BOOST_CHECK_EQUAL(result[1], "");
2367+
}
2368+
2369+
// More empty items.
2370+
{
2371+
std::vector<std::string> result = SplitString("--", '-');
2372+
BOOST_CHECK_EQUAL(result.size(), 3);
2373+
BOOST_CHECK_EQUAL(result[0], "");
2374+
BOOST_CHECK_EQUAL(result[1], "");
2375+
BOOST_CHECK_EQUAL(result[2], "");
2376+
}
2377+
2378+
// Separator is not present.
2379+
{
2380+
std::vector<std::string> result = SplitString("abc", '-');
2381+
BOOST_CHECK_EQUAL(result.size(), 1);
2382+
BOOST_CHECK_EQUAL(result[0], "abc");
2383+
}
2384+
2385+
// Basic behavior.
2386+
{
2387+
std::vector<std::string> result = SplitString("a-b", '-');
2388+
BOOST_CHECK_EQUAL(result.size(), 2);
2389+
BOOST_CHECK_EQUAL(result[0], "a");
2390+
BOOST_CHECK_EQUAL(result[1], "b");
2391+
}
2392+
2393+
// Case-sensitivity of the separator.
2394+
{
2395+
std::vector<std::string> result = SplitString("AAA", 'a');
2396+
BOOST_CHECK_EQUAL(result.size(), 1);
2397+
BOOST_CHECK_EQUAL(result[0], "AAA");
2398+
}
2399+
}
2400+
23522401
BOOST_AUTO_TEST_CASE(test_LogEscapeMessage)
23532402
{
23542403
// ASCII and UTF-8 must pass through unaltered.

src/torcontrol.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,7 @@
2424
#include <set>
2525
#include <vector>
2626

27-
#include <boost/algorithm/string/classification.hpp>
2827
#include <boost/algorithm/string/replace.hpp>
29-
#include <boost/algorithm/string/split.hpp>
3028

3129
#include <event2/buffer.h>
3230
#include <event2/bufferevent.h>
@@ -347,8 +345,8 @@ void TorController::get_socks_cb(TorControlConnection& _conn, const TorControlRe
347345
for (const auto& line : reply.lines) {
348346
if (0 == line.compare(0, 20, "net/listeners/socks=")) {
349347
const std::string port_list_str = line.substr(20);
350-
std::vector<std::string> port_list;
351-
boost::split(port_list, port_list_str, boost::is_any_of(" "));
348+
std::vector<std::string> port_list = SplitString(port_list_str, ' ');
349+
352350
for (auto& portstr : port_list) {
353351
if (portstr.empty()) continue;
354352
if ((portstr[0] == '"' || portstr[0] == '\'') && portstr.size() >= 2 && (*portstr.rbegin() == portstr[0])) {
@@ -542,8 +540,10 @@ void TorController::protocolinfo_cb(TorControlConnection& _conn, const TorContro
542540
if (l.first == "AUTH") {
543541
std::map<std::string,std::string> m = ParseTorReplyMapping(l.second);
544542
std::map<std::string,std::string>::iterator i;
545-
if ((i = m.find("METHODS")) != m.end())
546-
boost::split(methods, i->second, boost::is_any_of(","));
543+
if ((i = m.find("METHODS")) != m.end()) {
544+
std::vector<std::string> m_vec = SplitString(i->second, ',');
545+
methods = std::set<std::string>(m_vec.begin(), m_vec.end());
546+
}
547547
if ((i = m.find("COOKIEFILE")) != m.end())
548548
cookiefile = i->second;
549549
} else if (l.first == "VERSION") {

0 commit comments

Comments
 (0)