Skip to content

Commit a3f5569

Browse files
authored
Fix #2082 (#2170)
1 parent a636a09 commit a3f5569

File tree

3 files changed

+334
-51
lines changed

3 files changed

+334
-51
lines changed

README.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,26 @@ res->status; // 200
893893
cli.set_interface("eth0"); // Interface name, IP address or host name
894894
```
895895

896+
### Automatic Path Encoding
897+
898+
The client automatically encodes special characters in URL paths by default:
899+
900+
```cpp
901+
httplib::Client cli("https://example.com");
902+
903+
// Automatic path encoding (default behavior)
904+
cli.set_path_encode(true);
905+
auto res = cli.Get("/path with spaces/file.txt"); // Automatically encodes spaces
906+
907+
// Disable automatic path encoding
908+
cli.set_path_encode(false);
909+
res = cli.Get("/already%20encoded/path"); // Use pre-encoded paths (reuses `res` declared above)
910+
```
911+
912+
- `set_path_encode(bool on)` - Controls automatic encoding of special characters in URL paths
913+
- `true` (default): Automatically encodes spaces, plus signs, newlines, and other special characters
914+
- `false`: Sends paths as-is without encoding (useful for pre-encoded URLs)
915+
896916
Compression
897917
-----------
898918
@@ -969,6 +989,33 @@ cli.set_address_family(AF_UNIX);
969989
"my-socket.sock" can be a relative path or an absolute path. Your application must have the appropriate permissions for the path. You can also use an abstract socket address on Linux. To use an abstract socket address, prepend a null byte ('\x00') to the path.
970990
971991
992+
URI Encoding/Decoding Utilities
993+
-------------------------------
994+
995+
cpp-httplib provides utility functions for URI encoding and decoding:
996+
997+
```cpp
998+
#include <httplib.h>
999+
1000+
std::string url = "https://example.com/search?q=hello world";
1001+
std::string encoded = httplib::encode_uri(url);
1002+
std::string decoded = httplib::decode_uri(encoded);
1003+
1004+
std::string param = "hello world";
1005+
std::string encoded_component = httplib::encode_uri_component(param);
1006+
std::string decoded_component = httplib::decode_uri_component(encoded_component);
1007+
```
1008+
1009+
### Functions
1010+
1011+
- `encode_uri(const std::string &value)` - Encodes a full URI, preserving reserved characters such as `:`, `/`, `?`, `&`, `=` and `#`
1012+
- `decode_uri(const std::string &value)` - Decodes a URI-encoded string
1013+
- `encode_uri_component(const std::string &value)` - Encodes a URI component (query parameter, path segment), encoding all reserved characters
1014+
- `decode_uri_component(const std::string &value)` - Decodes a URI component
1015+
1016+
Use `encode_uri()` for full URLs and `encode_uri_component()` for individual query parameters or path segments.
1017+
1018+
9721019
Split httplib.h into .h and .cc
9731020
-------------------------------
9741021

httplib.h

Lines changed: 121 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,7 +1444,7 @@ class ClientImpl {
14441444
void set_keep_alive(bool on);
14451445
void set_follow_location(bool on);
14461446

1447-
void set_url_encode(bool on);
1447+
void set_path_encode(bool on);
14481448

14491449
void set_compress(bool on);
14501450

@@ -1556,7 +1556,7 @@ class ClientImpl {
15561556
bool keep_alive_ = false;
15571557
bool follow_location_ = false;
15581558

1559-
bool url_encode_ = true;
1559+
bool path_encode_ = true;
15601560

15611561
int address_family_ = AF_UNSPEC;
15621562
bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY;
@@ -1794,6 +1794,7 @@ class Client {
17941794
void set_keep_alive(bool on);
17951795
void set_follow_location(bool on);
17961796

1797+
void set_path_encode(bool on);
17971798
void set_url_encode(bool on);
17981799

17991800
void set_compress(bool on);
@@ -2248,6 +2249,16 @@ std::string hosted_at(const std::string &hostname);
22482249

22492250
void hosted_at(const std::string &hostname, std::vector<std::string> &addrs);
22502251

2252+
std::string encode_uri_component(const std::string &value);
2253+
2254+
std::string encode_uri(const std::string &value);
2255+
2256+
std::string decode_uri_component(const std::string &value);
2257+
2258+
std::string decode_uri(const std::string &value);
2259+
2260+
std::string encode_query_param(const std::string &value);
2261+
22512262
std::string append_query_params(const std::string &path, const Params &params);
22522263

22532264
std::pair<std::string, std::string> make_range_header(const Ranges &ranges);
@@ -2289,9 +2300,7 @@ struct FileStat {
22892300
int ret_ = -1;
22902301
};
22912302

2292-
std::string encode_query_param(const std::string &value);
2293-
2294-
std::string decode_url(const std::string &s, bool convert_plus_to_space);
2303+
std::string decode_path(const std::string &s, bool convert_plus_to_space);
22952304

22962305
std::string trim_copy(const std::string &s);
22972306

@@ -2761,28 +2770,7 @@ inline bool FileStat::is_dir() const {
27612770
return ret_ >= 0 && S_ISDIR(st_.st_mode);
27622771
}
27632772

2764-
inline std::string encode_query_param(const std::string &value) {
2765-
std::ostringstream escaped;
2766-
escaped.fill('0');
2767-
escaped << std::hex;
2768-
2769-
for (auto c : value) {
2770-
if (std::isalnum(static_cast<uint8_t>(c)) || c == '-' || c == '_' ||
2771-
c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' ||
2772-
c == ')') {
2773-
escaped << c;
2774-
} else {
2775-
escaped << std::uppercase;
2776-
escaped << '%' << std::setw(2)
2777-
<< static_cast<int>(static_cast<unsigned char>(c));
2778-
escaped << std::nouppercase;
2779-
}
2780-
}
2781-
2782-
return escaped.str();
2783-
}
2784-
2785-
inline std::string encode_url(const std::string &s) {
2773+
inline std::string encode_path(const std::string &s) {
27862774
std::string result;
27872775
result.reserve(s.size());
27882776

@@ -2814,8 +2802,8 @@ inline std::string encode_url(const std::string &s) {
28142802
return result;
28152803
}
28162804

2817-
inline std::string decode_url(const std::string &s,
2818-
bool convert_plus_to_space) {
2805+
inline std::string decode_path(const std::string &s,
2806+
bool convert_plus_to_space) {
28192807
std::string result;
28202808

28212809
for (size_t i = 0; i < s.size(); i++) {
@@ -4539,7 +4527,7 @@ inline bool parse_header(const char *beg, const char *end, T fn) {
45394527
case_ignore::equal(key, "Referer")) {
45404528
fn(key, val);
45414529
} else {
4542-
fn(key, decode_url(val, false));
4530+
fn(key, decode_path(val, false));
45434531
}
45444532

45454533
return true;
@@ -5104,7 +5092,7 @@ inline std::string params_to_query_str(const Params &params) {
51045092
if (it != params.begin()) { query += "&"; }
51055093
query += it->first;
51065094
query += "=";
5107-
query += encode_query_param(it->second);
5095+
query += httplib::encode_uri_component(it->second);
51085096
}
51095097
return query;
51105098
}
@@ -5127,7 +5115,7 @@ inline void parse_query_text(const char *data, std::size_t size,
51275115
});
51285116

51295117
if (!key.empty()) {
5130-
params.emplace(decode_url(key, true), decode_url(val, true));
5118+
params.emplace(decode_path(key, true), decode_path(val, true));
51315119
}
51325120
});
51335121
}
@@ -5437,7 +5425,7 @@ class MultipartFormDataParser {
54375425

54385426
std::smatch m2;
54395427
if (std::regex_match(it->second, m2, re_rfc5987_encoding)) {
5440-
file_.filename = decode_url(m2[1], false); // override...
5428+
file_.filename = decode_path(m2[1], false); // override...
54415429
} else {
54425430
is_valid_ = false;
54435431
return false;
@@ -6260,6 +6248,94 @@ inline void hosted_at(const std::string &hostname,
62606248
}
62616249
}
62626250

6251+
// Percent-encodes `value` for use as a single URI component (for example a
// query parameter value or one path segment).
//
// ASCII letters, ASCII digits and the marks - _ . ! ~ * ' ( ) are copied
// through unchanged. Every other byte, including reserved delimiters such as
// '/', '?', '&', '=' and every byte >= 0x80, is written as "%XX" with two
// uppercase hexadecimal digits.
//
// The alphanumeric test is done on explicit ASCII ranges instead of
// std::isalnum so the result does not depend on the process-wide C locale
// (some locales classify bytes >= 0x80 as alphanumeric, which would leave
// them unescaped).
//
// @param value  Raw bytes to encode; may be empty.
// @return       The percent-encoded string.
inline std::string encode_uri_component(const std::string &value) {
  static const char hex_digits[] = "0123456789ABCDEF";

  std::string escaped;
  escaped.reserve(value.size());

  for (auto c : value) {
    const auto uc = static_cast<unsigned char>(c);
    const bool is_ascii_alnum = (uc >= '0' && uc <= '9') ||
                                (uc >= 'A' && uc <= 'Z') ||
                                (uc >= 'a' && uc <= 'z');

    if (is_ascii_alnum || c == '-' || c == '_' || c == '.' || c == '!' ||
        c == '~' || c == '*' || c == '\'' || c == '(' || c == ')') {
      escaped += c;
    } else {
      // High nibble first, then low nibble: 0x2F -> "%2F".
      escaped += '%';
      escaped += hex_digits[uc >> 4];
      escaped += hex_digits[uc & 0x0F];
    }
  }

  return escaped;
}
6271+
6272+
// Percent-encodes `value` as a complete URI.
//
// In addition to ASCII letters, ASCII digits and the marks
// - _ . ! ~ * ' ( ), the delimiters ; / ? : @ & = + $ , # are copied through
// unchanged so the structure of the URI (scheme, path, query, fragment) is
// preserved. Every other byte, including '%', space and every byte >= 0x80,
// is written as "%XX" with two uppercase hexadecimal digits.
//
// The alphanumeric test is done on explicit ASCII ranges instead of
// std::isalnum so the result does not depend on the process-wide C locale.
//
// @param value  URI text to encode; may be empty.
// @return       The percent-encoded URI.
inline std::string encode_uri(const std::string &value) {
  static const char hex_digits[] = "0123456789ABCDEF";

  std::string escaped;
  escaped.reserve(value.size());

  for (auto c : value) {
    const auto uc = static_cast<unsigned char>(c);
    const bool is_ascii_alnum = (uc >= '0' && uc <= '9') ||
                                (uc >= 'A' && uc <= 'Z') ||
                                (uc >= 'a' && uc <= 'z');
    const bool is_mark = c == '-' || c == '_' || c == '.' || c == '!' ||
                         c == '~' || c == '*' || c == '\'' || c == '(' ||
                         c == ')';
    const bool is_delimiter = c == ';' || c == '/' || c == '?' || c == ':' ||
                              c == '@' || c == '&' || c == '=' || c == '+' ||
                              c == '$' || c == ',' || c == '#';

    if (is_ascii_alnum || is_mark || is_delimiter) {
      escaped += c;
    } else {
      // High nibble first, then low nibble: 0x20 -> "%20".
      escaped += '%';
      escaped += hex_digits[uc >> 4];
      escaped += hex_digits[uc & 0x0F];
    }
  }

  return escaped;
}
6293+
6294+
inline std::string decode_uri_component(const std::string &value) {
6295+
std::string result;
6296+
6297+
for (size_t i = 0; i < value.size(); i++) {
6298+
if (value[i] == '%' && i + 2 < value.size()) {
6299+
auto val = 0;
6300+
if (detail::from_hex_to_i(value, i + 1, 2, val)) {
6301+
result += static_cast<char>(val);
6302+
i += 2;
6303+
} else {
6304+
result += value[i];
6305+
}
6306+
} else {
6307+
result += value[i];
6308+
}
6309+
}
6310+
6311+
return result;
6312+
}
6313+
6314+
inline std::string decode_uri(const std::string &value) {
6315+
std::string result;
6316+
6317+
for (size_t i = 0; i < value.size(); i++) {
6318+
if (value[i] == '%' && i + 2 < value.size()) {
6319+
auto val = 0;
6320+
if (detail::from_hex_to_i(value, i + 1, 2, val)) {
6321+
result += static_cast<char>(val);
6322+
i += 2;
6323+
} else {
6324+
result += value[i];
6325+
}
6326+
} else {
6327+
result += value[i];
6328+
}
6329+
}
6330+
6331+
return result;
6332+
}
6333+
6334+
[[deprecated("Use encode_uri_component instead")]]
6335+
inline std::string encode_query_param(const std::string &value) {
6336+
return encode_uri_component(value);
6337+
}
6338+
62636339
inline std::string append_query_params(const std::string &path,
62646340
const Params &params) {
62656341
std::string path_with_query = path;
@@ -7070,7 +7146,7 @@ inline bool Server::parse_request_line(const char *s, Request &req) const {
70707146
detail::divide(req.target, '?',
70717147
[&](const char *lhs_data, std::size_t lhs_size,
70727148
const char *rhs_data, std::size_t rhs_size) {
7073-
req.path = detail::decode_url(
7149+
req.path = detail::decode_path(
70747150
std::string(lhs_data, lhs_size), false);
70757151
detail::parse_query_text(rhs_data, rhs_size, req.params);
70767152
});
@@ -7967,7 +8043,7 @@ inline void ClientImpl::copy_settings(const ClientImpl &rhs) {
79678043
#endif
79688044
keep_alive_ = rhs.keep_alive_;
79698045
follow_location_ = rhs.follow_location_;
7970-
url_encode_ = rhs.url_encode_;
8046+
path_encode_ = rhs.path_encode_;
79718047
address_family_ = rhs.address_family_;
79728048
tcp_nodelay_ = rhs.tcp_nodelay_;
79738049
ipv6_v6only_ = rhs.ipv6_v6only_;
@@ -8332,7 +8408,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) {
83328408
if (next_host.empty()) { next_host = host_; }
83338409
if (next_path.empty()) { next_path = "/"; }
83348410

8335-
auto path = detail::decode_url(next_path, true) + next_query;
8411+
auto path = detail::decode_path(next_path, true) + next_query;
83368412

83378413
// Same host redirect - use current client
83388414
if (next_scheme == scheme && next_host == host_ && next_port == port_) {
@@ -8427,7 +8503,7 @@ inline void ClientImpl::setup_redirect_client(ClientType &client) {
84278503
client.set_keep_alive(keep_alive_);
84288504
client.set_follow_location(
84298505
true); // Enable redirects to handle multi-step redirects
8430-
client.set_url_encode(url_encode_);
8506+
client.set_path_encode(path_encode_);
84318507
client.set_compress(compress_);
84328508
client.set_decompress(decompress_);
84338509

@@ -8621,7 +8697,7 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req,
86218697
: append_query_params(req.path, req.params);
86228698

86238699
const auto &path =
8624-
url_encode_ ? detail::encode_url(path_with_query) : path_with_query;
8700+
path_encode_ ? detail::encode_path(path_with_query) : path_with_query;
86258701

86268702
detail::write_request_line(bstrm, req.method, path);
86278703

@@ -9667,7 +9743,7 @@ inline void ClientImpl::set_keep_alive(bool on) { keep_alive_ = on; }
96679743

96689744
// Stores `on` in follow_location_.
inline void ClientImpl::set_follow_location(bool on) { follow_location_ = on; }
96699745

9670-
inline void ClientImpl::set_url_encode(bool on) { url_encode_ = on; }
9746+
// Stores `on` in path_encode_. write_request consults this flag: when true it
// passes the request path (with query) through detail::encode_path before
// writing the request line; when false the path is sent as given.
inline void ClientImpl::set_path_encode(bool on) { path_encode_ = on; }
96719747

96729748
inline void
96739749
ClientImpl::set_hostname_addr_map(std::map<std::string, std::string> addr_map) {
@@ -11143,7 +11219,12 @@ inline void Client::set_follow_location(bool on) {
1114311219
cli_->set_follow_location(on);
1114411220
}
1114511221

11146-
inline void Client::set_url_encode(bool on) { cli_->set_url_encode(on); }
11222+
// Forwards the path-encoding flag to the underlying client implementation.
inline void Client::set_path_encode(bool on) { cli_->set_path_encode(on); }
11223+
11224+
[[deprecated("Use set_path_encode instead")]]
11225+
inline void Client::set_url_encode(bool on) {
11226+
cli_->set_path_encode(on);
11227+
}
1114711228

1114811229
// Forwards the compression flag to the underlying client implementation.
inline void Client::set_compress(bool on) { cli_->set_compress(on); }
1114911230

0 commit comments

Comments
 (0)