Skip to content

Commit 65d54ba

Browse files
committed
Fix #2082
1 parent cb85e57 commit 65d54ba

File tree

3 files changed

+322
-51
lines changed

3 files changed

+322
-51
lines changed

README.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,26 @@ res->status; // 200
881881
cli.set_interface("eth0"); // Interface name, IP address or host name
882882
```
883883

884+
### Automatic Path Encoding
885+
886+
The client automatically encodes special characters in URL paths by default:
887+
888+
```cpp
889+
httplib::Client cli("https://example.com");
890+
891+
// Automatic path encoding (default behavior)
892+
cli.set_path_encode(true);
893+
auto res = cli.Get("/path with spaces/file.txt"); // Automatically encodes spaces
894+
895+
// Disable automatic path encoding
896+
cli.set_path_encode(false);
897+
res = cli.Get("/already%20encoded/path"); // Use pre-encoded paths
898+
```
899+
900+
- `set_path_encode(bool on)` - Controls automatic encoding of special characters in URL paths
901+
- `true` (default): Automatically encodes spaces, plus signs, newlines, and other special characters
902+
- `false`: Sends paths as-is without encoding (useful for pre-encoded URLs)
903+
884904
Compression
885905
-----------
886906
@@ -957,6 +977,33 @@ cli.set_address_family(AF_UNIX);
957977
"my-socket.sock" can be a relative path or an absolute path. Your application must have the appropriate permissions for the path. You can also use an abstract socket address on Linux. To use an abstract socket address, prepend a null byte ('\x00') to the path.
958978
959979
980+
URI Encoding/Decoding Utilities
981+
-------------------------------
982+
983+
cpp-httplib provides utility functions for URI encoding and decoding:
984+
985+
```cpp
986+
#include <httplib.h>
987+
988+
std::string url = "https://example.com/search?q=hello world";
989+
std::string encoded = httplib::encode_uri(url);
990+
std::string decoded = httplib::decode_uri(encoded);
991+
992+
std::string param = "hello world";
993+
std::string encoded_component = httplib::encode_uri_component(param);
994+
std::string decoded_component = httplib::decode_uri_component(encoded_component);
995+
```
996+
997+
### Functions
998+
999+
- `encode_uri(const std::string &value)` - Encodes a full URI, preserving reserved characters like `://`, `?`, `&`, `=`
1000+
- `decode_uri(const std::string &value)` - Decodes a URI-encoded string
1001+
- `encode_uri_component(const std::string &value)` - Encodes a URI component (query parameter, path segment), percent-encoding every character except ASCII letters, digits, and `- _ . ! ~ * ' ( )`
1002+
- `decode_uri_component(const std::string &value)` - Decodes a URI component
1003+
1004+
Use `encode_uri()` for full URLs and `encode_uri_component()` for individual query parameters or path segments.
1005+
1006+
9601007
Split httplib.h into .h and .cc
9611008
-------------------------------
9621009

httplib.h

Lines changed: 119 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,7 +1442,7 @@ class ClientImpl {
14421442
void set_keep_alive(bool on);
14431443
void set_follow_location(bool on);
14441444

1445-
void set_url_encode(bool on);
1445+
void set_path_encode(bool on);
14461446

14471447
void set_compress(bool on);
14481448

@@ -1554,7 +1554,7 @@ class ClientImpl {
15541554
bool keep_alive_ = false;
15551555
bool follow_location_ = false;
15561556

1557-
bool url_encode_ = true;
1557+
bool path_encode_ = true;
15581558

15591559
int address_family_ = AF_UNSPEC;
15601560
bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY;
@@ -1792,6 +1792,7 @@ class Client {
17921792
void set_keep_alive(bool on);
17931793
void set_follow_location(bool on);
17941794

1795+
void set_path_encode(bool on);
17951796
void set_url_encode(bool on);
17961797

17971798
void set_compress(bool on);
@@ -2246,6 +2247,16 @@ std::string hosted_at(const std::string &hostname);
22462247

22472248
void hosted_at(const std::string &hostname, std::vector<std::string> &addrs);
22482249

2250+
std::string encode_uri_component(const std::string &value);
2251+
2252+
std::string encode_uri(const std::string &value);
2253+
2254+
std::string decode_uri_component(const std::string &value);
2255+
2256+
std::string decode_uri(const std::string &value);
2257+
2258+
std::string encode_query_param(const std::string &value);
2259+
22492260
std::string append_query_params(const std::string &path, const Params &params);
22502261

22512262
std::pair<std::string, std::string> make_range_header(const Ranges &ranges);
@@ -2287,9 +2298,7 @@ struct FileStat {
22872298
int ret_ = -1;
22882299
};
22892300

2290-
std::string encode_query_param(const std::string &value);
2291-
2292-
std::string decode_url(const std::string &s, bool convert_plus_to_space);
2301+
std::string decode_path(const std::string &s, bool convert_plus_to_space);
22932302

22942303
std::string trim_copy(const std::string &s);
22952304

@@ -2759,28 +2768,7 @@ inline bool FileStat::is_dir() const {
27592768
return ret_ >= 0 && S_ISDIR(st_.st_mode);
27602769
}
27612770

2762-
inline std::string encode_query_param(const std::string &value) {
2763-
std::ostringstream escaped;
2764-
escaped.fill('0');
2765-
escaped << std::hex;
2766-
2767-
for (auto c : value) {
2768-
if (std::isalnum(static_cast<uint8_t>(c)) || c == '-' || c == '_' ||
2769-
c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' ||
2770-
c == ')') {
2771-
escaped << c;
2772-
} else {
2773-
escaped << std::uppercase;
2774-
escaped << '%' << std::setw(2)
2775-
<< static_cast<int>(static_cast<unsigned char>(c));
2776-
escaped << std::nouppercase;
2777-
}
2778-
}
2779-
2780-
return escaped.str();
2781-
}
2782-
2783-
inline std::string encode_url(const std::string &s) {
2771+
inline std::string encode_path(const std::string &s) {
27842772
std::string result;
27852773
result.reserve(s.size());
27862774

@@ -2812,8 +2800,8 @@ inline std::string encode_url(const std::string &s) {
28122800
return result;
28132801
}
28142802

2815-
inline std::string decode_url(const std::string &s,
2816-
bool convert_plus_to_space) {
2803+
inline std::string decode_path(const std::string &s,
2804+
bool convert_plus_to_space) {
28172805
std::string result;
28182806

28192807
for (size_t i = 0; i < s.size(); i++) {
@@ -4537,7 +4525,7 @@ inline bool parse_header(const char *beg, const char *end, T fn) {
45374525
case_ignore::equal(key, "Referer")) {
45384526
fn(key, val);
45394527
} else {
4540-
fn(key, decode_url(val, false));
4528+
fn(key, decode_path(val, false));
45414529
}
45424530

45434531
return true;
@@ -5102,7 +5090,7 @@ inline std::string params_to_query_str(const Params &params) {
51025090
if (it != params.begin()) { query += "&"; }
51035091
query += it->first;
51045092
query += "=";
5105-
query += encode_query_param(it->second);
5093+
query += httplib::encode_uri_component(it->second);
51065094
}
51075095
return query;
51085096
}
@@ -5125,7 +5113,7 @@ inline void parse_query_text(const char *data, std::size_t size,
51255113
});
51265114

51275115
if (!key.empty()) {
5128-
params.emplace(decode_url(key, true), decode_url(val, true));
5116+
params.emplace(decode_path(key, true), decode_path(val, true));
51295117
}
51305118
});
51315119
}
@@ -5435,7 +5423,7 @@ class MultipartFormDataParser {
54355423

54365424
std::smatch m2;
54375425
if (std::regex_match(it->second, m2, re_rfc5987_encoding)) {
5438-
file_.filename = decode_url(m2[1], false); // override...
5426+
file_.filename = decode_path(m2[1], false); // override...
54395427
} else {
54405428
is_valid_ = false;
54415429
return false;
@@ -6258,6 +6246,94 @@ inline void hosted_at(const std::string &hostname,
62586246
}
62596247
}
62606248

6249+
// Percent-encodes `value` so it can be embedded as a single URI component
// (for example a query parameter value or one path segment).
//
// ASCII letters, digits and the characters - _ . ! ~ * ' ( ) are copied
// through unchanged. Every other byte, including '%' itself and all bytes
// >= 0x80, is written as '%' followed by two uppercase hex digits.
//
// The alphanumeric test is done on ASCII ranges rather than std::isalnum so
// the output does not depend on the process locale: with std::isalnum a
// non-"C" locale may classify high bytes as alphanumeric and let them through
// unescaped.
inline std::string encode_uri_component(const std::string &value) {
  static const char hex_digits[] = "0123456789ABCDEF";

  std::string escaped;
  escaped.reserve(value.size());

  for (auto c : value) {
    const auto uc = static_cast<unsigned char>(c);
    const bool is_ascii_alnum = (uc >= '0' && uc <= '9') ||
                                (uc >= 'A' && uc <= 'Z') ||
                                (uc >= 'a' && uc <= 'z');

    if (is_ascii_alnum || c == '-' || c == '_' || c == '.' || c == '!' ||
        c == '~' || c == '*' || c == '\'' || c == '(' || c == ')') {
      escaped += c;
    } else {
      escaped += '%';
      escaped += hex_digits[uc >> 4];
      escaped += hex_digits[uc & 0x0F];
    }
  }

  return escaped;
}
6269+
6270+
// Percent-encodes a complete URI while leaving its structure intact.
//
// ASCII letters, digits, the characters - _ . ! ~ * ' ( ) and the URI
// delimiters ; / ? : @ & = + $ , # are copied through unchanged. Every other
// byte is written as '%' followed by two uppercase hex digits. Note that '%'
// is not in the pass-through set, so input that is already percent-encoded
// gets encoded a second time.
//
// The alphanumeric test is done on ASCII ranges rather than std::isalnum so
// the output does not depend on the process locale: with std::isalnum a
// non-"C" locale may classify high bytes as alphanumeric and let them through
// unescaped.
inline std::string encode_uri(const std::string &value) {
  static const char hex_digits[] = "0123456789ABCDEF";

  std::string escaped;
  escaped.reserve(value.size());

  for (auto c : value) {
    const auto uc = static_cast<unsigned char>(c);
    const bool is_ascii_alnum = (uc >= '0' && uc <= '9') ||
                                (uc >= 'A' && uc <= 'Z') ||
                                (uc >= 'a' && uc <= 'z');

    if (is_ascii_alnum || c == '-' || c == '_' || c == '.' || c == '!' ||
        c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' ||
        c == ';' || c == '/' || c == '?' || c == ':' || c == '@' ||
        c == '&' || c == '=' || c == '+' || c == '$' || c == ',' ||
        c == '#') {
      escaped += c;
    } else {
      escaped += '%';
      escaped += hex_digits[uc >> 4];
      escaped += hex_digits[uc & 0x0F];
    }
  }

  return escaped;
}
6291+
6292+
inline std::string decode_uri_component(const std::string &value) {
6293+
std::string result;
6294+
6295+
for (size_t i = 0; i < value.size(); i++) {
6296+
if (value[i] == '%' && i + 2 < value.size()) {
6297+
auto val = 0;
6298+
if (detail::from_hex_to_i(value, i + 1, 2, val)) {
6299+
result += static_cast<char>(val);
6300+
i += 2;
6301+
} else {
6302+
result += value[i];
6303+
}
6304+
} else {
6305+
result += value[i];
6306+
}
6307+
}
6308+
6309+
return result;
6310+
}
6311+
6312+
inline std::string decode_uri(const std::string &value) {
6313+
std::string result;
6314+
6315+
for (size_t i = 0; i < value.size(); i++) {
6316+
if (value[i] == '%' && i + 2 < value.size()) {
6317+
auto val = 0;
6318+
if (detail::from_hex_to_i(value, i + 1, 2, val)) {
6319+
result += static_cast<char>(val);
6320+
i += 2;
6321+
} else {
6322+
result += value[i];
6323+
}
6324+
} else {
6325+
result += value[i];
6326+
}
6327+
}
6328+
6329+
return result;
6330+
}
6331+
6332+
[[deprecated("Use encode_uri_component instead")]]
6333+
inline std::string encode_query_param(const std::string &value) {
6334+
return encode_uri_component(value);
6335+
}
6336+
62616337
inline std::string append_query_params(const std::string &path,
62626338
const Params &params) {
62636339
std::string path_with_query = path;
@@ -7063,7 +7139,7 @@ inline bool Server::parse_request_line(const char *s, Request &req) const {
70637139
detail::divide(req.target, '?',
70647140
[&](const char *lhs_data, std::size_t lhs_size,
70657141
const char *rhs_data, std::size_t rhs_size) {
7066-
req.path = detail::decode_url(
7142+
req.path = detail::decode_path(
70677143
std::string(lhs_data, lhs_size), false);
70687144
detail::parse_query_text(rhs_data, rhs_size, req.params);
70697145
});
@@ -7958,7 +8034,7 @@ inline void ClientImpl::copy_settings(const ClientImpl &rhs) {
79588034
#endif
79598035
keep_alive_ = rhs.keep_alive_;
79608036
follow_location_ = rhs.follow_location_;
7961-
url_encode_ = rhs.url_encode_;
8037+
path_encode_ = rhs.path_encode_;
79628038
address_family_ = rhs.address_family_;
79638039
tcp_nodelay_ = rhs.tcp_nodelay_;
79648040
ipv6_v6only_ = rhs.ipv6_v6only_;
@@ -8323,7 +8399,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) {
83238399
if (next_host.empty()) { next_host = host_; }
83248400
if (next_path.empty()) { next_path = "/"; }
83258401

8326-
auto path = detail::decode_url(next_path, true) + next_query;
8402+
auto path = detail::decode_path(next_path, true) + next_query;
83278403

83288404
// Same host redirect - use current client
83298405
if (next_scheme == scheme && next_host == host_ && next_port == port_) {
@@ -8418,7 +8494,7 @@ inline void ClientImpl::setup_redirect_client(ClientType &client) {
84188494
client.set_keep_alive(keep_alive_);
84198495
client.set_follow_location(
84208496
true); // Enable redirects to handle multi-step redirects
8421-
client.set_url_encode(url_encode_);
8497+
client.set_path_encode(path_encode_);
84228498
client.set_compress(compress_);
84238499
client.set_decompress(decompress_);
84248500

@@ -8612,7 +8688,7 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req,
86128688
: append_query_params(req.path, req.params);
86138689

86148690
const auto &path =
8615-
url_encode_ ? detail::encode_url(path_with_query) : path_with_query;
8691+
path_encode_ ? detail::encode_path(path_with_query) : path_with_query;
86168692

86178693
detail::write_request_line(bstrm, req.method, path);
86188694

@@ -9658,7 +9734,7 @@ inline void ClientImpl::set_keep_alive(bool on) { keep_alive_ = on; }
96589734

96599735
inline void ClientImpl::set_follow_location(bool on) { follow_location_ = on; }
96609736

9661-
inline void ClientImpl::set_url_encode(bool on) { url_encode_ = on; }
9737+
// Stores the flag that write_request consults to decide whether the request
// path is passed through detail::encode_path before being sent.
inline void ClientImpl::set_path_encode(bool on) {
  path_encode_ = on;
}
96629738

96639739
inline void
96649740
ClientImpl::set_hostname_addr_map(std::map<std::string, std::string> addr_map) {
@@ -11134,7 +11210,10 @@ inline void Client::set_follow_location(bool on) {
1113411210
cli_->set_follow_location(on);
1113511211
}
1113611212

11137-
inline void Client::set_url_encode(bool on) { cli_->set_url_encode(on); }
11213+
// Forwards the path-encoding switch to the underlying client implementation.
inline void Client::set_path_encode(bool on) {
  cli_->set_path_encode(on);
}
11214+
11215+
[[deprecated("Use set_path_encode instead")]]
11216+
inline void Client::set_url_encode(bool on) { cli_->set_path_encode(on); }
1113811217

1113911218
inline void Client::set_compress(bool on) { cli_->set_compress(on); }
1114011219

0 commit comments

Comments
 (0)