Skip to content

Commit 7ccf861

Browse files
committed
Fix #2184, #2185
1 parent 8e8a23e commit 7ccf861

File tree

2 files changed

+230
-65
lines changed

2 files changed

+230
-65
lines changed

httplib.h

Lines changed: 184 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,7 +2030,7 @@ inline size_t get_header_value_u64(const Headers &headers,
20302030
inline size_t get_header_value_u64(const Headers &headers,
20312031
const std::string &key, size_t def,
20322032
size_t id) {
2033-
bool dummy = false;
2033+
auto dummy = false;
20342034
return get_header_value_u64(headers, key, def, id, dummy);
20352035
}
20362036

@@ -2301,15 +2301,19 @@ std::string hosted_at(const std::string &hostname);
23012301

23022302
void hosted_at(const std::string &hostname, std::vector<std::string> &addrs);
23032303

2304+
// JavaScript-style URL encoding/decoding functions
23042305
std::string encode_uri_component(const std::string &value);
2305-
23062306
std::string encode_uri(const std::string &value);
2307-
23082307
std::string decode_uri_component(const std::string &value);
2309-
23102308
std::string decode_uri(const std::string &value);
23112309

2312-
std::string encode_query_param(const std::string &value);
2310+
// RFC 3986 compliant URL component encoding/decoding functions
2311+
std::string encode_path_component(const std::string &component);
2312+
std::string decode_path_component(const std::string &component);
2313+
std::string encode_query_component(const std::string &component,
2314+
bool space_as_plus = true);
2315+
std::string decode_query_component(const std::string &component,
2316+
bool plus_as_space = true);
23132317

23142318
std::string append_query_params(const std::string &path, const Params &params);
23152319

@@ -2352,8 +2356,6 @@ struct FileStat {
23522356
int ret_ = -1;
23532357
};
23542358

2355-
std::string decode_path(const std::string &s, bool convert_plus_to_space);
2356-
23572359
std::string trim_copy(const std::string &s);
23582360

23592361
void divide(
@@ -2854,43 +2856,6 @@ inline std::string encode_path(const std::string &s) {
28542856
return result;
28552857
}
28562858

2857-
inline std::string decode_path(const std::string &s,
2858-
bool convert_plus_to_space) {
2859-
std::string result;
2860-
2861-
for (size_t i = 0; i < s.size(); i++) {
2862-
if (s[i] == '%' && i + 1 < s.size()) {
2863-
if (s[i + 1] == 'u') {
2864-
auto val = 0;
2865-
if (from_hex_to_i(s, i + 2, 4, val)) {
2866-
// 4 digits Unicode codes
2867-
char buff[4];
2868-
size_t len = to_utf8(val, buff);
2869-
if (len > 0) { result.append(buff, len); }
2870-
i += 5; // 'u0000'
2871-
} else {
2872-
result += s[i];
2873-
}
2874-
} else {
2875-
auto val = 0;
2876-
if (from_hex_to_i(s, i + 1, 2, val)) {
2877-
// 2 digits hex codes
2878-
result += static_cast<char>(val);
2879-
i += 2; // '00'
2880-
} else {
2881-
result += s[i];
2882-
}
2883-
}
2884-
} else if (convert_plus_to_space && s[i] == '+') {
2885-
result += ' ';
2886-
} else {
2887-
result += s[i];
2888-
}
2889-
}
2890-
2891-
return result;
2892-
}
2893-
28942859
inline std::string file_extension(const std::string &path) {
28952860
std::smatch m;
28962861
thread_local auto re = std::regex("\\.([a-zA-Z0-9]+)$");
@@ -4615,7 +4580,7 @@ inline bool parse_header(const char *beg, const char *end, T fn) {
46154580
case_ignore::equal(key, "Referer")) {
46164581
fn(key, val);
46174582
} else {
4618-
fn(key, decode_path(val, false));
4583+
fn(key, decode_path_component(val));
46194584
}
46204585

46214586
return true;
@@ -5263,9 +5228,9 @@ inline std::string params_to_query_str(const Params &params) {
52635228

52645229
for (auto it = params.begin(); it != params.end(); ++it) {
52655230
if (it != params.begin()) { query += "&"; }
5266-
query += it->first;
5231+
query += httplib::encode_query_component(it->first);
52675232
query += "=";
5268-
query += httplib::encode_uri_component(it->second);
5233+
query += httplib::encode_query_component(it->second);
52695234
}
52705235
return query;
52715236
}
@@ -5288,7 +5253,7 @@ inline void parse_query_text(const char *data, std::size_t size,
52885253
});
52895254

52905255
if (!key.empty()) {
5291-
params.emplace(decode_path(key, true), decode_path(val, true));
5256+
params.emplace(decode_query_component(key), decode_query_component(val));
52925257
}
52935258
});
52945259
}
@@ -5611,7 +5576,7 @@ class FormDataParser {
56115576

56125577
std::smatch m2;
56135578
if (std::regex_match(it->second, m2, re_rfc5987_encoding)) {
5614-
file_.filename = decode_path(m2[1], false); // override...
5579+
file_.filename = decode_path_component(m2[1]); // override...
56155580
} else {
56165581
is_valid_ = false;
56175582
return false;
@@ -6517,9 +6482,154 @@ inline std::string decode_uri(const std::string &value) {
65176482
return result;
65186483
}
65196484

6520-
[[deprecated("Use encode_uri_component instead")]]
6521-
inline std::string encode_query_param(const std::string &value) {
6522-
return encode_uri_component(value);
6485+
// Percent-encodes one URL path component.
//
// Bytes copied through unchanged:
//   - alphanumerics (as classified by std::isalnum) and "-" "." "_" "~"
//     (RFC 3986 unreserved characters)
//   - the sub-delimiters "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "="
//   - ":" and "@"
// Every other byte, including "/", "?", "#", "%", space and non-ASCII bytes,
// is written as "%XX" with two uppercase hex digits.
inline std::string encode_path_component(const std::string &component) {
  std::string encoded;
  // Worst case: every input byte expands to the three characters "%XX".
  encoded.reserve(component.size() * 3);

  for (const char raw : component) {
    // Work on the unsigned value so bytes >= 0x80 are safe to pass to
    // std::isalnum and format as two hex digits.
    const auto byte = static_cast<unsigned char>(raw);

    bool keep_literal = false;
    switch (byte) {
    // Unreserved punctuation
    case '-':
    case '.':
    case '_':
    case '~':
    // Path-safe sub-delimiters
    case '!':
    case '$':
    case '&':
    case '\'':
    case '(':
    case ')':
    case '*':
    case '+':
    case ',':
    case ';':
    case '=':
    // Colon (allowed in path segments except the first) and at-sign
    case ':':
    case '@':
      keep_literal = true;
      break;
    default:
      keep_literal = std::isalnum(byte) != 0;
      break;
    }

    if (keep_literal) {
      encoded += static_cast<char>(byte);
      continue;
    }

    encoded += '%';
    char hex[3];
    snprintf(hex, sizeof(hex), "%02X", byte);
    encoded.append(hex, 2);
  }

  return encoded;
}
6519+
6520+
inline std::string decode_path_component(const std::string &component) {
6521+
std::string result;
6522+
result.reserve(component.size());
6523+
6524+
for (size_t i = 0; i < component.size(); i++) {
6525+
if (component[i] == '%' && i + 1 < component.size()) {
6526+
if (component[i + 1] == 'u') {
6527+
// Unicode %uXXXX encoding
6528+
auto val = 0;
6529+
if (detail::from_hex_to_i(component, i + 2, 4, val)) {
6530+
// 4 digits Unicode codes
6531+
char buff[4];
6532+
size_t len = detail::to_utf8(val, buff);
6533+
if (len > 0) { result.append(buff, len); }
6534+
i += 5; // 'u0000'
6535+
} else {
6536+
result += component[i];
6537+
}
6538+
} else {
6539+
// Standard %XX encoding
6540+
auto val = 0;
6541+
if (detail::from_hex_to_i(component, i + 1, 2, val)) {
6542+
// 2 digits hex codes
6543+
result += static_cast<char>(val);
6544+
i += 2; // 'XX'
6545+
} else {
6546+
result += component[i];
6547+
}
6548+
}
6549+
} else {
6550+
result += component[i];
6551+
}
6552+
}
6553+
return result;
6554+
}
6555+
6556+
// Percent-encodes one URL query component (a key or a value).
//
// space_as_plus selects the treatment of ' ' and '+':
//   - true  : ' ' -> "+"   and '+' -> "%2B"
//   - false : ' ' -> "%20" and '+' is kept literally
//
// Bytes copied through unchanged: alphanumerics (as classified by
// std::isalnum), "-" "." "_" "~", the sub-delimiters "!" "$" "'" "(" ")" "*"
// "," ";", and ":" "@" "/" "?". The query delimiters "&" and "=" are NOT in
// that set, so they are escaped. Every other byte is written as "%XX" with
// two uppercase hex digits.
inline std::string encode_query_component(const std::string &component,
                                          bool space_as_plus) {
  std::string encoded;
  // Worst case: every input byte expands to the three characters "%XX".
  encoded.reserve(component.size() * 3);

  for (const char raw : component) {
    const auto byte = static_cast<unsigned char>(raw);

    switch (byte) {
    // Space handling
    case ' ':
      encoded += space_as_plus ? "+" : "%20";
      continue;

    // Plus sign handling: must be escaped when '+' is being used for spaces
    case '+':
      if (space_as_plus) {
        encoded += "%2B";
      } else {
        encoded += static_cast<char>(byte);
      }
      continue;

    // Unreserved punctuation
    case '-':
    case '.':
    case '_':
    case '~':
    // Query-safe sub-delimiters (excluding & and = which are query
    // delimiters)
    case '!':
    case '$':
    case '\'':
    case '(':
    case ')':
    case '*':
    case ',':
    case ';':
    // Colon, at-sign, forward slash and question mark are allowed in query
    case ':':
    case '@':
    case '/':
    case '?':
      encoded += static_cast<char>(byte);
      continue;

    default:
      break;
    }

    if (std::isalnum(byte)) {
      encoded += static_cast<char>(byte);
      continue;
    }

    encoded += '%';
    char hex[3];
    snprintf(hex, sizeof(hex), "%02X", byte);
    encoded.append(hex, 2);
  }

  return encoded;
}
6609+
6610+
// Decodes one URL query component (a key or a value).
//
// - "%XX", where both X are ASCII hex digits (either case), becomes the single
//   byte with that value.
// - '+' becomes ' ' when plus_as_space is true (form-urlencoded convention);
//   otherwise it is kept literally.
// - Anything else is copied through unchanged. This includes a '%' that is not
//   followed by two hex digits (e.g. "%zz", "% 1", "%-1", or a '%' within the
//   last two characters of the input): the '%' is kept and the characters after
//   it are then processed normally.
//
// The two characters after '%' are validated digit by digit rather than with
// std::strtoul, because strtoul skips leading whitespace and accepts a sign,
// which would turn malformed input such as "% 1" or "%-1" into bytes.
inline std::string decode_query_component(const std::string &component,
                                          bool plus_as_space) {
  // Value of a single ASCII hex digit, or -1 if `ch` is not a hex digit.
  const auto hex_value = [](char ch) -> int {
    if (ch >= '0' && ch <= '9') { return ch - '0'; }
    if (ch >= 'a' && ch <= 'f') { return ch - 'a' + 10; }
    if (ch >= 'A' && ch <= 'F') { return ch - 'A' + 10; }
    return -1;
  };

  std::string result;
  result.reserve(component.size());

  for (size_t i = 0; i < component.size(); i++) {
    const char c = component[i];

    if (c == '%' && i + 2 < component.size()) {
      const int hi = hex_value(component[i + 1]);
      const int lo = hex_value(component[i + 2]);
      if (hi >= 0 && lo >= 0) {
        result += static_cast<char>((hi << 4) | lo);
        i += 2; // skip 'XX'; the loop increment steps past the '%'
      } else {
        result += c; // malformed escape: keep the '%' literally
      }
    } else if (c == '+' && plus_as_space) {
      result += ' '; // + becomes space in form-urlencoded
    } else {
      result += c;
    }
  }

  return result;
}
65246634

65256635
inline std::string append_query_params(const std::string &path,
@@ -7404,8 +7514,8 @@ inline bool Server::parse_request_line(const char *s, Request &req) const {
74047514
detail::divide(req.target, '?',
74057515
[&](const char *lhs_data, std::size_t lhs_size,
74067516
const char *rhs_data, std::size_t rhs_size) {
7407-
req.path = detail::decode_path(
7408-
std::string(lhs_data, lhs_size), false);
7517+
req.path =
7518+
decode_path_component(std::string(lhs_data, lhs_size));
74097519
detail::parse_query_text(rhs_data, rhs_size, req.params);
74107520
});
74117521
}
@@ -8678,7 +8788,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) {
86788788
if (next_host.empty()) { next_host = host_; }
86798789
if (next_path.empty()) { next_path = "/"; }
86808790

8681-
auto path = detail::decode_path(next_path, true) + next_query;
8791+
auto path = decode_query_component(next_path, true) + next_query;
86828792

86838793
// Same host redirect - use current client
86848794
if (next_scheme == scheme && next_host == host_ && next_port == port_) {
@@ -8966,15 +9076,28 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req,
89669076
{
89679077
detail::BufferStream bstrm;
89689078

8969-
const auto &path_with_query =
8970-
req.params.empty() ? req.path
8971-
: append_query_params(req.path, req.params);
9079+
// Extract path and query from req.path
9080+
std::string path_part, query_part;
9081+
auto query_pos = req.path.find('?');
9082+
if (query_pos != std::string::npos) {
9083+
path_part = req.path.substr(0, query_pos);
9084+
query_part = req.path.substr(query_pos + 1);
9085+
} else {
9086+
path_part = req.path;
9087+
query_part = "";
9088+
}
89729089

8973-
const auto &path =
8974-
path_encode_ ? detail::encode_path(path_with_query) : path_with_query;
9090+
// Encode path and query
9091+
auto path_with_query =
9092+
path_encode_ ? detail::encode_path(path_part) : path_part;
89759093

8976-
detail::write_request_line(bstrm, req.method, path);
9094+
detail::parse_query_text(query_part, req.params);
9095+
if (!req.params.empty()) {
9096+
path_with_query = append_query_params(path_with_query, req.params);
9097+
}
89779098

9099+
// Write request line and headers
9100+
detail::write_request_line(bstrm, req.method, path_with_query);
89789101
header_writer_(bstrm, req.headers);
89799102

89809103
// Flush buffer

0 commit comments

Comments
 (0)