Skip to content

Commit 1391217

Browse files
Implement unified URI validation (#82)
This PR implements the formalized URI and URI-reference grammars that we have selected to unify the behavior of the CEL library functions across implementations.
1 parent a66f79d commit 1391217

File tree

11 files changed

+1052
-824
lines changed

11 files changed

+1052
-824
lines changed

buf/validate/conformance/expected_failures.yaml

-739
Large diffs are not rendered by default.

buf/validate/internal/BUILD.bazel

+1
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ cc_library(
135135
":string_format",
136136
"//buf/validate/internal/lib:ipv4",
137137
"//buf/validate/internal/lib:ipv6",
138+
"//buf/validate/internal/lib:uri",
138139
"@com_google_absl//absl/status",
139140
"@com_google_cel_cpp//eval/public:cel_function_adapter",
140141
"@com_google_cel_cpp//eval/public:cel_function_registry",

buf/validate/internal/extra_func.cc

+5-42
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "absl/strings/str_split.h"
2222
#include "buf/validate/internal/lib/ipv4.h"
2323
#include "buf/validate/internal/lib/ipv6.h"
24+
#include "buf/validate/internal/lib/uri.h"
2425
#include "buf/validate/internal/string_format.h"
2526
#include "eval/public/cel_function_adapter.h"
2627
#include "eval/public/cel_value.h"
@@ -316,55 +317,17 @@ cel::CelValue isIpPrefix(google::protobuf::Arena* arena, cel::CelValue::StringHo
316317
}
317318

318319
/**
319-
* Naive URI validation.
320+
* URI validation.
320321
*/
321322
cel::CelValue isUri(google::protobuf::Arena* arena, cel::CelValue::StringHolder lhs) {
322-
const std::string_view& ref = lhs.value();
323-
if (ref.empty()) {
324-
return cel::CelValue::CreateBool(false);
325-
}
326-
std::string_view scheme, host;
327-
if (!absl::StrContains(ref, "://")) {
328-
return cel::CelValue::CreateBool(false);
329-
}
330-
std::vector<std::string_view> split = absl::StrSplit(ref, absl::MaxSplits("://", 1));
331-
scheme = split[0];
332-
std::vector<std::string_view> hostSplit = absl::StrSplit(split[1], absl::MaxSplits('/', 1));
333-
host = hostSplit[0];
334-
// Just checking that scheme and host are present.
335-
return cel::CelValue::CreateBool(!scheme.empty() && !host.empty());
323+
return cel::CelValue::CreateBool(lib::validateUri(lhs.value()));
336324
}
337325

338326
/**
339-
* Naive URI ref validation.
327+
* URI ref validation.
340328
*/
341329
cel::CelValue isUriRef(google::protobuf::Arena* arena, cel::CelValue::StringHolder lhs) {
342-
const std::string_view& ref = lhs.value();
343-
if (ref.empty()) {
344-
return cel::CelValue::CreateBool(false);
345-
}
346-
std::string_view scheme, host, path;
347-
std::string_view remainder = ref;
348-
if (absl::StrContains(ref, "://")) {
349-
std::vector<std::string_view> split = absl::StrSplit(ref, absl::MaxSplits("://", 1));
350-
scheme = split[0];
351-
std::vector<std::string_view> hostSplit = absl::StrSplit(split[1], absl::MaxSplits('/', 1));
352-
host = hostSplit[0];
353-
// If hostSplit has a size greater than 1, then a '/' appeared in the string. Set the rest
354-
// to remainder so we can parse any query string.
355-
if (hostSplit.size() > 1) {
356-
remainder = hostSplit[1];
357-
}
358-
}
359-
std::vector<std::string_view> querySplit = absl::StrSplit(remainder, absl::MaxSplits('?', 1));
360-
path = querySplit[0];
361-
if (!isPathValid(path)) {
362-
return cel::CelValue::CreateBool(false);
363-
}
364-
// If the scheme and host are invalid, then the input is a URI ref (so make sure path exists).
365-
// If the scheme and host are valid, then the input is a URI.
366-
bool parsedResult = !path.empty() || (!scheme.empty() && !host.empty());
367-
return cel::CelValue::CreateBool(parsedResult);
330+
return cel::CelValue::CreateBool(lib::validateUriReference(lhs.value()));
368331
}
369332

370333
absl::Status RegisterExtraFuncs(

buf/validate/internal/lib/BUILD.bazel

+18
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,24 @@ cc_test(
5050
],
5151
)
5252

53+
cc_library(
54+
name = "uri",
55+
srcs = ["uri.cc"],
56+
hdrs = ["uri.h"],
57+
deps = [
58+
":parser_common"
59+
]
60+
)
61+
62+
cc_test(
63+
name = "uri_test",
64+
srcs = ["uri_test.cc"],
65+
deps = [
66+
":uri",
67+
"@com_google_googletest//:gtest_main",
68+
],
69+
)
70+
5371
cc_library(
5472
name = "parser_common",
5573
hdrs = ["parser_common.h"]

buf/validate/internal/lib/ipv4.cc

+8-8
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@ namespace buf::validate::internal::lib {
2424
namespace {
2525

2626
struct IPv4Parser : ParserCommon, public IPv4Prefix {
27-
bool consumePrefixLength() { return consumeDecimalNumber<uint8_t, bits_count>(prefixLength); }
27+
bool parsePrefixLength() { return parseDecimalNumber<uint8_t, bits_count>(prefixLength); }
2828

29-
bool consumeAddressPart() {
29+
bool parseAddressPart() {
3030
std::array<uint8_t, 4> octets;
31-
if (!consumeDecimalOctet(octets[0]) || !consumeDot() || //
32-
!consumeDecimalOctet(octets[1]) || !consumeDot() || //
33-
!consumeDecimalOctet(octets[2]) || !consumeDot() || //
34-
!consumeDecimalOctet(octets[3])) {
31+
if (!parseDecimalOctet(octets[0]) || !consume<Char<'.'>>() || //
32+
!parseDecimalOctet(octets[1]) || !consume<Char<'.'>>() || //
33+
!parseDecimalOctet(octets[2]) || !consume<Char<'.'>>() || //
34+
!parseDecimalOctet(octets[3])) {
3535
return false;
3636
}
3737
bits |= static_cast<uint32_t>(octets[0]) << 24;
@@ -41,10 +41,10 @@ struct IPv4Parser : ParserCommon, public IPv4Prefix {
4141
return true;
4242
}
4343

44-
bool parseAddress() { return consumeAddressPart() && str.empty(); }
44+
bool parseAddress() { return parseAddressPart() && str.empty(); }
4545

4646
bool parsePrefix() {
47-
return consumeAddressPart() && consumeSlash() && consumePrefixLength() && str.empty();
47+
return parseAddressPart() && consume<Char<'/'>>() && parsePrefixLength() && str.empty();
4848
}
4949
};
5050

buf/validate/internal/lib/ipv4_test.cc

+4-4
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,10 @@ INSTANTIATE_TEST_SUITE_P(
7878
IPv4PrefixParseTest,
7979
IPv4PrefixParseTestSuite,
8080
::testing::Values(
81-
IPv4PrefixParseTestCase{"127.0.0.1/1", {{0x7f000001, 1}}},
82-
IPv4PrefixParseTestCase{"100.100.100.100/0", {{0x64646464, 0}}},
83-
IPv4PrefixParseTestCase{"255.255.255.255/32", {{0xffffffff, 32}}},
84-
IPv4PrefixParseTestCase{"10.0.0.0/8", {{0x0a000000, 8}}},
81+
IPv4PrefixParseTestCase{"127.0.0.1/1", {{{0x7f000001}, 1}}},
82+
IPv4PrefixParseTestCase{"100.100.100.100/0", {{{0x64646464}, 0}}},
83+
IPv4PrefixParseTestCase{"255.255.255.255/32", {{{0xffffffff}, 32}}},
84+
IPv4PrefixParseTestCase{"10.0.0.0/8", {{{0x0a000000}, 8}}},
8585
IPv4PrefixParseTestCase{"1.1.1.1//1", std::nullopt},
8686
IPv4PrefixParseTestCase{"1.1.1.1.1", std::nullopt},
8787
IPv4PrefixParseTestCase{"1.1.1.1/33", std::nullopt},

buf/validate/internal/lib/ipv6.cc

+14-14
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
3535
return false;
3636
}
3737
str = str.substr(1);
38-
} while(!str.empty());
38+
} while (!str.empty());
3939
return true;
4040
}
4141

@@ -49,12 +49,12 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
4949
return str[1] == '.' || str[2] == '.' || str[3] == '.';
5050
}
5151

52-
bool consumeDotted(int index) {
52+
bool parseDotted() {
5353
std::array<uint8_t, 4> octets;
54-
if (!consumeDecimalOctet(octets[0]) || !consumeDot() || //
55-
!consumeDecimalOctet(octets[1]) || !consumeDot() || //
56-
!consumeDecimalOctet(octets[2]) || !consumeDot() || //
57-
!consumeDecimalOctet(octets[3])) {
54+
if (!parseDecimalOctet(octets[0]) || !consume<Char<'.'>>() || //
55+
!parseDecimalOctet(octets[1]) || !consume<Char<'.'>>() || //
56+
!parseDecimalOctet(octets[2]) || !consume<Char<'.'>>() || //
57+
!parseDecimalOctet(octets[3])) {
5858
return false;
5959
}
6060
bits |= static_cast<uint32_t>(octets[0]) << 24;
@@ -64,9 +64,9 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
6464
return true;
6565
}
6666

67-
bool consumePrefixLength() { return consumeDecimalNumber<uint8_t, bits_count>(prefixLength); }
67+
bool consumePrefixLength() { return parseDecimalNumber<uint8_t, bits_count>(prefixLength); }
6868

69-
bool consumeAddressPart() {
69+
bool parseAddressPart() {
7070
std::bitset<bits_count> b;
7171
int index = 0;
7272
bool doubleColonFound = false;
@@ -80,18 +80,18 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
8080
while (index < hexadecatets_count) {
8181
if ((state == Separator || state == DoubleColon) &&
8282
(doubleColonFound || index == hexadecatets_count - 2) && checkDotted()) {
83-
if (!consumeDotted(index)) {
83+
if (!parseDotted()) {
8484
return false;
8585
}
8686
b <<= 32;
8787
index += 2;
8888
break;
89-
} else if (state != Hexadecatet && consumeHexadecimalHexadecatet(value)) {
89+
} else if (state != Hexadecatet && parseHexadecimalHexadecatet(value)) {
9090
state = Hexadecatet;
9191
b <<= 16;
9292
b |= value;
9393
index++;
94-
} else if (state != Separator && consumeDoubleColon()) {
94+
} else if (state != Separator && consumeSequence<':', ':'>()) {
9595
state = DoubleColon;
9696
if (index > hexadecatets_count - 1 || doubleColonFound) {
9797
return false;
@@ -102,7 +102,7 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
102102
// This ensures that we can't have more than 7 hexadecatets when there's
103103
// a double-colon, even though we don't actually process a hexadecatet.
104104
index++;
105-
} else if (state == Hexadecatet && consumeColon()) {
105+
} else if (state == Hexadecatet && consume<Char<':'>>()) {
106106
state = Separator;
107107
} else {
108108
// Unable to match anything: this is the end.
@@ -120,11 +120,11 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
120120
}
121121

122122
bool parseAddress() {
123-
return consumeAddressPart() && (!consumePercent() || consumeZoneId()) && str.empty();
123+
return parseAddressPart() && (!consume<Char<'%'>>() || consumeZoneId()) && str.empty();
124124
}
125125

126126
bool parsePrefix() {
127-
return consumeAddressPart() && consumeSlash() && consumePrefixLength() && str.empty();
127+
return parseAddressPart() && consume<Char<'/'>>() && consumePrefixLength() && str.empty();
128128
}
129129
};
130130

0 commit comments

Comments
 (0)