Skip to content

Implement unified URI validation #82

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
739 changes: 0 additions & 739 deletions buf/validate/conformance/expected_failures.yaml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions buf/validate/internal/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ cc_library(
":string_format",
"//buf/validate/internal/lib:ipv4",
"//buf/validate/internal/lib:ipv6",
"//buf/validate/internal/lib:uri",
"@com_google_absl//absl/status",
"@com_google_cel_cpp//eval/public:cel_function_adapter",
"@com_google_cel_cpp//eval/public:cel_function_registry",
Expand Down
47 changes: 5 additions & 42 deletions buf/validate/internal/extra_func.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "absl/strings/str_split.h"
#include "buf/validate/internal/lib/ipv4.h"
#include "buf/validate/internal/lib/ipv6.h"
#include "buf/validate/internal/lib/uri.h"
#include "buf/validate/internal/string_format.h"
#include "eval/public/cel_function_adapter.h"
#include "eval/public/cel_value.h"
Expand Down Expand Up @@ -316,55 +317,17 @@ cel::CelValue isIpPrefix(google::protobuf::Arena* arena, cel::CelValue::StringHo
}

/**
 * URI validation.
 *
 * Reports, as a CEL bool, whether the string held by `lhs` is accepted by
 * `lib::validateUri`. No parsing is done here; the decision is delegated entirely to
 * that validator (brought in by the "buf/validate/internal/lib/uri.h" include).
 *
 * @param arena not referenced by this function; presumably kept so the signature matches
 *              what the CEL function registration expects (confirm against the caller).
 * @param lhs   holder of the candidate string.
 * @return a CEL bool wrapping the result of `lib::validateUri(lhs.value())`.
 */
cel::CelValue isUri(google::protobuf::Arena* arena, cel::CelValue::StringHolder lhs) {
  return cel::CelValue::CreateBool(lib::validateUri(lhs.value()));
}

/**
 * URI ref validation.
 *
 * Reports, as a CEL bool, whether the string held by `lhs` is accepted by
 * `lib::validateUriReference`. No parsing is done here; the decision is delegated
 * entirely to that validator (brought in by the "buf/validate/internal/lib/uri.h"
 * include).
 *
 * @param arena not referenced by this function; presumably kept so the signature matches
 *              what the CEL function registration expects (confirm against the caller).
 * @param lhs   holder of the candidate string.
 * @return a CEL bool wrapping the result of `lib::validateUriReference(lhs.value())`.
 */
cel::CelValue isUriRef(google::protobuf::Arena* arena, cel::CelValue::StringHolder lhs) {
  return cel::CelValue::CreateBool(lib::validateUriReference(lhs.value()));
}

absl::Status RegisterExtraFuncs(
Expand Down
18 changes: 18 additions & 0 deletions buf/validate/internal/lib/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,24 @@ cc_test(
],
)

# Library target for uri.cc / uri.h; depends on :parser_common.
cc_library(
    name = "uri",
    srcs = ["uri.cc"],
    hdrs = ["uri.h"],
    deps = [
        ":parser_common",
    ],
)

# Test target for the :uri library (uri_test.cc), linked against gtest_main.
cc_test(
name = "uri_test",
srcs = ["uri_test.cc"],
deps = [
":uri",
"@com_google_googletest//:gtest_main",
],
)

cc_library(
name = "parser_common",
hdrs = ["parser_common.h"]
Expand Down
16 changes: 8 additions & 8 deletions buf/validate/internal/lib/ipv4.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ namespace buf::validate::internal::lib {
namespace {

struct IPv4Parser : ParserCommon, public IPv4Prefix {
bool consumePrefixLength() { return consumeDecimalNumber<uint8_t, bits_count>(prefixLength); }
bool parsePrefixLength() { return parseDecimalNumber<uint8_t, bits_count>(prefixLength); }

bool consumeAddressPart() {
bool parseAddressPart() {
std::array<uint8_t, 4> octets;
if (!consumeDecimalOctet(octets[0]) || !consumeDot() || //
!consumeDecimalOctet(octets[1]) || !consumeDot() || //
!consumeDecimalOctet(octets[2]) || !consumeDot() || //
!consumeDecimalOctet(octets[3])) {
if (!parseDecimalOctet(octets[0]) || !consume<Char<'.'>>() || //
!parseDecimalOctet(octets[1]) || !consume<Char<'.'>>() || //
!parseDecimalOctet(octets[2]) || !consume<Char<'.'>>() || //
!parseDecimalOctet(octets[3])) {
return false;
}
bits |= static_cast<uint32_t>(octets[0]) << 24;
Expand All @@ -41,10 +41,10 @@ struct IPv4Parser : ParserCommon, public IPv4Prefix {
return true;
}

bool parseAddress() { return consumeAddressPart() && str.empty(); }
bool parseAddress() { return parseAddressPart() && str.empty(); }

bool parsePrefix() {
return consumeAddressPart() && consumeSlash() && consumePrefixLength() && str.empty();
return parseAddressPart() && consume<Char<'/'>>() && parsePrefixLength() && str.empty();
}
};

Expand Down
8 changes: 4 additions & 4 deletions buf/validate/internal/lib/ipv4_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ INSTANTIATE_TEST_SUITE_P(
IPv4PrefixParseTest,
IPv4PrefixParseTestSuite,
::testing::Values(
IPv4PrefixParseTestCase{"127.0.0.1/1", {{0x7f000001, 1}}},
IPv4PrefixParseTestCase{"100.100.100.100/0", {{0x64646464, 0}}},
IPv4PrefixParseTestCase{"255.255.255.255/32", {{0xffffffff, 32}}},
IPv4PrefixParseTestCase{"10.0.0.0/8", {{0x0a000000, 8}}},
IPv4PrefixParseTestCase{"127.0.0.1/1", {{{0x7f000001}, 1}}},
IPv4PrefixParseTestCase{"100.100.100.100/0", {{{0x64646464}, 0}}},
IPv4PrefixParseTestCase{"255.255.255.255/32", {{{0xffffffff}, 32}}},
IPv4PrefixParseTestCase{"10.0.0.0/8", {{{0x0a000000}, 8}}},
IPv4PrefixParseTestCase{"1.1.1.1//1", std::nullopt},
IPv4PrefixParseTestCase{"1.1.1.1.1", std::nullopt},
IPv4PrefixParseTestCase{"1.1.1.1/33", std::nullopt},
Expand Down
28 changes: 14 additions & 14 deletions buf/validate/internal/lib/ipv6.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
return false;
}
str = str.substr(1);
} while(!str.empty());
} while (!str.empty());
return true;
}

Expand All @@ -49,12 +49,12 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
return str[1] == '.' || str[2] == '.' || str[3] == '.';
}

bool consumeDotted(int index) {
bool parseDotted() {
std::array<uint8_t, 4> octets;
if (!consumeDecimalOctet(octets[0]) || !consumeDot() || //
!consumeDecimalOctet(octets[1]) || !consumeDot() || //
!consumeDecimalOctet(octets[2]) || !consumeDot() || //
!consumeDecimalOctet(octets[3])) {
if (!parseDecimalOctet(octets[0]) || !consume<Char<'.'>>() || //
!parseDecimalOctet(octets[1]) || !consume<Char<'.'>>() || //
!parseDecimalOctet(octets[2]) || !consume<Char<'.'>>() || //
!parseDecimalOctet(octets[3])) {
return false;
}
bits |= static_cast<uint32_t>(octets[0]) << 24;
Expand All @@ -64,9 +64,9 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
return true;
}

bool consumePrefixLength() { return consumeDecimalNumber<uint8_t, bits_count>(prefixLength); }
bool consumePrefixLength() { return parseDecimalNumber<uint8_t, bits_count>(prefixLength); }

bool consumeAddressPart() {
bool parseAddressPart() {
std::bitset<bits_count> b;
int index = 0;
bool doubleColonFound = false;
Expand All @@ -80,18 +80,18 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
while (index < hexadecatets_count) {
if ((state == Separator || state == DoubleColon) &&
(doubleColonFound || index == hexadecatets_count - 2) && checkDotted()) {
if (!consumeDotted(index)) {
if (!parseDotted()) {
return false;
}
b <<= 32;
index += 2;
break;
} else if (state != Hexadecatet && consumeHexadecimalHexadecatet(value)) {
} else if (state != Hexadecatet && parseHexadecimalHexadecatet(value)) {
state = Hexadecatet;
b <<= 16;
b |= value;
index++;
} else if (state != Separator && consumeDoubleColon()) {
} else if (state != Separator && consumeSequence<':', ':'>()) {
state = DoubleColon;
if (index > hexadecatets_count - 1 || doubleColonFound) {
return false;
Expand All @@ -102,7 +102,7 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
// This ensures that we can't have more than 7 hexadecatets when there's
// a double-colon, even though we don't actually process a hexadecatet.
index++;
} else if (state == Hexadecatet && consumeColon()) {
} else if (state == Hexadecatet && consume<Char<':'>>()) {
state = Separator;
} else {
// Unable to match anything: this is the end.
Expand All @@ -120,11 +120,11 @@ struct IPv6Parser : ParserCommon, public IPv6Prefix {
}

bool parseAddress() {
return consumeAddressPart() && (!consumePercent() || consumeZoneId()) && str.empty();
return parseAddressPart() && (!consume<Char<'%'>>() || consumeZoneId()) && str.empty();
}

bool parsePrefix() {
return consumeAddressPart() && consumeSlash() && consumePrefixLength() && str.empty();
return parseAddressPart() && consume<Char<'/'>>() && consumePrefixLength() && str.empty();
}
};

Expand Down
Loading
Loading