Skip to content

Allow splitting entity path expressions with whitespace #7782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/labels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ jobs:
with:
mode: minimum
count: 1
labels: "📊 analytics, 🟦 blueprint, 🪳 bug, 🌊 C++ API, CLI, codegen/idl, 🧑‍💻 dev experience, dependencies, 📖 documentation, 💬 discussion, examples, exclude from changelog, 🪵 Log-API, 📉 performance, 🐍 Python API, ⛃ re_datastore, 🔍 re_query, 📺 re_viewer, 🔺 re_renderer, 🚜 refactor, ⛴ release, 🦀 Rust API, 🔨 testing, ui, 🕸️ web"
labels: "📊 analytics, 🟦 blueprint, 🪳 bug, 🌊 C++ API, CLI, codegen/idl, 🧑‍💻 dev experience, dependencies, 📖 documentation, 💬 discussion, examples, exclude from changelog, 🪵 Log-API, 📉 performance, 🐍 Python API, ⛃ re_datastore, 🔍 re_query, 📺 re_viewer, 🔺 re_renderer, 🚜 refactor, ⛴ release, 🦀 Rust API, 🔨 testing, ui, 🕸️ web, 🪵 Log & send APIs"
104 changes: 102 additions & 2 deletions crates/store/re_log_types/src/path/entity_path_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,75 @@ impl TryFrom<&str> for EntityPathFilter {
}
}

/// Split a string into whitespace-separated tokens with extra logic.
///
/// Specifically, we allow for whitespace between `+`/`-` and the following token.
///
/// Additional rules:
/// - Escaped whitespace never results in a split.
/// - Otherwise always split on `\n` (even if it follows a `+` or `-` character).
/// - Only consider `+` and `-` characters as special if they are the first character of a token.
/// - Split on whitespace does not following a relevant `+` or `-` character.
fn split_whitespace_smart(path: &'_ str) -> Vec<&'_ str> {
#![allow(clippy::unwrap_used)]

// We parse on bytes, and take care to only split on either side of a one-byte ASCII,
// making the `from_utf8(…)`s below safe to unwrap.
let mut bytes = path.as_bytes();

let mut tokens = vec![];

// Start by ignoring any leading whitespace
while !bytes.is_empty() {
let mut i = 0;
let mut is_in_escape = false;
let mut is_include_exclude = false;
let mut new_token = true;

// Find the next unescaped whitespace character not following a '+' or '-' character
while i < bytes.len() {
let is_unescaped_whitespace = !is_in_escape && bytes[i].is_ascii_whitespace();

let is_unescaped_newline = !is_in_escape && bytes[i] == b'\n';

if is_unescaped_newline || (!is_include_exclude && is_unescaped_whitespace) {
break;
}

is_in_escape = bytes[i] == b'\\';

if bytes[i] == b'+' || bytes[i] == b'-' {
is_include_exclude = new_token;
} else if !is_unescaped_whitespace {
is_include_exclude = false;
new_token = false;
}

i += 1;
}
if i > 0 {
tokens.push(&bytes[..i]);
}

// Continue skipping whitespace characters
while i < bytes.len() {
if is_in_escape || !bytes[i].is_ascii_whitespace() {
break;
}
is_in_escape = bytes[i] == b'\\';
i += 1;
}

bytes = &bytes[i..];
}

// Safety: we split at proper character boundaries
tokens
.iter()
.map(|token| std::str::from_utf8(token).unwrap())
.collect()
}

impl EntityPathFilter {
/// Parse an entity path filter from a string while ignore syntax errors.
///
Expand All @@ -187,7 +256,8 @@ impl EntityPathFilter {
///
/// Conflicting rules are resolved by the last rule.
pub fn parse_forgiving(rules: &str, subst_env: &EntityPathSubs) -> Self {
Self::from_query_expressions_forgiving(rules.split('\n'), subst_env)
let split_rules = split_whitespace_smart(rules);
Self::from_query_expressions_forgiving(split_rules, subst_env)
}

/// Build a filter from a list of query expressions.
Expand Down Expand Up @@ -243,7 +313,8 @@ impl EntityPathFilter {
rules: &str,
subst_env: &EntityPathSubs,
) -> Result<Self, EntityPathFilterParseError> {
Self::from_query_expressions_strict(rules.split('\n'), subst_env)
let split_rules = split_whitespace_smart(rules);
Self::from_query_expressions_strict(split_rules, subst_env)
}

/// Build a filter from a list of query expressions.
Expand Down Expand Up @@ -891,3 +962,32 @@ mod tests {
}
}
}

#[test]
fn test_split_whitespace_smart() {
assert_eq!(split_whitespace_smart("/world"), vec!["/world"]);
assert_eq!(split_whitespace_smart("a b c"), vec!["a", "b", "c"]);
assert_eq!(split_whitespace_smart("a\nb\tc "), vec!["a", "b", "c"]);
assert_eq!(split_whitespace_smart(r"a\ b c"), vec![r"a\ b", "c"]);

assert_eq!(
split_whitespace_smart(" + a - b + c"),
vec!["+ a", "- b", "+ c"]
);
assert_eq!(
split_whitespace_smart("+ a -\n b + c"),
vec!["+ a", "-", "b", "+ c"]
);
assert_eq!(
split_whitespace_smart("/weird/path- +/oth- erpath"),
vec!["/weird/path-", "+/oth-", "erpath"]
);
assert_eq!(
split_whitespace_smart(r"+world/** -/world/points"),
vec!["+world/**", "-/world/points"]
);
assert_eq!(
split_whitespace_smart(r"+ world/** - /world/points"),
vec!["+ world/**", "- /world/points"]
);
}
19 changes: 19 additions & 0 deletions rerun_py/tests/unit/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,25 @@ def test_full_view(self) -> None:
assert table.num_columns == 1
assert table.num_rows == 1

def test_content_filters(self) -> None:
filter_expressions = [
"+/** -/static_text",
"""
+/**
-/static_text
""",
{"/** -/static_text": ["Position3D", "Color"]},
]

for expr in filter_expressions:
view = self.recording.view(index="my_index", contents=expr)

table = view.select().read_all()

# my_index, log_time, log_tick, points, colors
assert table.num_columns == 5
assert table.num_rows == 2

def test_select_columns(self) -> None:
view = self.recording.view(index="my_index", contents="points")
index_col = rr.dataframe.IndexColumnSelector("my_index")
Expand Down
Loading