Skip to content

Commit 95ee663

Browse files
authored
improve performance of unique and unique_by (#3254)
Previously, the `unique` and `unique_by` filters were implemented using `group_by` followed by `map(.[0])`. This commit re-implements them in C to avoid the unnecessary boxing of each group, which improves performance.
1 parent 8ba03f7 commit 95ee663

File tree

4 files changed

+45
-3
lines changed

4 files changed

+45
-3
lines changed

src/builtin.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,24 @@ static jv f_group_by_impl(jq_state *jq, jv input, jv keys) {
879879
}
880880
}
881881

882+
/*
 * C implementation of the `unique` builtin.
 *
 * For an array input, each element serves as its own key: a copy of the
 * input is passed to jv_unique() as the key array. Any other input kind
 * yields the result of type_error().
 */
static jv f_unique(jq_state *jq, jv input) {
  if (jv_get_kind(input) != JV_KIND_ARRAY)
    return type_error(input, "cannot be sorted, as it is not an array");

  /* The elements double as keys, so hand jv_unique a second reference. */
  jv self_keys = jv_copy(input);
  return jv_unique(input, self_keys);
}
889+
890+
/*
 * C implementation behind `_unique_by_impl`.
 *
 * `input` is the array being de-duplicated and `keys` the parallel array of
 * keys (one per element). When both are arrays of equal length they are
 * passed to jv_unique(); otherwise the result of type_error2() is returned.
 */
static jv f_unique_by_impl(jq_state *jq, jv input, jv keys) {
  int both_arrays = jv_get_kind(input) == JV_KIND_ARRAY &&
                    jv_get_kind(keys) == JV_KIND_ARRAY;

  /* Lengths are only compared once both values are known to be arrays. */
  if (!both_arrays ||
      jv_array_length(jv_copy(input)) != jv_array_length(jv_copy(keys))) {
    return type_error2(input, keys, "cannot be sorted, as they are not both arrays");
  }

  return jv_unique(input, keys);
}
899+
882900
#ifdef HAVE_LIBONIG
883901
static int f_match_name_iter(const UChar* name, const UChar *name_end, int ngroups,
884902
int *groups, regex_t *reg, void *arg) {
@@ -1912,6 +1930,8 @@ BINOPS
19121930
CFUNC(f_sort, "sort", 1),
19131931
CFUNC(f_sort_by_impl, "_sort_by_impl", 2),
19141932
CFUNC(f_group_by_impl, "_group_by_impl", 2),
1933+
CFUNC(f_unique, "unique", 1),
1934+
CFUNC(f_unique_by_impl, "_unique_by_impl", 2),
19151935
CFUNC(f_bsearch, "bsearch", 2),
19161936
CFUNC(f_min, "min", 1),
19171937
CFUNC(f_max, "max", 1),

src/builtin.jq

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ def map(f): [.[] | f];
44
def select(f): if f then . else empty end;
55
def sort_by(f): _sort_by_impl(map([f]));
66
def group_by(f): _group_by_impl(map([f]));
7-
def unique: group_by(.) | map(.[0]);
8-
def unique_by(f): group_by(f) | map(.[0]);
7+
# unique_by(f): evaluates f on every element, wraps each element's outputs in
# an array to form its key, and passes those keys to _unique_by_impl.
def unique_by(f): _unique_by_impl(map([f]));
98
def max_by(f): _max_by_impl(map([f]));
109
def min_by(f): _min_by_impl(map([f]));
1110
def add(f): reduce f as $x (null; . + $x);

src/jv.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,9 @@ jv jv_delpaths(jv, jv);
265265
jv jv_keys(jv /*object or array*/);
266266
jv jv_keys_unsorted(jv /*object or array*/);
267267
int jv_cmp(jv, jv);
268-
jv jv_group(jv, jv);
269268
jv jv_sort(jv, jv);
269+
jv jv_group(jv, jv);
270+
jv jv_unique(jv, jv);
270271

271272
#ifdef __cplusplus
272273
}

src/jv_aux.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,3 +727,25 @@ jv jv_group(jv objects, jv keys) {
727727
jv_mem_free(entries);
728728
return ret;
729729
}
730+
731+
/*
 * Build an array holding one element of `objects` per distinct key.
 *
 * `objects` and `keys` must be arrays of the same length; keys[i] is the key
 * of objects[i]. Both arguments are handed to sort_items(), which is defined
 * elsewhere in this file and is presumed to return the (object, key) pairs
 * ordered by key, with each entry owning a reference to its object and key.
 * TODO confirm against sort_items.
 *
 * The entries are scanned in that order. An entry whose key equals the
 * previously kept key (per jv_equal) is discarded; otherwise its object is
 * appended to the result.
 *
 * Returns the newly built array.
 */
jv jv_unique(jv objects, jv keys) {
  assert(jv_get_kind(objects) == JV_KIND_ARRAY);
  assert(jv_get_kind(keys) == JV_KIND_ARRAY);
  assert(jv_array_length(jv_copy(objects)) == jv_array_length(jv_copy(keys)));
  int n = jv_array_length(jv_copy(objects));
  struct sort_entry* entries = sort_items(objects, keys);
  jv ret = jv_array();
  /* Starts invalid so that it never compares equal to the first real key. */
  jv curr_key = jv_invalid();
  for (int i = 0; i < n; i++) {
    if (jv_equal(jv_copy(curr_key), jv_copy(entries[i].key))) {
      /*
       * Duplicate key: this entry is dropped, so release BOTH references it
       * holds. Freeing only the key would leak the object.
       */
      jv_free(entries[i].key);
      jv_free(entries[i].object);
    } else {
      /* New key: it becomes the current key, and its object moves into ret. */
      jv_free(curr_key);
      curr_key = entries[i].key;
      ret = jv_array_append(ret, entries[i].object);
    }
  }
  jv_free(curr_key);
  jv_mem_free(entries);
  return ret;
}

0 commit comments

Comments
 (0)