Skip to content

Commit 9e3c7bc

Browse files
committed
refactor: move bsearch function to C-code
This commit fixes issue #527 and moves the bsearch function to native C code. The performance is a bit better: Testing script: ```bash clear if [[ `uname` == Darwin ]]; then MAX_MEMORY_UNITS=KB else MAX_MEMORY_UNITS=MB fi export TIMEFMT='%J %U user %S system %P cpu %*E total'$'\n'\ 'avg shared (code): %X KB'$'\n'\ 'avg unshared (data/stack): %D KB'$'\n'\ 'total (sum): %K KB'$'\n'\ 'max memory: %M '$MAX_MEMORY_UNITS''$'\n'\ 'page faults from disk: %F'$'\n'\ 'other page faults: %R' echo "JQ code bsearch" time /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' echo "C code bsearch" time ./jq -n '[range(30000000)] | bsearch(3000)' ``` Results: ``` JQ code bsearch 3000 /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' 8.63s user 0.77s system 98% cpu 9.542 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 823 MB page faults from disk: 1 other page faults: 432828 C code bsearch 3000 ./jq -n '[range(30000000)] | bsearch(3000)' 8.44s user 0.74s system 99% cpu 9.249 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 824 MB page faults from disk: 0 other page faults: 432766 ``` The results might be better if we could use jvp_array_read, so that there would be no need to copy/free the input array in each iteration. I guess the API works that way on purpose, for when libjq is used from multiple threads. Signed-off-by: Eloy Coto <[email protected]>
1 parent 6c03513 commit 9e3c7bc

File tree

2 files changed

+57
-32
lines changed

2 files changed

+57
-32
lines changed

src/builtin.c

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ void *alloca (size_t);
4646
#include "jv_private.h"
4747
#include "util.h"
4848

49-
5049
#define BINOP(name) \
5150
static jv f_ ## name(jq_state *jq, jv input, jv a, jv b) { \
5251
jv_free(input); \
@@ -807,6 +806,62 @@ static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) {
807806
}
808807
}
809808

809+
/* Assuming the input array is sorted, bsearch/1 returns */
810+
/* the index of the target if the target is in the input array; and otherwise */
811+
/* (-1 - ix), where ix is the insertion point that would leave the array sorted. */
812+
/* If the input is not sorted, bsearch will terminate but with irrelevant results. */
813+
static jv f_bsearch(jq_state *jq, jv input, jv target) {
814+
assert(jv_get_kind(input) == JV_KIND_ARRAY);
815+
assert(jv_get_kind(target) == JV_KIND_NUMBER);
816+
int len = jv_array_length(jv_copy(input));
817+
if (len == 0) {
818+
return jv_number(-1);
819+
} else if (len == 1) {
820+
jv val = jv_copy(jv_array_get(jv_copy(input), 0));
821+
int result = jv_cmp(target, val);
822+
jv_free(val);
823+
if (result == 0 ) {
824+
return jv_number(0);
825+
} else if (result > 0) {
826+
return jv_number(-2);
827+
} else {
828+
return jv_number(-1);
829+
}
830+
}
831+
832+
int start = 0;
833+
int end = len - 1;
834+
jv answer = jv_null();
835+
while (start <end) {
836+
int mid = (start + end) / 2;
837+
838+
jv val = jv_array_get(jv_copy(input), mid);
839+
int result = jv_cmp(target, val);
840+
if (result == 0) {
841+
answer = jv_number(mid);
842+
break;
843+
} else if (start == end ) {
844+
answer = jv_number(-1);
845+
break;
846+
} else if (result < 0 ) {
847+
end = mid -1;
848+
} else {
849+
start = mid +1;
850+
}
851+
}
852+
853+
if (jv_equal(answer, jv_null())) {
854+
jv val = jv_array_get(jv_copy(input), start);
855+
int result = jv_cmp(target, val);
856+
if (result < 0) {
857+
answer = jv_number(-1 - start);
858+
}else {
859+
answer = jv_number(-2 - start);
860+
}
861+
}
862+
return answer;
863+
}
864+
810865
static jv f_group_by_impl(jq_state *jq, jv input, jv keys) {
811866
if (jv_get_kind(input) == JV_KIND_ARRAY &&
812867
jv_get_kind(keys) == JV_KIND_ARRAY &&
@@ -1754,6 +1809,7 @@ BINOPS
17541809
{f_sort, "sort", 1},
17551810
{f_sort_by_impl, "_sort_by_impl", 2},
17561811
{f_group_by_impl, "_group_by_impl", 2},
1812+
{f_bsearch, "bsearch", 2},
17571813
{f_min, "min", 1},
17581814
{f_max, "max", 1},
17591815
{f_min_by_impl, "_min_by_impl", 2},

src/builtin.jq

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -213,37 +213,6 @@ def tostream:
213213
getpath($p) |
214214
reduce path(.[]?) as $q ([$p, .]; [$p+$q]);
215215

216-
# Binary search for $target in the input array.
# Assuming the input array is sorted, bsearch/1 returns
217-
# the index of the target if the target is in the input array; and otherwise
218-
# (-1 - ix), where ix is the insertion point that would leave the array sorted.
219-
# If the input is not sorted, bsearch will terminate but with irrelevant results.
220-
def bsearch($target):
221-
if length == 0 then -1
222-
elif length == 1 then
223-
if $target == .[0] then 0 elif $target < .[0] then -1 else -2 end
224-
else . as $in
225-
# state variable: [start, end, answer]
226-
# where start and end are the lower and upper offsets (both inclusive) to search.
227-
| [0, length-1, null]
228-
| until( .[0] > .[1] ;
229-
if .[2] != null then (.[1] = -1) # i.e. break: answer found, force start > end
230-
else
231-
( ( (.[1] + .[0]) / 2 ) | floor ) as $mid
232-
| $in[$mid] as $monkey # $monkey is the element at the midpoint
233-
| if $monkey == $target then (.[2] = $mid) # success
234-
elif .[0] == .[1] then (.[1] = -1) # failure: window had one element and it did not match
235-
elif $monkey < $target then (.[0] = ($mid + 1)) # continue in the upper half
236-
else (.[1] = ($mid - 1)) # continue in the lower half
237-
end
238-
end )
239-
| if .[2] == null then # compute the insertion point
240-
if $in[ .[0] ] < $target then (-2 -.[0])
241-
else (-1 -.[0])
242-
end
243-
else .[2]
244-
end
245-
end;
246-
247216
# Apply f to composite entities recursively, and to atoms
248217
def walk(f):
249218
def w:

0 commit comments

Comments
 (0)