Skip to content

Commit 29c1f4e

Browse files
committed
refactor: move bsearch function to C-code
This commit fixes issue #527 and moves the bsearch function to native C code. The performance is a bit better: Testing script: ```bash clear if [[ `uname` == Darwin ]]; then MAX_MEMORY_UNITS=KB else MAX_MEMORY_UNITS=MB fi export TIMEFMT='%J %U user %S system %P cpu %*E total'$'\n'\ 'avg shared (code): %X KB'$'\n'\ 'avg unshared (data/stack): %D KB'$'\n'\ 'total (sum): %K KB'$'\n'\ 'max memory: %M '$MAX_MEMORY_UNITS''$'\n'\ 'page faults from disk: %F'$'\n'\ 'other page faults: %R' echo "JQ code bsearch" time /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' echo "C code bsearch" time ./jq -n '[range(30000000)] | bsearch(3000)' ``` Results: ``` JQ code bsearch 3000 /usr/bin/jq -n '[range(30000000)] | bsearch(3000)' 8.63s user 0.77s system 98% cpu 9.542 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 823 MB page faults from disk: 1 other page faults: 432828 C code bsearch 3000 ./jq -n '[range(30000000)] | bsearch(3000)' 8.44s user 0.74s system 99% cpu 9.249 total avg shared (code): 0 KB avg unshared (data/stack): 0 KB total (sum): 0 KB max memory: 824 MB page faults from disk: 0 other page faults: 432766 ``` The results might be better if we could use jvp_array_read, so that there would be no need to copy/free the input array in each iteration. I guess it is done this way for API purposes, for the case where libjq is used from multiple threads. Signed-off-by: Eloy Coto <[email protected]>
1 parent 6c03513 commit 29c1f4e

File tree

2 files changed

+58
-32
lines changed

2 files changed

+58
-32
lines changed

src/builtin.c

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#endif
88
#include <sys/time.h>
99
#include <stdlib.h>
10+
#include <stdio.h>
1011
#include <stddef.h>
1112
#ifdef HAVE_ALLOCA_H
1213
# include <alloca.h>
@@ -46,7 +47,6 @@ void *alloca (size_t);
4647
#include "jv_private.h"
4748
#include "util.h"
4849

49-
5050
#define BINOP(name) \
5151
static jv f_ ## name(jq_state *jq, jv input, jv a, jv b) { \
5252
jv_free(input); \
@@ -807,6 +807,62 @@ static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) {
807807
}
808808
}
809809

810+
/* Assuming the input array is sorted, bsearch/1 returns */
811+
/* the index of the target if the target is in the input array; and otherwise */
812+
/* (-1 - ix), where ix is the insertion point that would leave the array sorted. */
813+
/* If the input is not sorted, bsearch will terminate but with irrelevant results. */
814+
static jv f_bsearch(jq_state *jq, jv input, jv target) {
815+
assert(jv_get_kind(input) == JV_KIND_ARRAY);
816+
assert(jv_get_kind(target) == JV_KIND_NUMBER);
817+
int len = jv_array_length(jv_copy(input));
818+
if (len == 0) {
819+
jv_free(input);
820+
jv_free(target);
821+
return jv_number(-1);
822+
} else if (len == 1) {
823+
int result = jv_cmp(target, jv_array_get(input, 0));
824+
if (result == 0 ) {
825+
return jv_number(0);
826+
} else if (result > 0) {
827+
return jv_number(-2);
828+
} else {
829+
return jv_number(-1);
830+
}
831+
}
832+
833+
int start = 0;
834+
int end = len - 1;
835+
jv answer = jv_null();
836+
while (start <end) {
837+
int mid = (start + end) / 2;
838+
int result = jv_cmp(jv_copy(target), jv_array_get(jv_copy(input), mid));
839+
if (result == 0) {
840+
answer = jv_number(mid);
841+
break;
842+
} else if (start == end ) {
843+
answer = jv_number(-1);
844+
break;
845+
} else if (result < 0 ) {
846+
end = mid -1;
847+
} else {
848+
start = mid +1;
849+
}
850+
}
851+
if (jv_get_kind(answer) == JV_KIND_NULL) {
852+
int result = jv_cmp(target, jv_array_get(jv_copy(input), start));
853+
if (result < 0) {
854+
answer = jv_number(-1 - start);
855+
}else {
856+
answer = jv_number(-2 - start);
857+
}
858+
} else {
859+
jv_free(target);
860+
}
861+
862+
jv_free(input);
863+
return answer;
864+
}
865+
810866
static jv f_group_by_impl(jq_state *jq, jv input, jv keys) {
811867
if (jv_get_kind(input) == JV_KIND_ARRAY &&
812868
jv_get_kind(keys) == JV_KIND_ARRAY &&
@@ -1754,6 +1810,7 @@ BINOPS
17541810
{f_sort, "sort", 1},
17551811
{f_sort_by_impl, "_sort_by_impl", 2},
17561812
{f_group_by_impl, "_group_by_impl", 2},
1813+
{f_bsearch, "bsearch", 2},
17571814
{f_min, "min", 1},
17581815
{f_max, "max", 1},
17591816
{f_min_by_impl, "_min_by_impl", 2},

src/builtin.jq

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -213,37 +213,6 @@ def tostream:
213213
getpath($p) |
214214
reduce path(.[]?) as $q ([$p, .]; [$p+$q]);
215215

216-
# Assuming the input array is sorted, bsearch/1 returns
217-
# the index of the target if the target is in the input array; and otherwise
218-
# (-1 - ix), where ix is the insertion point that would leave the array sorted.
219-
# If the input is not sorted, bsearch will terminate but with irrelevant results.
220-
def bsearch($target):
221-
if length == 0 then -1
222-
elif length == 1 then
223-
if $target == .[0] then 0 elif $target < .[0] then -1 else -2 end
224-
else . as $in
225-
# state variable: [start, end, answer]
226-
# where start and end are the lower and upper offsets to use.
227-
| [0, length-1, null]
228-
| until( .[0] > .[1] ;
229-
if .[2] != null then (.[1] = -1) # i.e. break
230-
else
231-
( ( (.[1] + .[0]) / 2 ) | floor ) as $mid
232-
| $in[$mid] as $monkey
233-
| if $monkey == $target then (.[2] = $mid) # success
234-
elif .[0] == .[1] then (.[1] = -1) # failure
235-
elif $monkey < $target then (.[0] = ($mid + 1))
236-
else (.[1] = ($mid - 1))
237-
end
238-
end )
239-
| if .[2] == null then # compute the insertion point
240-
if $in[ .[0] ] < $target then (-2 -.[0])
241-
else (-1 -.[0])
242-
end
243-
else .[2]
244-
end
245-
end;
246-
247216
# Apply f to composite entities recursively, and to atoms
248217
def walk(f):
249218
def w:

0 commit comments

Comments
 (0)