From ab8edf98f6ea8aba5418608e4c170462bd713d24 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 3 Jun 2025 18:56:17 +0200 Subject: [PATCH 1/4] erts: Refactor erl_bif_re.c Split off build_compile_error() from build_compile_result(). --- erts/emulator/beam/erl_bif_re.c | 144 +++++++++++++++++--------------- 1 file changed, 77 insertions(+), 67 deletions(-) diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index 9821febe79ae..0c2b24140fb2 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -538,74 +538,84 @@ static pcre2_code *compile(const char* expr, } /* - * Build Erlang term result from compilation + * Build Erlang term result from successful compilation */ +static Eterm +build_compile_result(Process *p, pcre2_code *result, int unicode, bool with_ok) +{ + Eterm *hp; + Eterm ret; + size_t pattern_size; + uint32_t capture_count; + uint32_t newline; + int use_crlf; + Binary* magic_bin; + Eterm magic_ref; + struct regex_magic_indirect* indirect; + ASSERT(result); + + pcre2_pattern_info(result, PCRE2_INFO_SIZE, &pattern_size); + pcre2_pattern_info(result, PCRE2_INFO_CAPTURECOUNT, &capture_count); + pcre2_pattern_info(result, PCRE2_INFO_NEWLINE, &newline); + use_crlf = (newline == PCRE2_NEWLINE_ANY || + newline == PCRE2_NEWLINE_CRLF || + newline == PCRE2_NEWLINE_ANYCRLF); + + magic_bin = erts_create_magic_binary(sizeof(struct regex_magic_indirect), + regex_code_destructor); + indirect = ERTS_MAGIC_BIN_DATA(magic_bin); + indirect->regex_code = result; + + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + 6 + (with_ok ? 
3 : 0)); + magic_ref = erts_mk_magic_ref(&hp, &MSO(p), magic_bin); + ret = TUPLE5(hp, am_re_pattern, make_small(capture_count), + make_small(unicode), make_small(use_crlf), magic_ref); + if (with_ok) { + hp += 6; + ret = TUPLE2(hp,am_ok,ret); + } + return ret; +} + +/* + * Build Erlang term result from FAILED compilation + */ static Eterm -build_compile_result(Process *p, Eterm error_tag, pcre2_code *result, - int errcode, PCRE2_SIZE errofset, - int unicode, int with_ok, Eterm extra_err_tag) +build_compile_error(Process *p, + int errcode, PCRE2_SIZE errofset, + Eterm extra_err_tag) { Eterm *hp; Eterm ret; - if (!result) { - int elen, need; - PCRE2_UCHAR8 errstr[120]; + int elen, need; + PCRE2_UCHAR8 errstr[120]; - /* Return {error_tag, {Code, String, Offset}} */ - if (pcre2_get_error_message(errcode, errstr, sizeof(errstr)) - == PCRE2_ERROR_BADDATA) { - erts_snprintf((char*)errstr, sizeof(errstr), "Unknown error (%d)", errcode); - } - elen = sys_strlen((const char*)errstr); - need = 3 /* tuple of 2 */ + - 3 /* tuple of 2 */ + - (2 * elen) /* The error string list */ + - ((extra_err_tag != NIL) ? 3 : 0); - hp = HAlloc(p, need); - ret = buf_to_intlist(&hp, (char *) errstr, elen, NIL); - ret = TUPLE2(hp, ret, make_small(errofset)); - hp += 3; - if (extra_err_tag != NIL) { - /* Return {error_tag, {extra_tag, + /* Return {error, {Code, String, Offset}} */ + if (pcre2_get_error_message(errcode, errstr, sizeof(errstr)) + == PCRE2_ERROR_BADDATA) { + erts_snprintf((char*)errstr, sizeof(errstr), "Unknown error (%d)", errcode); + } + elen = sys_strlen((const char*)errstr); + need = 3 /* tuple of 2 */ + + 3 /* tuple of 2 */ + + (2 * elen) /* The error string list */ + + ((extra_err_tag != NIL) ? 
3 : 0); + hp = HAlloc(p, need); + ret = buf_to_intlist(&hp, (char *) errstr, elen, NIL); + ret = TUPLE2(hp, ret, make_small(errofset)); + hp += 3; + if (extra_err_tag != NIL) { + /* Return {error, {extra_tag, {Code, String, Offset}}} instead */ - ret = TUPLE2(hp, extra_err_tag, ret); - hp += 3; - } - ret = TUPLE2(hp, error_tag, ret); - } else { - size_t pattern_size; - uint32_t capture_count; - uint32_t newline; - int use_crlf; - Binary* magic_bin; - Eterm magic_ref; - struct regex_magic_indirect* indirect; - - pcre2_pattern_info(result, PCRE2_INFO_SIZE, &pattern_size); - pcre2_pattern_info(result, PCRE2_INFO_CAPTURECOUNT, &capture_count); - pcre2_pattern_info(result, PCRE2_INFO_NEWLINE, &newline); - use_crlf = (newline == PCRE2_NEWLINE_ANY || - newline == PCRE2_NEWLINE_CRLF || - newline == PCRE2_NEWLINE_ANYCRLF); - - magic_bin = erts_create_magic_binary(sizeof(struct regex_magic_indirect), - regex_code_destructor); - indirect = ERTS_MAGIC_BIN_DATA(magic_bin); - indirect->regex_code = result; - - hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE + 6 + (with_ok ? 
3 : 0)); - magic_ref = erts_mk_magic_ref(&hp, &MSO(p), magic_bin); - ret = TUPLE5(hp, am_re_pattern, make_small(capture_count), - make_small(unicode), make_small(use_crlf), magic_ref); - if (with_ok) { - hp += 6; - ret = TUPLE2(hp,am_ok,ret); - } + ret = TUPLE2(hp, extra_err_tag, ret); + hp += 3; } + ret = TUPLE2(hp, am_error, ret); return ret; } + /* * Compile BIFs */ @@ -682,8 +692,12 @@ re_compile(Process* p, Eterm arg1, Eterm arg2) result = compile((char*)expr, slen, &opts, the_precompile_ctx, &errcode, &errofset); - ret = build_compile_result(p, am_error, result, errcode, - errofset, unicode, 1, NIL); + if (!result) { + ret = build_compile_error(p, errcode, errofset, NIL); + } + else { + ret = build_compile_result(p, result, unicode, true); + } if (tmp_expr) { erts_free(ERTS_ALC_T_RE_TMP_BUF, tmp_expr); @@ -1284,11 +1298,8 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, int first) /* Compilation error gives badarg except in the compile function or if we have PARSE_FLAG_REPORT_ERRORS */ if (opts.flags & PARSE_FLAG_REPORT_ERRORS) { - res = build_compile_result(p, am_error, regex_code, errcode, - errofset, - (opts.flags & - PARSE_FLAG_UNICODE) ? 1 : 0, - 1, am_compile); + res = build_compile_error(p, errcode, + errofset, am_compile); BIF_RET(res); } else { BIF_ERROR(p,BADARG); @@ -1296,12 +1307,11 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, int first) } if (opts.flags & PARSE_FLAG_GLOBAL) { Eterm precompiled = - build_compile_result(p, am_error, - regex_code, errcode, - errofset, + build_compile_result(p, + regex_code, (opts.flags & PARSE_FLAG_UNICODE) ? 
1 : 0, - 0, NIL); + false); Eterm *hp,r; hp = HAlloc(p,4); /* arg2 is in the tuple just to make exceptions right */ From d3fcfa229646b6bc1318d67fa2eb2be5ebb26560 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 3 Jun 2025 19:00:28 +0200 Subject: [PATCH 2/4] stdlib: Remove dead code in re.erl --- lib/stdlib/src/re.erl | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index bc4503db723a..fa25ca165ed5 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -1546,15 +1546,8 @@ check_for_unicode(_,L) -> check_for_crlf({re_pattern,_,_,1,_},_) -> true; check_for_crlf({re_pattern,_,_,0,_},_) -> - false; -check_for_crlf(_,L) -> - case lists:keysearch(newline,1,L) of - {value,{newline,any}} -> true; - {value,{newline,crlf}} -> true; - {value,{newline,anycrlf}} -> true; - _ -> false - end. - + false. + % SelectReturn = false | all | stirpfirst | none % ConvertReturn = index | list | binary % {capture, all} -> all (untouchded) From f173f7debf7cbde9fd042357bb1c01a158456d92 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Mon, 9 Jun 2025 16:33:08 +0200 Subject: [PATCH 3/4] erts: Add re:import/1 and export option to re:compile --- erts/emulator/beam/atom.names | 2 + erts/emulator/beam/bif.tab | 5 + erts/emulator/beam/erl_bif_re.c | 294 ++++++++++++++++++++++++--- erts/emulator/pcre/pcre.mk | 4 + erts/emulator/pcre/pcre2_serialize.c | 286 ++++++++++++++++++++++++++ lib/stdlib/src/erl_stdlib_errors.erl | 2 + lib/stdlib/src/re.erl | 9 + lib/stdlib/test/re_SUITE.erl | 103 +++++++++- lib/stdlib/test/run_pcre_tests.erl | 28 ++- 9 files changed, 696 insertions(+), 37 deletions(-) create mode 100644 erts/emulator/pcre/pcre2_serialize.c diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 3662b918b7fc..d3296846b1f7 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -302,6 +302,7 @@ atom existing_processes atom existing_ports atom 
existing atom exiting +atom export atom exports atom extended atom external @@ -603,6 +604,7 @@ atom public atom queue_size atom raw atom re +atom re_exported_pattern atom re_pattern atom re_run_trap atom read_concurrency diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 20c469620f52..53f1c454f554 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -828,3 +828,8 @@ bif erl_debugger:stack_frames/2 bif erl_debugger:peek_stack_frame_slot/4 bif erl_debugger:xregs_count/1 bif erl_debugger:peek_xreg/3 + +# +# New in 28.1 +# +bif re:import/1 diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index 0c2b24140fb2..1d5bcfa055d8 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -32,6 +32,7 @@ #include "erl_binary.h" #include "erl_iolist.h" #include "big.h" +#include "zlib.h" #define ERLANG_INTEGRATION 1 #define PCRE2_STATIC //#include "pcre.h" @@ -48,9 +49,13 @@ static Export *urun_trap_exportp = NULL; static Export *ucompile_trap_exportp = NULL; static pcre2_general_context* the_general_ctx; +static pcre2_general_context* the_binary_general_ctx; +static pcre2_general_context* the_precomp_general_ctx; + static pcre2_compile_context* the_tmp_compile_ctx; static pcre2_compile_context* the_precompile_ctx; +static BIF_RETTYPE re_compile(Process* p, Eterm re_arg, Eterm opts_arg, bool is_import); static BIF_RETTYPE re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, int first); static void *our_pcre2_malloc(size_t size, void* null) @@ -66,6 +71,32 @@ static void our_pcre2_free(void *ptr, void* null) erts_free(ERTS_ALC_T_RE_SHORTLIVED, ptr); } +erts_tsd_key_t the_binary_malloc_tsd_key; + +static void *our_pcre2_binary_malloc(size_t size, void* null) +{ + if (erts_initialized) { + Binary* bin = erts_bin_nrml_alloc(size); + + /* Use TSD to "return" Binary back to caller. 
+ * We assume only one malloc call per pcre2_serialize_encode() + */ + ASSERT(erts_tsd_get(the_binary_malloc_tsd_key) == NULL); + erts_tsd_set(the_binary_malloc_tsd_key, bin); + + return &(bin->orig_bytes); + } + else { + /* Allocation of the_binary_general_ctx itself, which is never freed. */ + return erts_alloc(ERTS_ALC_T_RE_INIT, size); + } +} + +static void our_pcre2_binary_free(void *ptr, void* null) +{ + ASSERT(!"Dead code. Exported binary should be deallocated by GC."); +} + /* * The magic binary for a pre-compiled regex. * Just an indirection to the pcre2_code allocated and managed by PCRE2. @@ -171,16 +202,19 @@ void erts_init_bif_re(void) the_tmp_compile_ctx = pcre2_compile_context_create(the_general_ctx); pcre2_set_compile_recursion_guard(the_tmp_compile_ctx, stack_guard, NULL); - { - pcre2_general_context *precomp_gen_ctx = - pcre2_general_context_create(our_pcre2_precompile_malloc, - our_pcre2_precompile_free, - NULL); - the_precompile_ctx = pcre2_compile_context_create(precomp_gen_ctx); - pcre2_set_compile_recursion_guard(the_precompile_ctx, stack_guard, - NULL); - pcre2_general_context_free(precomp_gen_ctx); - } + the_precomp_general_ctx = + pcre2_general_context_create(our_pcre2_precompile_malloc, + our_pcre2_precompile_free, + NULL); + the_precompile_ctx = pcre2_compile_context_create(the_precomp_general_ctx); + pcre2_set_compile_recursion_guard(the_precompile_ctx, stack_guard, + NULL); + + the_binary_general_ctx = + pcre2_general_context_create(our_pcre2_binary_malloc, + our_pcre2_binary_free, + NULL); + erts_tsd_key_create(&the_binary_malloc_tsd_key, "re_binary_malloc"); max_loop_limit = CONTEXT_REDS * LOOP_FACTOR; erts_init_trap_export(&re_match_trap_export, am_erlang, am_re_run_trap, 3, @@ -293,6 +327,7 @@ static Eterm make_signed_integer(int x, Process *p) #define PARSE_FLAG_REPORT_ERRORS 64 #define PARSE_FLAG_MATCH_LIMIT 128 #define PARSE_FLAG_MATCH_LIMIT_RECURSION 256 +#define PARSE_FLAG_EXPORT 512 #define CAPSPEC_VALUES 0 #define 
CAPSPEC_TYPE 1 @@ -493,6 +528,9 @@ static bool parse_options(Eterm listp, struct parsed_options* po) case am_bsr_unicode: po->bsr = PCRE2_BSR_UNICODE; break; + case am_export: + po->flags |= (PARSE_FLAG_EXPORT | PARSE_FLAG_UNIQUE_COMPILE_OPT); + break; default: return false; } @@ -541,7 +579,7 @@ static pcre2_code *compile(const char* expr, * Build Erlang term result from successful compilation */ static Eterm -build_compile_result(Process *p, pcre2_code *result, int unicode, bool with_ok) +build_compile_result(Process *p, pcre2_code *result, byte unicode, bool with_ok) { Eterm *hp; Eterm ret; @@ -578,6 +616,180 @@ build_compile_result(Process *p, pcre2_code *result, int unicode, bool with_ok) return ret; } +#define EXPORTED_HDR_TITLE_SZ 8 +#define EXPORTED_HDR_CHECKSUM_SZ 4 +#define EXPORTED_HDR_ENCODE_VER_SZ 1 +#define EXPORTED_HDR_UNICODE_SZ 1 + +#define EXPORTED_HDR_TITLE_OFFS 0 +#define EXPORTED_HDR_CHECKSUM_OFFS (EXPORTED_HDR_TITLE_OFFS + EXPORTED_HDR_TITLE_SZ) +#define EXPORTED_HDR_ENCODE_VER_OFFS (EXPORTED_HDR_CHECKSUM_OFFS + EXPORTED_HDR_CHECKSUM_SZ) +#define EXPORTED_HDR_UNICODE_OFFS (EXPORTED_HDR_ENCODE_VER_OFFS + EXPORTED_HDR_ENCODE_VER_SZ) +#define EXPORTED_HDR_SZ (EXPORTED_HDR_UNICODE_OFFS + EXPORTED_HDR_UNICODE_SZ) + +/* + * Bump this version if for some reason the encoded binary format need to change + * while the PCRE version is the same. That is, if we want to force fallback to + * compilation without even looking at the exported stuff. 
+ */ +#define EXPORTED_ENCODE_VERSION 1 + +static uint32_t +calc_checksum(const byte* encoded, Uint encoded_sz) +{ + return crc32(0, encoded, encoded_sz); +} + +/* + * Build Erlang binary exported result from successful compilation + */ +static Eterm +build_compile_export(Process *p, const pcre2_code *result, byte unicode, + Eterm regex_bin, Eterm opts) +{ + Eterm *hp, *hp_end; + Uint hsz; + Eterm ret, encode_bin_term, hdr_bin_term; + uint8_t* serialized_bytes; + PCRE2_SIZE serialized_size; + Binary* bin; + int32_t encode_res; + byte *hdr; + uint32_t chksum; + + ASSERT(result); + +#ifdef DEBUG + erts_tsd_set(the_binary_malloc_tsd_key, NULL); +#endif + encode_res = pcre2_serialize_encode_8(&result, 1, + &serialized_bytes, &serialized_size, + the_binary_general_ctx); + ASSERT(encode_res == 1); (void)encode_res; + + bin = erts_tsd_get(the_binary_malloc_tsd_key); + ASSERT(bin); + ASSERT((char*)serialized_bytes >= bin->orig_bytes); + ASSERT((char*)serialized_bytes + serialized_size <= bin->orig_bytes + bin->orig_size); + + hsz = 3 + 6 + ERL_REFC_BITS_SIZE; + hp = HAlloc(p, hsz); + hp_end = hp + hsz; + + encode_bin_term = erts_wrap_refc_bitstring(&MSO(p).first, + &MSO(p).overhead, + &hp, + bin, + serialized_bytes, + 0, + NBITS(serialized_size)); + + hdr_bin_term = erts_new_binary(p, EXPORTED_HDR_SZ, &hdr); + + sys_memcpy(hdr + EXPORTED_HDR_TITLE_OFFS, "re-PCRE2", EXPORTED_HDR_TITLE_SZ); + put_int8(EXPORTED_ENCODE_VERSION, hdr + EXPORTED_HDR_ENCODE_VER_OFFS); + put_int8(unicode, hdr + EXPORTED_HDR_UNICODE_OFFS); + + chksum = calc_checksum(serialized_bytes, serialized_size); + put_int32(chksum, hdr + EXPORTED_HDR_CHECKSUM_OFFS); + + ret = TUPLE5(hp, am_re_exported_pattern, hdr_bin_term, regex_bin, opts, encode_bin_term); + hp += 6; + ret = TUPLE2(hp, am_ok, ret); + hp += 3; + ASSERT(hp == hp_end); (void)hp_end; + + return ret; +} + +BIF_RETTYPE +re_import_1(BIF_ALIST_1) +{ + Eterm* tpl; + pcre2_code *regex_code; + int32_t decode_ret; + uint32_t chksum; + const byte* hdr; + 
Uint hdr_sz; + const byte *hdr_tmp_alloc = NULL; + const byte *encoded_tmp_alloc = NULL; + byte enc_ver; + byte unicode; + + // {re_exported_pattern, HeaderBin, OrigBin, OrigOpts, EncodedBin} + + if (!is_tuple_arity(BIF_ARG_1, 5)) { + goto badarg; + } + tpl = tuple_val(BIF_ARG_1); + if (tpl[1] != am_re_exported_pattern) { + goto badarg; + } + + hdr = erts_get_aligned_binary_bytes(tpl[2], &hdr_sz, &hdr_tmp_alloc); + if (!hdr || hdr_sz < EXPORTED_HDR_SZ + || sys_memcmp(hdr, "re-PCRE2", EXPORTED_HDR_TITLE_SZ) != 0) { + goto badarg; + } + enc_ver = get_int8(hdr + EXPORTED_HDR_ENCODE_VER_OFFS); + if (enc_ver == EXPORTED_ENCODE_VERSION) { + const byte *encoded; + Uint encoded_sz; + + if (hdr_sz != EXPORTED_HDR_SZ) { + goto badarg; + } + + encoded = erts_get_aligned_binary_bytes(tpl[5], &encoded_sz, + &encoded_tmp_alloc); + if (!encoded) { + goto badarg; + } + + chksum = get_uint32(hdr + EXPORTED_HDR_CHECKSUM_OFFS); + if (chksum != calc_checksum(encoded, encoded_sz)) { + goto badarg; + } + unicode = get_int8(hdr + EXPORTED_HDR_UNICODE_OFFS); + + decode_ret = pcre2_serialize_decode_8(&regex_code, 1, + encoded, + the_precomp_general_ctx); + } + else { + /* + * Incorrect export encode format. + * Don't even look at tpl[5] and instead act as if the decode failed + * and fallback to compile regex below. + */ + decode_ret = PCRE2_ERROR_BADMODE; + } + + erts_free_aligned_binary_bytes(hdr_tmp_alloc); + hdr_tmp_alloc = NULL; + erts_free_aligned_binary_bytes(encoded_tmp_alloc); + encoded_tmp_alloc = NULL; + + switch (decode_ret) { + case 1: // Ok + return build_compile_result(BIF_P, regex_code, unicode, false); + + case PCRE2_ERROR_BADMODE: + case PCRE2_ERROR_BADMAGIC: + // Wrong architecture or PCRE version, try compile orig regex. 
+ if (is_bitstring(tpl[3])) { + return re_compile(BIF_P, tpl[3], tpl[4], true); + } + } + ASSERT(decode_ret < 0); + +badarg: + erts_free_aligned_binary_bytes(hdr_tmp_alloc); + erts_free_aligned_binary_bytes(encoded_tmp_alloc); + BIF_ERROR(BIF_P, BADARG); +} + + /* * Build Erlang term result from FAILED compilation */ @@ -629,13 +841,17 @@ re_version_0(BIF_ALIST_0) BIF_RET(erts_new_binary_from_data(BIF_P, version_size, version)); } -static bool get_iolist_as_bytes(Eterm iolist, +static bool get_iolist_as_bytes(Process* p, + Eterm iolist, byte **bytes_p, ErlDrvSizeT *slen_p, - byte** tmp_buf_p) + byte** tmp_buf_p, + Eterm* resbin_p) { int buffres; + ASSERT(tmp_buf_p || resbin_p); + if (is_bitstring(iolist)) { Uint bit_offs, bit_sz; @@ -643,7 +859,9 @@ static bool get_iolist_as_bytes(Eterm iolist, if (!BIT_OFFSET(bit_offs) && !TAIL_BITS(bit_sz)) { *bytes_p += BYTE_OFFSET(bit_offs); *slen_p = BYTE_SIZE(bit_sz); - *tmp_buf_p = NULL; + if (resbin_p) { + *resbin_p = iolist; + } return true; } } @@ -651,26 +869,35 @@ static bool get_iolist_as_bytes(Eterm iolist, if (erts_iolist_size(iolist, slen_p)) { return false; } - *bytes_p = *tmp_buf_p = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, *slen_p); - buffres = erts_iolist_to_buf(iolist, (char*)*bytes_p, *slen_p); + if (resbin_p) { + *resbin_p = erts_new_binary(p, *slen_p, bytes_p); + } + else { + *bytes_p = *tmp_buf_p = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, *slen_p); + } + + buffres = erts_iolist_to_buf(iolist, (char *)*bytes_p, *slen_p); ASSERT(buffres >= 0); (void)buffres; return true; } static BIF_RETTYPE -re_compile(Process* p, Eterm arg1, Eterm arg2) +re_compile(Process* p, Eterm re_arg, Eterm opts_arg, bool is_import) { ErlDrvSizeT slen; byte *expr; - byte *tmp_expr; + byte *tmp_expr = NULL; pcre2_code *result; int errcode = 0; PCRE2_SIZE errofset = 0; Eterm ret; - int unicode = 0; + byte unicode = 0; + bool is_export; struct parsed_options opts; + Eterm regex_bin; + Eterm* regex_bin_p; - if (!parse_options(arg2, &opts)) { + if 
(!parse_options(opts_arg, &opts)) { opt_error: p->fvalue = am_badopt; BIF_ERROR(p, BADARG | EXF_HAS_EXT_INFO); @@ -681,22 +908,31 @@ re_compile(Process* p, Eterm arg1, Eterm arg2) } unicode = (opts.flags & PARSE_FLAG_UNICODE) ? 1 : 0; + is_export = !is_import && opts.flags & PARSE_FLAG_EXPORT; - if (unicode && !is_bitstring(arg1)) { - BIF_TRAP2(ucompile_trap_exportp, p, arg1, arg2); + if (unicode && !is_bitstring(re_arg)) { + BIF_TRAP2(ucompile_trap_exportp, p, re_arg, opts_arg); } - if (!get_iolist_as_bytes(arg1, &expr, &slen, &tmp_expr)) { + regex_bin_p = is_export ? &regex_bin : NULL; + + if (!get_iolist_as_bytes(p, re_arg, &expr, &slen, &tmp_expr, regex_bin_p)) { BIF_ERROR(p,BADARG); } - result = compile((char*)expr, slen, &opts, the_precompile_ctx, &errcode, &errofset); + result = compile((char*)expr, slen, &opts, + (is_export ? the_tmp_compile_ctx : the_precompile_ctx), + &errcode, &errofset); if (!result) { ret = build_compile_error(p, errcode, errofset, NIL); } + else if (is_export) { + ret = build_compile_export(p, result, unicode, regex_bin, opts_arg); + pcre2_code_free(result); + } else { - ret = build_compile_result(p, result, unicode, true); + ret = build_compile_result(p, result, unicode, !is_import); } if (tmp_expr) { @@ -708,13 +944,13 @@ re_compile(Process* p, Eterm arg1, Eterm arg2) BIF_RETTYPE re_compile_2(BIF_ALIST_2) { - return re_compile(BIF_P, BIF_ARG_1, BIF_ARG_2); + return re_compile(BIF_P, BIF_ARG_1, BIF_ARG_2, false); } BIF_RETTYPE re_compile_1(BIF_ALIST_1) { - return re_compile(BIF_P, BIF_ARG_1, NIL); + return re_compile(BIF_P, BIF_ARG_1, NIL, false); } /* @@ -1272,7 +1508,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, int first) /* Compile from textual regex */ ErlDrvSizeT slen; byte *expr; - byte *tmp_expr; + byte *tmp_expr = NULL; int errcode = 0; PCRE2_SIZE errofset = 0; @@ -1284,7 +1520,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, int first) BIF_TRAP3(urun_trap_exportp, p, arg1, arg2, arg3); } - if 
(!get_iolist_as_bytes(arg2, &expr, &slen, &tmp_expr)) { + if (!get_iolist_as_bytes(p, arg2, &expr, &slen, &tmp_expr, NULL)) { BIF_ERROR(p,BADARG); } diff --git a/erts/emulator/pcre/pcre.mk b/erts/emulator/pcre/pcre.mk index 1c83084995a7..c83692e19556 100644 --- a/erts/emulator/pcre/pcre.mk +++ b/erts/emulator/pcre/pcre.mk @@ -37,6 +37,7 @@ pcre2_newline.o \ pcre2_ord2utf.o \ pcre2_pattern_info.o \ pcre2_script_run.o \ +pcre2_serialize.o \ pcre2_string_utils.o \ pcre2_study.o \ pcre2_substring.o \ @@ -133,6 +134,9 @@ $(PCRE_OBJDIR)/pcre2_pattern_info.o: $(PCRE_DIR)/pcre2_pattern_info.c \ $(PCRE_OBJDIR)/pcre2_script_run.o: $(PCRE_DIR)/pcre2_script_run.c \ $(PCRE_DIR)/pcre2_internal.h $(PCRE_DIR)/local_config.h $(PCRE_DIR)/pcre2.h $(PCRE_DIR)/pcre2_ucp.h $(PCRE_DIR)/pcre2_intmodedep.h \ $(PCRE_DIR)/pcre2_util.h +$(PCRE_OBJDIR)/pcre2_serialize.o: $(PCRE_DIR)/pcre2_serialize.c \ + $(PCRE_DIR)/pcre2_internal.h $(PCRE_DIR)/local_config.h $(PCRE_DIR)/pcre2.h $(PCRE_DIR)/pcre2_ucp.h $(PCRE_DIR)/pcre2_intmodedep.h \ + $(PCRE_DIR)/pcre2_util.h $(PCRE_OBJDIR)/pcre2_string_utils.o: $(PCRE_DIR)/pcre2_string_utils.c \ $(PCRE_DIR)/pcre2_internal.h $(PCRE_DIR)/local_config.h $(PCRE_DIR)/pcre2.h $(PCRE_DIR)/pcre2_ucp.h $(PCRE_DIR)/pcre2_intmodedep.h \ $(PCRE_DIR)/pcre2_util.h diff --git a/erts/emulator/pcre/pcre2_serialize.c b/erts/emulator/pcre/pcre2_serialize.c new file mode 100644 index 000000000000..a10e3020bbe9 --- /dev/null +++ b/erts/emulator/pcre/pcre2_serialize.c @@ -0,0 +1,286 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2024 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* This module contains functions for serializing and deserializing +a sequence of compiled codes. 
*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include "pcre2_internal.h" + +/* Magic number to provide a small check against being handed junk. */ + +#define SERIALIZED_DATA_MAGIC 0x50523253u + +/* Deserialization is limited to the current PCRE version and +character width. */ + +#define SERIALIZED_DATA_VERSION \ + ((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16)) + +#define SERIALIZED_DATA_CONFIG \ + (sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16)) + + + +/************************************************* +* Serialize compiled patterns * +*************************************************/ + +PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION +pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes, + uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, + pcre2_general_context *gcontext) +{ +uint8_t *bytes; +uint8_t *dst_bytes; +int32_t i; +PCRE2_SIZE total_size; +const pcre2_real_code *re; +const uint8_t *tables; +pcre2_serialized_data *data; + +const pcre2_memctl *memctl = (gcontext != NULL) ? + &gcontext->memctl : &PRIV(default_compile_context).memctl; + +if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL) + return PCRE2_ERROR_NULL; + +if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA; + +/* Compute total size. */ +total_size = sizeof(pcre2_serialized_data) + TABLES_LENGTH; +tables = NULL; + +for (i = 0; i < number_of_codes; i++) + { + if (codes[i] == NULL) return PCRE2_ERROR_NULL; + re = (const pcre2_real_code *)(codes[i]); + if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; + if (tables == NULL) + tables = re->tables; + else if (tables != re->tables) + return PCRE2_ERROR_MIXEDTABLES; + total_size += re->blocksize; + } + +/* Initialize the byte stream. */ +bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data); +if (bytes == NULL) return PCRE2_ERROR_NOMEMORY; + +/* The controller is stored as a hidden parameter. 
*/ +memcpy(bytes, memctl, sizeof(pcre2_memctl)); +bytes += sizeof(pcre2_memctl); + +data = (pcre2_serialized_data *)bytes; +data->magic = SERIALIZED_DATA_MAGIC; +data->version = SERIALIZED_DATA_VERSION; +data->config = SERIALIZED_DATA_CONFIG; +data->number_of_codes = number_of_codes; + +/* Copy all compiled code data. */ +dst_bytes = bytes + sizeof(pcre2_serialized_data); +memcpy(dst_bytes, tables, TABLES_LENGTH); +dst_bytes += TABLES_LENGTH; + +for (i = 0; i < number_of_codes; i++) + { + re = (const pcre2_real_code *)(codes[i]); + (void)memcpy(dst_bytes, (const char *)re, re->blocksize); + + /* Certain fields in the compiled code block are re-set during + deserialization. In order to ensure that the serialized data stream is always + the same for the same pattern, set them to zero here. We can't assume the + copy of the pattern is correctly aligned for accessing the fields as part of + a structure. Note the use of sizeof(void *) in the second of these, to + specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a + pointer to uint8_t), gcc gives a warning because the first argument is also a + pointer to uint8_t. Casting the first argument to (void *) can stop this, but + it didn't stop Coverity giving the same complaint. 
*/ + + (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, + sizeof(pcre2_memctl)); + (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, + sizeof(void *)); + (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0, + sizeof(void *)); + + dst_bytes += re->blocksize; + } + +*serialized_bytes = bytes; +*serialized_size = total_size; +return number_of_codes; +} + + +/************************************************* +* Deserialize compiled patterns * +*************************************************/ + +PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION +pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes, + const uint8_t *bytes, pcre2_general_context *gcontext) +{ +const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes; +const pcre2_memctl *memctl = (gcontext != NULL) ? + &gcontext->memctl : &PRIV(default_compile_context).memctl; + +const uint8_t *src_bytes; +pcre2_real_code *dst_re; +uint8_t *tables; +int32_t i, j; + +/* Sanity checks. */ + +if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL; +if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA; +if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA; +if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC; +if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE; +if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE; + +if (number_of_codes > data->number_of_codes) + number_of_codes = data->number_of_codes; + +src_bytes = bytes + sizeof(pcre2_serialized_data); + +/* Decode tables. The reference count for the tables is stored immediately +following them. */ + +tables = memctl->malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE), memctl->memory_data); +if (tables == NULL) return PCRE2_ERROR_NOMEMORY; + +memcpy(tables, src_bytes, TABLES_LENGTH); +*(PCRE2_SIZE *)(tables + TABLES_LENGTH) = number_of_codes; +src_bytes += TABLES_LENGTH; + +/* Decode the byte stream. 
We must not try to read the size from the compiled +code block in the stream, because it might be unaligned, which causes errors on +hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type +of the blocksize field is given its own name to ensure that it is the same here +as in the block. */ + +for (i = 0; i < number_of_codes; i++) + { + CODE_BLOCKSIZE_TYPE blocksize; + memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize), + sizeof(CODE_BLOCKSIZE_TYPE)); + if (blocksize <= sizeof(pcre2_real_code)) + return PCRE2_ERROR_BADSERIALIZEDDATA; + + /* The allocator provided by gcontext replaces the original one. */ + + dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize, + (pcre2_memctl *)gcontext); + if (dst_re == NULL) + { + memctl->free(tables, memctl->memory_data); + for (j = 0; j < i; j++) + { + memctl->free(codes[j], memctl->memory_data); + codes[j] = NULL; + } + return PCRE2_ERROR_NOMEMORY; + } + + /* The new allocator must be preserved. */ + + memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl), + src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl)); + if (dst_re->magic_number != MAGIC_NUMBER || + dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 || + dst_re->name_count > MAX_NAME_COUNT) + { + memctl->free(dst_re, memctl->memory_data); + return PCRE2_ERROR_BADSERIALIZEDDATA; + } + + /* At the moment only one table is supported. 
*/ + + dst_re->tables = tables; + dst_re->executable_jit = NULL; + dst_re->flags |= PCRE2_DEREF_TABLES; + + codes[i] = dst_re; + src_bytes += blocksize; + } + +return number_of_codes; +} + + +/************************************************* +* Get the number of serialized patterns * +*************************************************/ + +PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION +pcre2_serialize_get_number_of_codes(const uint8_t *bytes) +{ +const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes; + +if (data == NULL) return PCRE2_ERROR_NULL; +if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC; +if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE; +if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE; + +return data->number_of_codes; +} + + +/************************************************* +* Free the allocated stream * +*************************************************/ + +PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION +pcre2_serialize_free(uint8_t *bytes) +{ +if (bytes != NULL) + { + pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl)); + memctl->free(memctl, memctl->memory_data); + } +} + +/* End of pcre2_serialize.c */ diff --git a/lib/stdlib/src/erl_stdlib_errors.erl b/lib/stdlib/src/erl_stdlib_errors.erl index b59d259009ce..49db693d57e2 100644 --- a/lib/stdlib/src/erl_stdlib_errors.erl +++ b/lib/stdlib/src/erl_stdlib_errors.erl @@ -368,6 +368,8 @@ format_re_error(inspect, [CompiledRE, Item], _) -> true -> [ReError] end; +format_re_error(import, [_], _) -> + [~"not an exported regular expression"]; format_re_error(replace, [Subject, RE, Replacement], _) -> [must_be_iodata(Subject), must_be_regexp(RE), diff --git a/lib/stdlib/src/re.erl b/lib/stdlib/src/re.erl index fa25ca165ed5..cbcee344ddfc 100644 --- a/lib/stdlib/src/re.erl +++ b/lib/stdlib/src/re.erl @@ -36,6 +36,8 @@ fields can change in future Erlang/OTP releases. """. -type mp() :: {re_pattern, _, _, _, _}. 
+-type exported() :: {re_exported_pattern, _}. + -type nl_spec() :: cr | crlf | lf | nul | anycrlf | any. -type compile_options() :: [compile_option()]. @@ -66,6 +68,13 @@ fields can change in future Erlang/OTP releases. -export([version/0, compile/1, compile/2, run/2, run/3, inspect/2]). +-export([import/1]). + +-spec import(exported()) -> {ok, mp()}. +import(_) -> + erlang:nif_error(undef). + + -doc """ The return of this function is a string with the PCRE version of the system that was used in the Erlang/OTP compilation. diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl index 41a54706a07d..1bad176506ec 100644 --- a/lib/stdlib/test/re_SUITE.erl +++ b/lib/stdlib/test/re_SUITE.erl @@ -35,9 +35,12 @@ match_limit/1,sub_binaries/1,copt/1,global_unicode_validation/1, yield_on_subject_validation/1, bad_utf8_subject/1, error_info/1, subject_is_sub_binary/1, pattern_is_sub_binary/1, + import/1, last_test/1]). +-export([id/1]). + -include_lib("common_test/include/ct.hrl"). -include_lib("kernel/include/file.hrl"). @@ -58,6 +61,7 @@ all() -> match_limit, sub_binaries, re_version, global_unicode_validation, yield_on_subject_validation, bad_utf8_subject, error_info, subject_is_sub_binary, pattern_is_sub_binary, + import, last_test]. @@ -1067,6 +1071,7 @@ error_info(_Config) -> BadRegexp = {re_pattern,0,0,0,<<"xyz">>}, BadErr = "neither an iodata term", {ok,GoodRegexp} = re:compile(".*"), + {ok, Exported} = re:compile(".*", [export]), InvalidRegexp = <<"(.*))">>, InvalidErr = "could not parse regular expression\n.*unmatched closing parenthesis.*", @@ -1085,6 +1090,12 @@ error_info(_Config) -> {internal_run, 4}, %Internal. 
+ {import, [17]}, + {import, [{re_exported_pattern}]}, + {import, [setelement(1,Exported,error)]}, + {import, [setelement(2,Exported,error)]}, + {import, [setelement(5,Exported,error)]}, + {replace, [{a,b}, {x,y}, {z,z}],[{1,".*"},{2,".*"},{3,".*"}]}, {replace, [{a,b}, BadRegexp, {z,z}],[{1,".*"},{2,BadErr},{3,".*"}]}, {replace, [{a,b}, InvalidRegexp, {z,z}],[{1,".*"},{2,InvalidErr},{3,".*"}]}, @@ -1132,37 +1143,113 @@ pattern_is_sub_binary(Config) when is_list(Config) -> match = re:run(Subject, RE, [{capture, none}]), nomatch = re:run(Subject, Bin, [{capture, none}]), %% Unaligned sub binary - will result in a copy operation - <<0:1, RE2/binary>> = Bin2 = <<0:1, "^((:|(0?|([1-9a-f][0-9a-f]{0,3}))):)((0?|([1-9a-f][0-9a-f]{0,3})):){0,6}(:|(0?|([1-9a-f][0-9a-f]{0,3})))$">>, + RE2 = unalign_bin(<<"^((:|(0?|([1-9a-f][0-9a-f]{0,3}))):)((0?|([1-9a-f][0-9a-f]{0,3})):){0,6}(:|(0?|([1-9a-f][0-9a-f]{0,3})))$">>), {ok,REC2} = re:compile(RE2), match = re:run(Subject, REC2, [{capture, none}]), match = re:run(Subject, RE2, [{capture, none}]), ok = try - _ = re:run(Subject, Bin2, [{capture, none}]) - catch error:badarg -> + re:run(Subject, <<0:1,RE2>>, [{capture, none}]) + catch error:badarg -> %% *** argument 2: neither an iodata term nor a compiled regular expression ok end. 
subject_is_sub_binary(Config) when is_list(Config) -> %% Aligned subject sub binary - Bin = <<"subject = ::1">>, - RE = <<"^((:|(0?|([1-9a-f][0-9a-f]{0,3}))):)((0?|([1-9a-f][0-9a-f]{0,3})):){0,6}(:|(0?|([1-9a-f][0-9a-f]{0,3})))$">>, + BigBin = list_to_binary(lists:duplicate(100,$x)), % make it big to force sub-bin and not copy + Bin = <<"subject = ::1", BigBin/binary>>, + RE = <<"^((:|(0?|([1-9a-f][0-9a-f]{0,3}))):)((0?|([1-9a-f][0-9a-f]{0,3})):){0,6}(:|(0?|([1-9a-f][0-9a-f]{0,3})))x">>, {_,Subject} = split_binary(Bin, 10), {ok,REC} = re:compile(RE), match = re:run(Subject, REC, [{capture, none}]), match = re:run(Subject, RE, [{capture, none}]), nomatch = re:run(Bin, RE, [{capture, none}]), %% Unaligned subject sub binary - <<0:1, Subject2/binary>> = Bin2 = <<0:1,"::1">>, + Subject2 = unalign_bin(Subject), match = re:run(Subject2, REC, [{capture, none}]), match = re:run(Subject2, RE, [{capture, none}]), ok = try - _ = re:run(Bin2, RE, [{capture, none}]) + _ = re:run(<<0:1, Subject>>, RE, [{capture, none}]) catch error:badarg -> %% *** argument 1: not an iodata term ok end. +import(Config) when is_list(Config) -> + %% Make the regex large in order to test it as a bit unaligned sub-binary. + RE = <<"(exported|1234567890123456789012345678901234567890123456789012345678901234567890)">>, + + {ok, Exported1} = re:compile(RE, [export]), + import_do(Exported1, fun re:import/1), + import_do(Exported1, fun(E) -> re:import(unalign_exported(E)) end), + + {ok, Exported2} = re:compile(binary_to_list(RE), [export]), + import_do(Exported2, fun re:import/1), + import_do(Exported2, fun(E) -> re:import(unalign_exported(E)) end), + ok. + +import_do(Exported, ImportFun) -> + match = re:run("exported", ImportFun(Exported), [{capture,none}]), + + %% Make an exported tuple with fake fallback to verify if it was used or not. 
+ FallbackRE = <<"(fallback|1234567890123456789012345678901234567890123456789012345678901234567890)">>, + {re_exported_pattern, Hdr, _, [export], Encoded} = Exported, + Fake1 = {re_exported_pattern, Hdr, FallbackRE, [export], Encoded}, + match = re:run("exported", ImportFun(Fake1), [{capture,none}]), + + Fake2 = bump_exported_pcre_version(Fake1), + match = re:run("fallback", ImportFun(Fake2), [{capture,none}]), + + Fake3 = swap_exported_endianness(Fake1), + match = re:run("fallback", ImportFun(Fake3), [{capture,none}]), + + Fake4 = bump_encode_version(Fake1), + match = re:run("fallback", ImportFun(Fake4), [{capture,none}]), + + badarg = try ImportFun(trash_encoding(Exported)) + catch error:badarg -> badarg end, + ok. + +unalign_exported(Exported) -> + {re_exported_pattern, Hdr, RE, Opts, Enc} = Exported, + {re_exported_pattern, unalign_bin(Hdr), unalign_bin(RE), Opts, unalign_bin(Enc)}. + +unalign_bin(Bin1) -> + <<0:1, Bin2/binary>> = ?MODULE:id(<<0:1, Bin1/binary>>), + Bin2. + +bump_exported_pcre_version(Exported) -> + {re_exported_pattern, Hdr, RE, Opts, Enc1} = Exported, + <> = Enc1, + Enc2 = <>, + build_exported(Hdr, RE, Opts, Enc2). + +bump_encode_version(Exported) -> + {re_exported_pattern, Hdr1, RE, Opts, Enc} = Exported, + <> = Hdr1, + Hdr2 = <>, + build_exported(Hdr2, RE, Opts, Enc). + +swap_exported_endianness(Exported) -> + {re_exported_pattern, Hdr, RE, Opts, Enc1} = Exported, + <> = Enc1, + Enc2 = <>, + build_exported(Hdr, RE, Opts, Enc2). + +trash_encoding(Exported) -> + {re_exported_pattern, Hdr, RE, Opts, Enc1} = Exported, + <> = Enc1, + Enc2 = <>, + {re_exported_pattern, Hdr, RE, Opts, Enc2}. + +build_exported(Hdr1, RE, Opts, Enc) -> + <> = Hdr1, + CRC2 = erlang:crc32(Enc), + Hdr2 = <>, + {re_exported_pattern, Hdr2, RE, Opts, Enc}. 
+ + + last_test(Config) when is_list(Config) -> erts_debug:set_internal_state(available_internal_state, true), Res = case erts_debug:get_internal_state(re_yield_coverage) of @@ -1198,3 +1285,5 @@ check_yield_coverage([Tuple | Tail], Err0) -> Err0 end, check_yield_coverage(Tail, Err1). + +id(X) -> X. diff --git a/lib/stdlib/test/run_pcre_tests.erl b/lib/stdlib/test/run_pcre_tests.erl index 55e4cd3b1d96..544a8d026b4b 100644 --- a/lib/stdlib/test/run_pcre_tests.erl +++ b/lib/stdlib/test/run_pcre_tests.erl @@ -1433,19 +1433,45 @@ ranstring() -> re_compile(RE, Options) -> inc_counter(re_compile), put(re_compile_opts, Options), - re:compile(RE, Options). + case re:compile(RE, Options) of + {ok, Local} -> + {ok, Exported} = re:compile(RE, [export|Options]), + {ok, {Local, Exported}}; + Error -> + Error = re:compile(RE, [export | Options]) + end. + +re_run(Subj, {Local, Exported}, Opts) -> + Res = re_run(Subj, Local, Opts), + Imported1 = re:import(Exported), + Res = re_run(Subj, Imported1, Opts), + Imported2 = re:import(bump_exported_version(Exported)), + Res = re_run(Subj, Imported2, Opts); re_run(Subj, RE, Opts) -> %%io:format("re:run(~p, ~p, ~p)\n", [Subj, RE, Opts]), inc_counter(re_run), put(re_run_opts, Opts), re:run(Subj, RE, Opts). +re_replace(Subj, {Local, Exported}, Repl, Opts) -> + Res = re_replace(Subj, Local, Repl, Opts), + Imported = re:import(Exported), + Res = re_replace(Subj, Imported, Repl, Opts); re_replace(Subj, RE, Repl, Opts) -> inc_counter(re_replace), put(re_run_opts, Opts), re:replace(Subj, RE, Repl, Opts). +bump_exported_version(Exported1) -> + {re_exported_pattern, Hdr1, RE, Opts, Enc1} = Exported1, + <<Magic:32, Maj:16, Min:16/little, EncRest/binary>> = Enc1, + Enc2 = <<Magic:32, Maj:16, (Min+1):16/little, EncRest/binary>>, + <<"re-PCRE2", _CRC1:32, HdrRest/binary>> = Hdr1, + CRC2 = erlang:crc32(Enc2), + Hdr2 = <<"re-PCRE2", CRC2:32, HdrRest/binary>>, + {re_exported_pattern, Hdr2, RE, Opts, Enc2}. 
+ used_options() -> RunOpts = get(re_run_opts), case get(re_compile_opts) of From 05976251766f5f140a5c0ee07b5f8d8599116aa4 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson <sverker@erlang.org> Date: Wed, 11 Jun 2025 20:32:22 +0200 Subject: [PATCH 4/4] erts: Disable parallel make in top Makefile "make opt debug" will build one target at a time but each target's sub-makefile may build in parallel. This is to avoid corrupted files when the same file is generated from two Makefile invocations. --- erts/emulator/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/erts/emulator/Makefile b/erts/emulator/Makefile index dd61b9006d25..068ba222cf8a 100644 --- a/erts/emulator/Makefile +++ b/erts/emulator/Makefile @@ -27,3 +27,4 @@ include $(ERL_TOP)/make/run_make.mk include $(ERL_TOP)/make/app_targets.mk +.NOTPARALLEL: