39
39
#include "v1_samp.h"
40
40
41
41
#ifdef FEATURE_REGEXP
42
- #include <pcre.h>
42
+ #define PCRE2_CODE_UNIT_WIDTH 8
43
+ #include <pcre2.h>
43
44
#include <errno.h>
44
45
#endif
45
46
@@ -1266,7 +1267,7 @@ void* tokenized_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
1266
1267
* significantly slower than other field-types.
1267
1268
*/
1268
1269
struct regex_parser_data_s {
1269
- pcre * re ;
1270
+ pcre2_code * re ;
1270
1271
int consume_group ;
1271
1272
int return_group ;
1272
1273
int max_groups ;
@@ -1276,40 +1277,54 @@ PARSER(Regex)
1276
1277
assert (str != NULL);
1277
1278
assert (offs != NULL );
1278
1279
assert (parsed != NULL );
1279
- unsigned int * ovector = NULL ;
1280
+ PCRE2_SIZE * ovector ;
1281
+ pcre2_match_data * match_data = NULL ;
1280
1282
1281
1283
struct regex_parser_data_s * pData = (struct regex_parser_data_s * ) node -> parser_data ;
1282
1284
if (pData != NULL ) {
1283
- ovector = calloc (pData -> max_groups , sizeof (unsigned int ) * 3 );
1284
- if (ovector == NULL ) FAIL (LN_NOMEM );
1285
+ match_data = pcre2_match_data_create_from_pattern (pData -> re , NULL );
1286
+ if (match_data == NULL ) FAIL (LN_NOMEM );
1287
+
1288
+ int result = pcre2_match (
1289
+ pData -> re , /* the compiled pattern */
1290
+ (PCRE2_SPTR )str , /* the subject string */
1291
+ (PCRE2_SIZE )strLen , /* the length of the subject */
1292
+ (PCRE2_SIZE )* offs , /* start matching at the caller-supplied offset (*offs) in the subject */
1293
+ 0 , /* default options */
1294
+ match_data , /* block for storing the result */
1295
+ NULL ); /* use default match context */
1285
1296
1286
- int result = pcre_exec (pData -> re , NULL , str , strLen , * offs , 0 , (int * ) ovector , pData -> max_groups * 3 );
1287
1297
if (result == 0 ) result = pData -> max_groups ;
1288
1298
if (result > pData -> consume_group ) {
1289
- /*please check 'man 3 pcreapi' for cryptic '2 * n' and '2 * n + 1' magic*/
1299
+ ovector = pcre2_get_ovector_pointer (match_data );
1300
+ printf ("Match succeeded at offset %d\n" , (int )ovector [0 ]);
1301
+
1302
+ /* Please check 'man 3 pcre2api' for the cryptic '2 * n' and '2 * n + 1' magic.
1303
+ * In a nutshell: within the ovector, the first in each pair of values is set to the
1304
+ * offset of the first code unit of a substring, and the second is set to the
1305
+ * offset of the first code unit after the end of a substring.
1306
+ */
1290
1307
if (ovector [2 * pData -> consume_group ] == * offs ) {
1291
1308
* parsed = ovector [2 * pData -> consume_group + 1 ] - ovector [2 * pData -> consume_group ];
1292
1309
if (pData -> consume_group != pData -> return_group ) {
1293
1310
char * val = NULL ;
1294
1311
if ((val = strndup (str + ovector [2 * pData -> return_group ],
1295
1312
ovector [2 * pData -> return_group + 1 ] -
1296
1313
ovector [2 * pData -> return_group ])) == NULL ) {
1297
- free (ovector );
1298
1314
FAIL (LN_NOMEM );
1299
1315
}
1300
1316
* value = json_object_new_string (val );
1301
1317
free (val );
1302
1318
if (* value == NULL ) {
1303
- free (ovector );
1304
1319
FAIL (LN_NOMEM );
1305
1320
}
1306
1321
}
1307
1322
}
1308
1323
}
1309
- free (ovector );
1310
1324
}
1311
1325
r = 0 ; /* success */
1312
1326
done :
1327
+ pcre2_match_data_free (match_data );
1313
1328
return r ;
1314
1329
}
1315
1330
@@ -1346,8 +1361,8 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
1346
1361
char * name = NULL ;
1347
1362
struct regex_parser_data_s * pData = NULL ;
1348
1363
const char * unescaped_exp = NULL ;
1349
- const char * error = NULL ;
1350
- int erroffset = 0 ;
1364
+ PCRE2_SIZE erroffset = 0 ;
1365
+ int errcode = 0 ;
1351
1366
1352
1367
1353
1368
CHKN (name = es_str2cstr (node -> name , NULL ));
@@ -1365,7 +1380,7 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
1365
1380
if ((grp_parse_err = regex_parser_configure_consume_and_return_group (args , pData )) != NULL )
1366
1381
FAIL (LN_BADCONFIG );
1367
1382
1368
- CHKN (pData -> re = pcre_compile ( exp , 0 , & error , & erroffset , NULL ));
1383
+ CHKN (pData -> re = pcre2_compile (( PCRE2_SPTR ) exp , PCRE2_ZERO_TERMINATED , 0 , & errcode , & erroffset , NULL ));
1369
1384
1370
1385
pData -> max_groups = ((pData -> consume_group > pData -> return_group ) ? pData -> consume_group :
1371
1386
pData -> return_group ) + 1 ;
@@ -1387,9 +1402,12 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
1387
1402
ln_dbgprintf (ctx , "couldn't allocate memory for regex-string for field: '%s'" , name );
1388
1403
else if (grp_parse_err != NULL )
1389
1404
ln_dbgprintf (ctx , "%s for: '%s'" , grp_parse_err , name );
1390
- else if (pData -> re == NULL )
1405
+ else if (pData -> re == NULL ) {
1406
+ PCRE2_UCHAR errbuffer [256 ];
1407
+ pcre2_get_error_message (errcode , errbuffer , sizeof (errbuffer ));
1391
1408
ln_dbgprintf (ctx , "couldn't compile regex(encountered error '%s' at char '%d' in pattern) "
1392
- "for regex-matched field: '%s'" , error , erroffset , name );
1409
+ "for regex-matched field: '%s'" , errbuffer , (int )erroffset , name );
1410
+ }
1393
1411
regex_parser_data_destructor ((void * * )& pData );
1394
1412
}
1395
1413
if (exp != NULL ) free (exp );
@@ -1401,7 +1419,7 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
1401
1419
void regex_parser_data_destructor (void * * dataPtr ) {
1402
1420
if ((* dataPtr ) != NULL ) {
1403
1421
struct regex_parser_data_s * pData = (struct regex_parser_data_s * ) * dataPtr ;
1404
- if (pData -> re != NULL ) pcre_free (pData -> re );
1422
+ if (pData -> re != NULL ) pcre2_code_free (pData -> re );
1405
1423
free (pData );
1406
1424
* dataPtr = NULL ;
1407
1425
}
0 commit comments