Skip to content

Commit 8616165

Browse files
committed
Fix segfault with regular expressions that matched the zero-length string.
In regular expressions, the repetition operators `{0,0}` and `{,0}` are valid, but they always match the zero-length string. For instance, `a{0,0}` and `[a-z]{0,0}` both match the zero-length string. When the whole regular expression consisted of one of these repetitions, it caused a segfault during the evaluation of the regular expression, because the `forward_code_ref` field for the root atom in the atom tree was null. Closes #2084.
1 parent 608fb3d commit 8616165

File tree

3 files changed

+73
-50
lines changed

3 files changed

+73
-50
lines changed

libyara/atoms.c

-16
Original file line numberDiff line numberDiff line change
@@ -1355,22 +1355,6 @@ int yr_atoms_extract_from_re(
13551355
*atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms);
13561356
}
13571357

1358-
// No atoms has been extracted, let's add a zero-length atom.
1359-
1360-
if (*atoms == NULL)
1361-
{
1362-
*atoms = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
1363-
1364-
if (*atoms == NULL)
1365-
return ERROR_INSUFFICIENT_MEMORY;
1366-
1367-
(*atoms)->atom.length = 0;
1368-
(*atoms)->backtrack = 0;
1369-
(*atoms)->forward_code_ref = re_ast->root_node->forward_code_ref;
1370-
(*atoms)->backward_code_ref = YR_ARENA_NULL_REF;
1371-
(*atoms)->next = NULL;
1372-
}
1373-
13741358
return ERROR_SUCCESS;
13751359
}
13761360

libyara/parser.c

+71-34
Original file line numberDiff line numberDiff line change
@@ -502,19 +502,22 @@ static int _yr_parser_write_string(
502502
literal_string->length + 1, // +1 to include terminating NULL
503503
&ref);
504504

505+
if (result != ERROR_SUCCESS)
506+
goto cleanup;
507+
505508
string->length = (uint32_t) literal_string->length;
506509
string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref);
507510

508-
if (result == ERROR_SUCCESS)
509-
{
510-
result = yr_atoms_extract_from_string(
511-
&compiler->atoms_config,
512-
(uint8_t*) literal_string->c_string,
513-
(int32_t) literal_string->length,
514-
modifier,
515-
&atom_list,
516-
min_atom_quality);
517-
}
511+
result = yr_atoms_extract_from_string(
512+
&compiler->atoms_config,
513+
(uint8_t*) literal_string->c_string,
514+
(int32_t) literal_string->length,
515+
modifier,
516+
&atom_list,
517+
min_atom_quality);
518+
519+
if (result != ERROR_SUCCESS)
520+
goto cleanup;
518521
}
519522
else
520523
{
@@ -524,37 +527,72 @@ static int _yr_parser_write_string(
524527
// variable-length portions.
525528
modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET;
526529

530+
// Save the position where the RE forward code starts for later reference.
531+
yr_arena_off_t forward_code_start = yr_arena_get_current_offset(
532+
compiler->arena, YR_RE_CODE_SECTION);
533+
527534
// Emit forwards code
528535
result = yr_re_ast_emit_code(re_ast, compiler->arena, false);
529536

537+
if (result != ERROR_SUCCESS)
538+
goto cleanup;
539+
530540
// Emit backwards code
531-
if (result == ERROR_SUCCESS)
532-
result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
541+
result = yr_re_ast_emit_code(re_ast, compiler->arena, true);
533542

534-
if (result == ERROR_SUCCESS)
535-
result = yr_atoms_extract_from_re(
536-
&compiler->atoms_config,
537-
re_ast,
538-
modifier,
539-
&atom_list,
540-
min_atom_quality);
543+
if (result != ERROR_SUCCESS)
544+
goto cleanup;
545+
546+
// Extract atoms from the regular expression.
547+
result = yr_atoms_extract_from_re(
548+
&compiler->atoms_config,
549+
re_ast,
550+
modifier,
551+
&atom_list,
552+
min_atom_quality);
553+
554+
if (result != ERROR_SUCCESS)
555+
goto cleanup;
556+
557+
// If no atom was extracted let's add a zero-length atom.
558+
if (atom_list == NULL)
559+
{
560+
atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM));
561+
562+
if (atom_list == NULL)
563+
{
564+
result = ERROR_INSUFFICIENT_MEMORY;
565+
goto cleanup;
566+
}
567+
568+
atom_list->atom.length = 0;
569+
atom_list->backtrack = 0;
570+
atom_list->backward_code_ref = YR_ARENA_NULL_REF;
571+
atom_list->next = NULL;
572+
573+
yr_arena_ptr_to_ref(
574+
compiler->arena,
575+
yr_arena_get_ptr(
576+
compiler->arena, YR_RE_CODE_SECTION, forward_code_start),
577+
&(atom_list->forward_code_ref));
578+
}
541579
}
542580

543581
string->flags = modifier.flags;
544582
string->rule_idx = compiler->current_rule_idx;
545583
string->idx = compiler->current_string_idx;
546584
string->fixed_offset = YR_UNDEFINED;
547585

548-
if (result == ERROR_SUCCESS)
549-
{
550-
// Add the string to Aho-Corasick automaton.
551-
result = yr_ac_add_string(
552-
compiler->automaton,
553-
string,
554-
compiler->current_string_idx,
555-
atom_list,
556-
compiler->arena);
557-
}
586+
// Add the string to Aho-Corasick automaton.
587+
result = yr_ac_add_string(
588+
compiler->automaton,
589+
string,
590+
compiler->current_string_idx,
591+
atom_list,
592+
compiler->arena);
593+
594+
if (result != ERROR_SUCCESS)
595+
goto cleanup;
558596

559597
if (modifier.flags & STRING_FLAGS_LITERAL)
560598
{
@@ -580,6 +618,7 @@ static int _yr_parser_write_string(
580618

581619
compiler->current_string_idx++;
582620

621+
cleanup:
583622
if (free_literal)
584623
yr_free(literal_string);
585624

@@ -761,11 +800,9 @@ int yr_parser_reduce_string_declaration(
761800
{
762801
if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE)
763802
{
764-
yywarning(
765-
yyscanner,
766-
"unknown escape sequence");
803+
yywarning(yyscanner, "unknown escape sequence");
767804
}
768-
else
805+
else
769806
{
770807
snprintf(
771808
message,
@@ -1148,7 +1185,7 @@ int yr_parser_reduce_string_identifier(
11481185
YR_STRING* string;
11491186
YR_COMPILER* compiler = yyget_extra(yyscanner);
11501187

1151-
if (strcmp(identifier, "$") == 0) // is an anonymous string ?
1188+
if (strcmp(identifier, "$") == 0) // is an anonymous string ?
11521189
{
11531190
if (compiler->loop_for_of_var_index >= 0) // inside a loop ?
11541191
{

tests/test-rules.c

+2
Original file line numberDiff line numberDiff line change
@@ -2517,6 +2517,8 @@ void test_re()
25172517
assert_true_regexp("a{0,1}?bc", "abc", "abc");
25182518
assert_true_regexp("a{0,1}bc", "bbc", "bc");
25192519
assert_true_regexp("a{0,1}?bc", "abc", "bc");
2520+
assert_true_regexp("a{,0}", "a", "");
2521+
assert_true_regexp("a{,0}", "x", "");
25202522
assert_true_regexp("aa{0,1}?bc", "abc", "abc");
25212523
assert_true_regexp("aa{0,1}?bc", "abc", "abc");
25222524
assert_true_regexp("aa{0,1}bc", "abc", "abc");

0 commit comments

Comments
 (0)