Skip to content

Commit a5903e1

Browse files
authored
Fix some compiler edge cases. (#1709)
* Fix some compiler edge cases. Add a warning when "0 of them" is used. Point to some new documentation that explains why "none of them" is preferred. Handle some edge cases, which are all now errors: * -1 of them * "foo" of them * /foo/ of them And this one is also an error if the external variable "x" is a negative integer or a string: * x of them I've also made sure this works with a construct like "0 + x of them" where x is defined as a negative integer. Add test cases for as many of these as I can. I don't have test cases for the external variable case, but it does work as expected. Here is the output of a rule using external variables where the variable is defined to be 1, 0, -1 and foo: ``` wxs@mbp yara % cat rules/test.yara rule a { strings: $a = "FreeBSD" condition: x of them } wxs@mbp yara % ./yara -d x=1 rules/test.yara /bin/ls a /bin/ls wxs@mbp yara % ./yara -d x=0 rules/test.yara /bin/ls warning: rule "a" in rules/test.yara(5): Consider using "none" keyword, it is less ambiguous. Please see https://yara.readthedocs.io/en/stable/writingrules.html#sets-of-strings-1 for an explanation. wxs@mbp yara % ./yara -d x=-1 rules/test.yara /bin/ls error: rule "a" in rules/test.yara(5): invalid value in condition: "-1" wxs@mbp yara % ./yara -d x=foo rules/test.yara /bin/ls error: rule "a" in rules/test.yara(5): invalid value in condition: "string in for_expression is invalid" wxs@mbp yara % ``` * More compiler warnings and errors. If you specify "2 of ($a)" it is now a warning because it will always evaluate to false, and is quite possibly not what you wanted to write. This is true for string sets, rule sets and string sets in a range like "2 of ($a) in (0..10)". If you are using an integer range it is now an error if the lower bound is greater than the upper bound. To support rule sets I needed to modify yr_parser_emit_pushes_for_rules() to use a count parameter, which it will update with the number of pushed rules. This is identical to how yr_parser_emit_pushes_for_strings() works. While I'm here, remove the link from the warning message for the "none" keyword.
1 parent 19363ee commit a5903e1

File tree

10 files changed

+963
-597
lines changed

10 files changed

+963
-597
lines changed

docs/writingrules.rst

+12
Original file line numberDiff line numberDiff line change
@@ -1200,6 +1200,18 @@ The keywords ``any``, ``all`` and ``none`` can be used as well.
12001200
1 of ($*) // same that "any of them"
12011201
none of ($b*) // zero of the set of strings that start with "$b"
12021202

1203+
.. warning:: Due to the way YARA works internally, using "0 of them" is an
1204+
ambiguous part of the language which should be avoided in favor of "none
1205+
of them". To understand this, consider the meaning of "2 of them", which
1206+
is true if 2 or more of the strings match. Historically, "0 of them"
1207+
followed this principle and would evaluate to true if at least one of the
1208+
strings matched. This ambiguity is resolved in YARA 4.3.0 by making "0 of
1209+
them" evaluate to true if exactly 0 of the strings match. To improve on
1210+
the situation and make the intent clear, it is encouraged to use "none" in
1211+
place of 0. By not using an integer it is easier to reason about the meaning
1212+
of "none of them" without the historical understanding of "at least 0"
1213+
clouding the issue.
1214+
12031215

12041216
Starting with YARA 4.2.0 it is possible to express a set of strings in an
12051217
integer range, like this:

libyara/compiler.c

+7
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,13 @@ YR_API char* yr_compiler_get_error_message(
10411041
"rule identifier \"%s\" matches previously used wildcard rule set",
10421042
compiler->last_error_extra_info);
10431043
break;
1044+
case ERROR_INVALID_VALUE:
1045+
snprintf(
1046+
buffer,
1047+
buffer_size,
1048+
"invalid value in condition: \"%s\"",
1049+
compiler->last_error_extra_info);
1050+
break;
10441051
}
10451052

10461053
return buffer;

libyara/grammar.c

+682-576
Large diffs are not rendered by default.

libyara/grammar.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ extern int yara_yydebug;
189189
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
190190
union YYSTYPE
191191
{
192-
#line 336 "grammar.y"
192+
#line 341 "grammar.y"
193193

194194
YR_EXPRESSION expression;
195195
SIZED_STRING* sized_string;

libyara/grammar.y

+103-11
Original file line numberDiff line numberDiff line change
@@ -286,19 +286,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
286286

287287
%type <integer> integer_set
288288
%type <integer> integer_enumeration
289-
%type <integer> for_expression
290289
%type <integer> rule_modifier
291290
%type <integer> rule_modifiers
292291
%type <integer> string_enumeration
293292
%type <integer> string_enumeration_item
294293
%type <integer> string_set
295294
%type <integer> for_iteration
295+
%type <integer> rule_enumeration
296+
%type <integer> rule_enumeration_item
297+
%type <integer> rule_set
296298

297299
%type <expression> primary_expression
298300
%type <expression> boolean_expression
299301
%type <expression> expression
300302
%type <expression> identifier
301303
%type <expression> regexp
304+
%type <expression> for_expression
305+
%type <expression> for_quantifier
306+
302307

303308
%type <c_string> arguments
304309
%type <c_string> arguments_list
@@ -1687,12 +1692,22 @@ expression
16871692
}
16881693
| for_expression _OF_ string_set
16891694
{
1695+
if ($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > $3)
1696+
{
1697+
yywarning(yyscanner,
1698+
"expression always false - requesting %lld of %lld.", $1.value.integer, $3);
1699+
}
16901700
yr_parser_emit_with_arg(yyscanner, OP_OF, OF_STRING_SET, NULL, NULL);
16911701

16921702
$$.type = EXPRESSION_TYPE_BOOLEAN;
16931703
}
16941704
| for_expression _OF_ rule_set
16951705
{
1706+
if ($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > $3)
1707+
{
1708+
yywarning(yyscanner,
1709+
"expression always false - requesting %lld of %lld.", $1.value.integer, $3);
1710+
}
16961711
yr_parser_emit_with_arg(yyscanner, OP_OF, OF_RULE_SET, NULL, NULL);
16971712

16981713
$$.type = EXPRESSION_TYPE_BOOLEAN;
@@ -1737,6 +1752,12 @@ expression
17371752
}
17381753
| for_expression _OF_ string_set _IN_ range
17391754
{
1755+
if ($1.type == EXPRESSION_TYPE_INTEGER && $1.value.integer > $3)
1756+
{
1757+
yywarning(yyscanner,
1758+
"expression always false - requesting %lld of %lld.", $1.value.integer, $3);
1759+
}
1760+
17401761
yr_parser_emit(yyscanner, OP_OF_FOUND_IN, NULL);
17411762

17421763
$$.type = EXPRESSION_TYPE_BOOLEAN;
@@ -2106,6 +2127,18 @@ range
21062127
result = ERROR_WRONG_TYPE;
21072128
}
21082129

2130+
// If we can statically determine lower and upper bounds, ensure
2131+
// lower < upper. Check for upper bound here because some things (like
2132+
// string count) are EXPRESSION_TYPE_INTEGER.
2133+
if ($2.value.integer != YR_UNDEFINED &&
2134+
$4.value.integer != YR_UNDEFINED &&
2135+
$2.value.integer > $4.value.integer)
2136+
{
2137+
yr_compiler_set_error_extra_info(
2138+
compiler, "range lower bound must be greater than upper bound");
2139+
result = ERROR_INVALID_VALUE;
2140+
}
2141+
21092142
fail_if_error(result);
21102143
}
21112144
;
@@ -2214,12 +2247,15 @@ rule_set
22142247
yr_parser_emit_push_const(yyscanner, YR_UNDEFINED);
22152248
}
22162249
rule_enumeration ')'
2250+
{
2251+
$$ = $3;
2252+
}
22172253
;
22182254

22192255

22202256
rule_enumeration
2221-
: rule_enumeration_item
2222-
| rule_enumeration ',' rule_enumeration_item
2257+
: rule_enumeration_item { $$ = $1; }
2258+
| rule_enumeration ',' rule_enumeration_item { $$ = $1 + $3; }
22232259
;
22242260

22252261

@@ -2254,9 +2290,12 @@ rule_enumeration_item
22542290
yr_free($1);
22552291

22562292
fail_if_error(result);
2293+
2294+
$$ = 1;
22572295
}
22582296
| _IDENTIFIER_ '*'
22592297
{
2298+
int count = 0;
22602299
YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr(
22612300
compiler->arena,
22622301
YR_NAMESPACES_TABLE,
@@ -2268,33 +2307,86 @@ rule_enumeration_item
22682307
ns->name,
22692308
1);
22702309

2271-
int result = yr_parser_emit_pushes_for_rules(yyscanner, $1);
2310+
int result = yr_parser_emit_pushes_for_rules(yyscanner, $1, &count);
22722311
yr_free($1);
22732312

22742313
fail_if_error(result);
2314+
2315+
$$ = count;
22752316
}
22762317
;
22772318

22782319

22792320
for_expression
22802321
: primary_expression
22812322
{
2282-
$$ = FOR_EXPRESSION_ANY;
2323+
if ($1.type == EXPRESSION_TYPE_INTEGER)
2324+
{
2325+
if ($1.value.integer == 0)
2326+
{
2327+
yywarning(yyscanner,
2328+
"consider using \"none\" keyword, it is less ambiguous.");
2329+
}
2330+
2331+
if ($1.value.integer < 0)
2332+
{
2333+
yr_compiler_set_error_extra_info_fmt(compiler,
2334+
"%lld", $1.value.integer);
2335+
fail_with_error(ERROR_INVALID_VALUE);
2336+
}
2337+
}
2338+
2339+
if ($1.type == EXPRESSION_TYPE_STRING)
2340+
{
2341+
SIZED_STRING* ss = yr_arena_ref_to_ptr(compiler->arena,
2342+
&$1.value.sized_string_ref);
2343+
// If the expression is an external string variable we need to get
2344+
// it some other way.
2345+
if (ss != NULL)
2346+
{
2347+
yr_compiler_set_error_extra_info_fmt(compiler, "%s", ss->c_string);
2348+
}
2349+
else
2350+
{
2351+
yr_compiler_set_error_extra_info(compiler,
2352+
"string in for_expression is invalid");
2353+
}
2354+
fail_with_error(ERROR_INVALID_VALUE);
2355+
}
2356+
2357+
if ($1.type == EXPRESSION_TYPE_REGEXP)
2358+
{
2359+
yr_compiler_set_error_extra_info(compiler,
2360+
"regexp in for_expression is invalid");
2361+
fail_with_error(ERROR_INVALID_VALUE);
2362+
}
2363+
2364+
$$.value.integer = $1.value.integer;
22832365
}
2284-
| _ALL_
2366+
| for_quantifier
22852367
{
2286-
yr_parser_emit_push_const(yyscanner, YR_UNDEFINED);
2287-
$$ = FOR_EXPRESSION_ALL;
2368+
$$.value.integer = $1.value.integer;
22882369
}
2370+
;
2371+
2372+
for_quantifier
2373+
: _ALL_
2374+
{
2375+
yr_parser_emit_push_const(yyscanner, YR_UNDEFINED);
2376+
$$.type = EXPRESSION_TYPE_QUANTIFIER;
2377+
$$.value.integer = FOR_EXPRESSION_ALL;
2378+
}
22892379
| _ANY_
22902380
{
22912381
yr_parser_emit_push_const(yyscanner, 1);
2292-
$$ = FOR_EXPRESSION_ANY;
2382+
$$.type = EXPRESSION_TYPE_QUANTIFIER;
2383+
$$.value.integer = FOR_EXPRESSION_ANY;
22932384
}
22942385
| _NONE_
22952386
{
22962387
yr_parser_emit_push_const(yyscanner, 0);
2297-
$$ = FOR_EXPRESSION_NONE;
2388+
$$.type = EXPRESSION_TYPE_QUANTIFIER;
2389+
$$.value.integer = FOR_EXPRESSION_NONE;
22982390
}
22992391
;
23002392

@@ -2468,7 +2560,7 @@ primary_expression
24682560
{
24692561
case OBJECT_TYPE_INTEGER:
24702562
$$.type = EXPRESSION_TYPE_INTEGER;
2471-
$$.value.integer = YR_UNDEFINED;
2563+
$$.value.integer = $1.value.object->value.i;
24722564
break;
24732565
case OBJECT_TYPE_FLOAT:
24742566
$$.type = EXPRESSION_TYPE_FLOAT;

libyara/include/yara/compiler.h

+8-7
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4242
#define YARA_ERROR_LEVEL_WARNING 1
4343

4444
// Expression type constants are powers of two because they are used as flags.
45-
#define EXPRESSION_TYPE_UNKNOWN 0
46-
#define EXPRESSION_TYPE_BOOLEAN 1
47-
#define EXPRESSION_TYPE_INTEGER 2
48-
#define EXPRESSION_TYPE_STRING 4
49-
#define EXPRESSION_TYPE_REGEXP 8
50-
#define EXPRESSION_TYPE_OBJECT 16
51-
#define EXPRESSION_TYPE_FLOAT 32
45+
#define EXPRESSION_TYPE_UNKNOWN 0
46+
#define EXPRESSION_TYPE_BOOLEAN 1
47+
#define EXPRESSION_TYPE_INTEGER 2
48+
#define EXPRESSION_TYPE_STRING 4
49+
#define EXPRESSION_TYPE_REGEXP 8
50+
#define EXPRESSION_TYPE_OBJECT 16
51+
#define EXPRESSION_TYPE_FLOAT 32
52+
#define EXPRESSION_TYPE_QUANTIFIER 64
5253

5354
// The compiler uses an arena to store the data it generates during the
5455
// compilation. Each buffer in the arena is used for storing a different type

libyara/include/yara/error.h

+1
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
106106
#define ERROR_BLOCK_NOT_READY 61
107107
#define ERROR_INVALID_PERCENTAGE 62
108108
#define ERROR_IDENTIFIER_MATCHES_WILDCARD 63
109+
#define ERROR_INVALID_VALUE 64
109110

110111
#define GOTO_EXIT_ON_ERROR(x) \
111112
{ \

libyara/include/yara/parser.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ int yr_parser_emit_pushes_for_strings(
120120

121121
int yr_parser_emit_pushes_for_rules(
122122
yyscan_t yyscanner,
123-
const char* identifier);
123+
const char* identifier,
124+
int *count);
124125

125126
int yr_parser_reduce_external(
126127
yyscan_t yyscanner,

libyara/parser.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,10 @@ int yr_parser_emit_pushes_for_strings(
237237

238238
// Emit OP_PUSH_RULE instructions for all rules whose identifier has given
239239
// prefix.
240-
int yr_parser_emit_pushes_for_rules(yyscan_t yyscanner, const char* prefix)
240+
int yr_parser_emit_pushes_for_rules(
241+
yyscan_t yyscanner,
242+
const char* prefix,
243+
int* count)
241244
{
242245
YR_COMPILER* compiler = yyget_extra(yyscanner);
243246

@@ -284,6 +287,11 @@ int yr_parser_emit_pushes_for_rules(yyscan_t yyscanner, const char* prefix)
284287
rule++;
285288
}
286289

290+
if (count != NULL)
291+
{
292+
*count = matching;
293+
}
294+
287295
if (matching == 0)
288296
{
289297
yr_compiler_set_error_extra_info(compiler, prefix);

0 commit comments

Comments
 (0)