Skip to content

Commit aea676d

Browse files
authored
Merge pull request #19445 from asgerf/js/summaries-with-fallback
JS: Generate flow summaries from summaryModels; only generate steps as a fallback
2 parents 20a012d + 891b2b8 commit aea676d

File tree

7 files changed

+201
-51
lines changed

7 files changed

+201
-51
lines changed

javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll

+4
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,7 @@ module Stage {
264264
cached
265265
predicate backref() { optionalStep(_, _, _) }
266266
}
267+
268+
predicate unsupportedCallable = Private::unsupportedCallable/1;
269+
270+
predicate unsupportedCallable = Private::unsupportedCallable/4;

javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll

+84-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
private import javascript
2020
private import internal.ApiGraphModels as Shared
2121
private import internal.ApiGraphModelsSpecific as Specific
22+
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate
2223
private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming
2324
import Shared::ModelInput as ModelInput
2425
import Shared::ModelOutput as ModelOutput
@@ -45,12 +46,94 @@ private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Rang
4546
}
4647
}
4748

49+
private class SummarizedCallableFromModel extends DataFlow::SummarizedCallable {
50+
string type;
51+
string path;
52+
53+
SummarizedCallableFromModel() {
54+
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
55+
this = type + ";" + path
56+
}
57+
58+
override DataFlow::InvokeNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) }
59+
60+
override predicate propagatesFlow(
61+
string input, string output, boolean preservesValue, string model
62+
) {
63+
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
64+
kind = "value" and
65+
preservesValue = true
66+
or
67+
kind = "taint" and
68+
preservesValue = false
69+
)
70+
}
71+
72+
predicate hasTypeAndPath(string type_, string path_) { type = type_ and path = path_ }
73+
74+
predicate isUnsupportedByFlowSummaries() { unsupportedCallable(this) }
75+
}
76+
77+
private predicate shouldInduceStepsFromSummary(string type, string path) {
78+
exists(SummarizedCallableFromModel callable |
79+
callable.isUnsupportedByFlowSummaries() and
80+
callable.hasTypeAndPath(type, path)
81+
)
82+
}
83+
84+
/**
85+
* Holds if `path` is an input or output spec for a summary with the given `base` node.
86+
*/
87+
pragma[nomagic]
88+
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) {
89+
exists(string type, string input, string output, string path |
90+
// If the summary for 'callable' could not be handled as a flow summary, we need to evaluate
91+
// its inputs and outputs to a set of nodes, so we can generate steps instead.
92+
shouldInduceStepsFromSummary(type, path) and
93+
ModelOutput::resolvedSummaryBase(type, path, base) and
94+
ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
95+
inputOrOutput = [input, output]
96+
)
97+
}
98+
99+
/**
100+
* Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`.
101+
*/
102+
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) {
103+
relevantInputOutputPath(baseNode, path) and
104+
(
105+
n = 1 and
106+
result = Shared::getSuccessorFromInvoke(baseNode, path.getToken(0))
107+
or
108+
result =
109+
Shared::getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1),
110+
path.getToken(n - 1))
111+
)
112+
}
113+
114+
/**
115+
* Gets the API node for the given input/output path, evaluated relative to `baseNode`.
116+
*/
117+
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) {
118+
result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken())
119+
}
120+
121+
private predicate summaryStep(API::Node pred, API::Node succ, string kind) {
122+
exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output |
123+
shouldInduceStepsFromSummary(type, path) and
124+
ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and
125+
ModelOutput::resolvedSummaryBase(type, path, base) and
126+
pred = getNodeFromInputOutputPath(base, input) and
127+
succ = getNodeFromInputOutputPath(base, output)
128+
)
129+
}
130+
48131
/**
49132
* Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes.
50133
*/
51134
private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) {
52135
exists(API::Node predNode, API::Node succNode |
53-
Specific::summaryStep(predNode, succNode, kind) and
136+
summaryStep(predNode, succNode, kind) and
54137
pred = predNode.asSink() and
55138
succ = succNode.asSource()
56139
)

javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll

-45
Original file line numberDiff line numberDiff line change
@@ -272,51 +272,6 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat
272272
)
273273
}
274274

275-
/**
276-
* Holds if `path` is an input or output spec for a summary with the given `base` node.
277-
*/
278-
pragma[nomagic]
279-
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) {
280-
exists(string type, string input, string output, string path |
281-
ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
282-
ModelOutput::resolvedSummaryBase(type, path, base) and
283-
inputOrOutput = [input, output]
284-
)
285-
}
286-
287-
/**
288-
* Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`.
289-
*/
290-
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) {
291-
relevantInputOutputPath(baseNode, path) and
292-
(
293-
n = 1 and
294-
result = getSuccessorFromInvoke(baseNode, path.getToken(0))
295-
or
296-
result =
297-
getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), path.getToken(n - 1))
298-
)
299-
}
300-
301-
/**
302-
* Gets the API node for the given input/output path, evaluated relative to `baseNode`.
303-
*/
304-
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) {
305-
result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken())
306-
}
307-
308-
/**
309-
* Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`.
310-
*/
311-
predicate summaryStep(API::Node pred, API::Node succ, string kind) {
312-
exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output |
313-
ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and
314-
ModelOutput::resolvedSummaryBase(type, path, base) and
315-
pred = getNodeFromInputOutputPath(base, input) and
316-
succ = getNodeFromInputOutputPath(base, output)
317-
)
318-
}
319-
320275
class InvokeNode = API::InvokeNode;
321276

322277
/** Gets an `InvokeNode` corresponding to an invocation of `node`. */

javascript/ql/test/library-tests/TripleDot/underscore.string.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ function strToStr() {
3939
}
4040

4141
function strToArray() {
42-
sink(s.chop(source("s1"), 3)); // $ MISSING: hasTaintFlow=s1
42+
sink(s.chop(source("s1"), 3)); // $ hasTaintFlow=s1
4343
sink(s.chars(source("s2"))[0]); // $ hasTaintFlow=s2
4444
sink(s.words(source("s3"))[0]); // $ hasTaintFlow=s3
4545
sink(s.lines(source("s7"))[0]); // $ hasTaintFlow=s7
@@ -97,7 +97,7 @@ function multiSource() {
9797

9898
function chaining() {
9999
sink(s(source("s1"))
100-
.slugify().capitalize().decapitalize().clean().cleanDiacritics()
100+
.slugify().capitalize().decapitalize().clean().cleanDiacritics()
101101
.swapCase().escapeHTML().unescapeHTML().wrap().dedent()
102102
.reverse().pred().succ().titleize().camelize().classify()
103103
.underscored().dasherize().humanize().trim().ltrim().rtrim()
@@ -119,8 +119,8 @@ function chaining() {
119119
.q(source("s17")).ljust(10, source("s18"))
120120
.rjust(10, source("s19"))); // $ hasTaintFlow=s16 hasTaintFlow=s17 hasTaintFlow=s18 hasTaintFlow=s19
121121

122-
sink(s(source("s20")).tap(function(value) {
123-
return value + source("s21");
122+
sink(s(source("s20")).tap(function(value) {
123+
return value + source("s21");
124124
}).value()); // $ hasTaintFlow=s20 hasTaintFlow=s21
125125
}
126126

javascript/ql/test/library-tests/frameworks/data/test.expected

+17
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,21 @@
11
legacyDataFlowDifference
2+
| test.js:5:30:5:37 | source() | test.js:5:8:5:38 | testlib ... urce()) | only flow with NEW data flow library |
3+
| test.js:6:22:6:29 | source() | test.js:6:8:6:30 | preserv ... urce()) | only flow with NEW data flow library |
4+
| test.js:7:41:7:48 | source() | test.js:7:8:7:49 | require ... urce()) | only flow with NEW data flow library |
5+
| test.js:11:38:11:45 | source() | test.js:11:8:11:55 | testlib ... , 1, 1) | only flow with NEW data flow library |
6+
| test.js:13:44:13:51 | source() | test.js:13:8:13:55 | testlib ... e(), 1) | only flow with NEW data flow library |
7+
| test.js:17:47:17:54 | source() | test.js:17:8:17:61 | testlib ... , 1, 1) | only flow with NEW data flow library |
8+
| test.js:18:50:18:57 | source() | test.js:18:8:18:61 | testlib ... e(), 1) | only flow with NEW data flow library |
9+
| test.js:19:53:19:60 | source() | test.js:19:8:19:61 | testlib ... urce()) | only flow with NEW data flow library |
10+
| test.js:21:34:21:41 | source() | test.js:21:8:21:51 | testlib ... , 1, 1) | only flow with NEW data flow library |
11+
| test.js:22:37:22:44 | source() | test.js:22:8:22:51 | testlib ... , 1, 1) | only flow with NEW data flow library |
12+
| test.js:23:40:23:47 | source() | test.js:23:8:23:51 | testlib ... e(), 1) | only flow with NEW data flow library |
13+
| test.js:24:43:24:50 | source() | test.js:24:8:24:51 | testlib ... urce()) | only flow with NEW data flow library |
14+
| test.js:31:29:31:36 | source() | test.js:32:10:32:10 | y | only flow with NEW data flow library |
15+
| test.js:37:29:37:36 | source() | test.js:38:10:38:10 | y | only flow with NEW data flow library |
16+
| test.js:43:29:43:36 | source() | test.js:44:10:44:10 | y | only flow with NEW data flow library |
17+
| test.js:47:33:47:40 | source() | test.js:49:10:49:13 | this | only flow with NEW data flow library |
18+
| test.js:102:16:102:34 | testlib.getSource() | test.js:104:8:104:24 | source.continue() | only flow with NEW data flow library |
219
consistencyIssue
320
taintFlow
421
| guardedRouteHandler.js:6:10:6:28 | req.injectedReqData | guardedRouteHandler.js:6:10:6:28 | req.injectedReqData |

javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected

+5-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,9 @@ edges
7878
| ReflectedXss.js:22:19:22:26 | req.body | ReflectedXss.js:22:12:22:27 | marked(req.body) | provenance | |
7979
| ReflectedXss.js:29:7:32:4 | mytable | ReflectedXss.js:33:12:33:18 | mytable | provenance | |
8080
| ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | ReflectedXss.js:29:7:32:4 | mytable | provenance | |
81-
| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | |
81+
| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | |
82+
| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | provenance | |
83+
| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | provenance | |
8284
| ReflectedXss.js:41:31:41:38 | req.body | ReflectedXss.js:41:12:41:39 | convert ... q.body) | provenance | |
8385
| ReflectedXss.js:63:14:63:21 | req.body | ReflectedXss.js:63:39:63:42 | file | provenance | |
8486
| ReflectedXss.js:63:39:63:42 | file | ReflectedXss.js:64:16:64:19 | file | provenance | |
@@ -253,6 +255,8 @@ nodes
253255
| ReflectedXss.js:28:12:28:19 | req.body | semmle.label | req.body |
254256
| ReflectedXss.js:29:7:32:4 | mytable | semmle.label | mytable |
255257
| ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | semmle.label | table([ ... ce\\n ]) |
258+
| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | semmle.label | [\\n [ ... rce\\n ] [1, 1] |
259+
| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | semmle.label | ['body', req.body] [1] |
256260
| ReflectedXss.js:31:14:31:21 | req.body | semmle.label | req.body |
257261
| ReflectedXss.js:33:12:33:18 | mytable | semmle.label | mytable |
258262
| ReflectedXss.js:40:12:40:19 | req.body | semmle.label | req.body |

shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll

+87
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,93 @@ module Make<
530530
}
531531
}
532532

533+
private predicate isNonLocalSummaryComponent(SummaryComponent c) {
534+
c instanceof TArgumentSummaryComponent or
535+
c instanceof TParameterSummaryComponent or
536+
c instanceof TReturnSummaryComponent
537+
}
538+
539+
private predicate isLocalSummaryComponent(SummaryComponent c) {
540+
not isNonLocalSummaryComponent(c)
541+
}
542+
543+
/**
544+
* Holds if `s` is a valid input stack, in the sense that we generate a data flow graph
545+
* that faithfully represents this flow, and lambda-tracking can be expected to track
546+
* lambdas to the relevant callbacks in practice.
547+
*/
548+
private predicate isSupportedInputStack(SummaryComponentStack s) {
549+
// Argument[n].*
550+
s.length() = 1 and
551+
s.head() instanceof TArgumentSummaryComponent
552+
or
553+
// Argument[n].ReturnValue.*
554+
s.length() = 2 and
555+
s.head() instanceof TReturnSummaryComponent and
556+
s.tail().head() instanceof TArgumentSummaryComponent
557+
or
558+
// Argument[n].Parameter[n].Content.*
559+
s.length() = 3 and
560+
s.head() instanceof TContentSummaryComponent and
561+
s.tail().head() instanceof TParameterSummaryComponent and
562+
s.drop(2).head() instanceof TArgumentSummaryComponent
563+
or
564+
isSupportedInputStack(s.tail()) and
565+
isLocalSummaryComponent(s.head())
566+
}
567+
568+
private predicate isSupportedOutputStack1(SummaryComponentStack s) {
569+
// ReturnValue.*
570+
s.length() = 1 and
571+
s.head() instanceof TReturnSummaryComponent
572+
or
573+
// Argument[n].Content.*
574+
s.length() = 2 and
575+
s.head() instanceof TContentSummaryComponent and
576+
s.tail().head() instanceof TArgumentSummaryComponent
577+
or
578+
// Argument[n].Parameter[n].*
579+
s.length() = 2 and
580+
s.head() instanceof TParameterSummaryComponent and
581+
s.tail().head() instanceof TArgumentSummaryComponent
582+
or
583+
isSupportedOutputStack1(s.tail()) and
584+
isLocalSummaryComponent(s.head())
585+
}
586+
587+
/** Like `isSupportedInputStack` but for output stacks. */
588+
private predicate isSupportedOutputStack(SummaryComponentStack s) {
589+
isSupportedOutputStack1(s)
590+
or
591+
// `Argument[n]` not followed by anything. Needs to be outside the recursion.
592+
s.length() = 1 and
593+
s.head() instanceof TArgumentSummaryComponent
594+
}
595+
596+
/**
597+
* Holds if `callable` has an unsupported flow `input -> output`.
598+
*
599+
* `whichOne` indicates if the `input` or `output` contains the unsupported sequence.
600+
*/
601+
predicate unsupportedCallable(
602+
SummarizedCallableImpl callable, SummaryComponentStack input, SummaryComponentStack output,
603+
string whichOne
604+
) {
605+
callable.propagatesFlow(input, output, _, _) and
606+
(
607+
not isSupportedInputStack(input) and whichOne = "input"
608+
or
609+
not isSupportedOutputStack(output) and whichOne = "output"
610+
)
611+
}
612+
613+
/**
614+
* Holds if `callable` has an unsupported flow.
615+
*/
616+
predicate unsupportedCallable(SummarizedCallableImpl callable) {
617+
unsupportedCallable(callable, _, _, _)
618+
}
619+
533620
private predicate summarySpec(string spec) {
534621
exists(SummarizedCallable c |
535622
c.propagatesFlow(spec, _, _, _)

0 commit comments

Comments
 (0)