Skip to content

Commit f1d3c75

Browse files
r-vdp and terriko authored
fix: improve handling of triage data (intel#4160)
* Fix handling of triage data

  Before this change, there were two issues when using an SBOM file together with a VEX file for triage:

  1. New CVEs for a product that already had CVEs in the triage file were not added to the triage file.
  2. Triage info recorded in the triage file was overwritten when cve-bin-tool was executed with both the `--triage-file` and `vex` options.

  This commit fixes both issues by:

  1. Still scanning for CVEs even if the product is already present in the triage file. Previously, we skipped the CVE scan as soon as we found that the product was already present, but by doing so we might miss new CVEs published since the last scan.
  2. Merging recorded triage info into the CVEs that we found for the SBOM components.

  In order to properly identify products, I implemented the hash and eq methods on the `ProductInfo` type so that they do not consider the `location` field, as this field does not seem to be populated in a consistent manner.

* fixup! Fix handling of triage data
* chore: nitpick for black

---------

Co-authored-by: Terri Oda <[email protected]>
1 parent 1d15256 commit f1d3c75

File tree

2 files changed

+76
-9
lines changed

2 files changed

+76
-9
lines changed

cve_bin_tool/cve_scanner.py

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from cve_bin_tool.input_engine import TriageData
1717
from cve_bin_tool.log import LOGGER
1818
from cve_bin_tool.theme import cve_theme
19-
from cve_bin_tool.util import CVE, CVEData, ProductInfo, VersionInfo
19+
from cve_bin_tool.util import CVE, CVEData, ProductInfo, Remarks, VersionInfo
2020
from cve_bin_tool.version_compare import Version
2121

2222

@@ -180,8 +180,13 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
180180
end_excluding=version_end_excluding,
181181
)
182182

183-
# Go through and get all the severities
183+
product_info_data: CVEData | None = self.all_cve_data.get(product_info)
184+
prev_cves: List[CVE] = (
185+
product_info_data.get("cves", []) if product_info_data is not None else [] # type: ignore
186+
)
184187
cves: List[CVE] = []
188+
189+
# Go through and get all the severities
185190
if cve_list:
186191
finished = False
187192
max_cves = 500
@@ -223,15 +228,26 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
223228
if duplicate_found:
224229
continue
225230

231+
# Check if we already found this CVE with a previous scan.
232+
# In that case we need to check where to get our triage info
233+
# from.
234+
# TODO: turn the list of CVEs into a set to avoid needing
235+
# the linear-time lookup.
236+
prev_cve = next(
237+
(
238+
cve
239+
for cve in prev_cves
240+
if cve.cve_number == row["cve_number"]
241+
),
242+
None,
243+
)
244+
226245
triage = triage_data.get(row["cve_number"]) or triage_data.get(
227246
"default"
228247
)
229-
# Only scan cves if triage is not None.
230-
# Triage will only be None if triage_data don't have default attribute.
231-
# NOTE: Triage can be empty dictionary so checking `if triage:` won't suffice.
232-
if triage is not None:
248+
if prev_cve is None:
233249
row_dict = dict(row)
234-
row_dict.update(triage)
250+
235251
# print(row_dict)
236252
row_dict["severity"] = row_dict["severity"] or row["severity"]
237253
# Checking for exploits
@@ -274,7 +290,39 @@ def get_cves(self, product_info: ProductInfo, triage_data: TriageData):
274290
f'metrics found in CVE {row_dict["cve_number"]} is {row_dict["metric"]}'
275291
)
276292
cve = CVE(**row_dict)
277-
cves.append(cve)
293+
else:
294+
cve = prev_cve
295+
296+
# We assume that only one source has the triage info.
297+
# We try to figure out here which one.
298+
# If we have useful info in the triage data we received,
299+
# then we use it.
300+
if triage is not None and (
301+
# Either the new cve does not have triage data,
302+
# or it is trivial (newly found cve)
303+
not cve.remarks
304+
or cve.remarks == Remarks.NewFound
305+
):
306+
for key in [
307+
"remarks",
308+
"comments",
309+
"response",
310+
"justification",
311+
"severity",
312+
]:
313+
data = triage.get(key)
314+
if data:
315+
if (
316+
key == "severity"
317+
and self.check_exploits
318+
and row_dict["cve_number"] in self.exploits_list
319+
):
320+
data += "-EXPLOIT"
321+
322+
self.logger.debug(f"Setting field {key} to: {data}")
323+
cve = cve._replace(**{key: data})
324+
325+
cves.append(cve)
278326

279327
if cves:
280328
self.products_with_cve += 1

cve_bin_tool/util.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# Copyright (C) 2021 Intel Corporation
22
# SPDX-License-Identifier: GPL-3.0-or-later
33

4-
""" Utility classes for the CVE Binary Tool """
4+
"""Utility classes for the CVE Binary Tool"""
5+
56
from __future__ import annotations
67

78
import fnmatch
@@ -165,6 +166,24 @@ class ProductInfo(NamedTuple):
165166
location: str
166167
purl: str | None = None
167168

169+
def __identity_members(self):
    """Return the fields that define this product's identity.

    The ``__eq__`` and ``__hash__`` implementations are built on this tuple.
    ``location`` is deliberately left out: it can take on different values
    depending on where the product info is coming from, and entries that
    describe the same product must still be recognised as identical.
    ``purl`` is not part of the identity either.
    """
    # TODO: what is the meaning of the location field exactly?
    identity = (self.vendor, self.product, self.version)
    return identity
177+
178+
def __eq__(self, other):
    """Compare two products by their identity members only.

    Returns True when ``other`` has exactly the same type as ``self`` and
    both yield the same identity tuple; fields outside that tuple are
    ignored.  Any object of a different type compares unequal.
    """
    # Return False rather than NotImplemented for foreign types: with
    # NotImplemented, Python would fall back to plain tuple comparison,
    # which looks at every field and could report equality with a raw tuple.
    if type(other) is not type(self):
        return False
    return self.__identity_members() == other.__identity_members()

def __ne__(self, other):
    """Return the logical inverse of ``__eq__``.

    This has to be spelled out: the tuple base class provides its own
    ``__ne__`` that compares every field, so without this override two
    products differing only in an ignored field would be both ``==`` and
    ``!=`` at the same time.
    """
    return not self == other
183+
184+
def __hash__(self):
    """Hash on the identity members so the hash stays in step with ``__eq__``."""
    identity = self.__identity_members()
    return hash(identity)
186+
168187

169188
class ScanInfo(NamedTuple):
170189
"""

0 commit comments

Comments
 (0)