Skip to content

Commit 28f9dad

Browse files
feat: Add support for scanning Java packages (fixes #1463) (#1476)
1 parent 93c37ff commit 28f9dad

File tree

1 file changed

+76
-0
lines changed

1 file changed

+76
-0
lines changed

cve_bin_tool/version_scanner.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77
from re import MULTILINE, compile, search
88

9+
import defusedxml.ElementTree as ET
910
import pkg_resources
1011

1112
from cve_bin_tool.cvedb import CVEDB
@@ -57,6 +58,7 @@ def __init__(
5758
self.should_extract = should_extract
5859
self.file_stack = []
5960
self.error_mode = error_mode
61+
self.cve_db = CVEDB()
6062
# self.logger.info("Checkers loaded: %s" % (", ".join(self.checkers.keys())))
6163

6264
@classmethod
@@ -115,6 +117,7 @@ def is_executable(self, filename):
115117
and ("Mach-O" not in output)
116118
and ("PKG-INFO: " not in output)
117119
and ("METADATA: " not in output)
120+
and ("pom.xml" not in output)
118121
):
119122
return False, None
120123
# otherwise use python implementation of file
@@ -160,13 +163,86 @@ def scan_file(self, filename):
160163
# parse binary file's strings
161164
lines = self.parse_strings(filename)
162165

166+
# Check for Java package
167+
if output and "pom.xml" in output:
168+
java_lines = "\n".join(lines.splitlines())
169+
yield from self.run_java_checker(filename, java_lines)
170+
163171
# If python package then strip the lines to avoid detecting other product strings
164172
if output and ("PKG-INFO: " in output or "METADATA: " in output):
165173
py_lines = "\n".join(lines.splitlines()[:3])
166174
yield from self.run_python_package_checkers(filename, py_lines)
167175

168176
yield from self.run_checkers(filename, lines)
169177

178+
def find_java_vendor(self, product, version):
    """Look up the vendor of a Java product in the CVE database.

    The product name is queried as given. If that returns no vendor/product
    pairs and the name contains a hyphen, a second query is made with every
    "-parent" removed and the remaining hyphens turned into underscores
    (Apache products are stored as A_B in the NVD database but are often
    called A-B, and "-parent" artifacts are not listed there).

    Returns a ``(ProductInfo, file_path)`` tuple built from the first
    matching pair, or ``(None, None)`` when neither query finds a match.
    The returned ProductInfo carries the product name that was last queried.
    """
    matches = self.cve_db.get_vendor_product_pairs(product)

    retry_with_alias = matches == [] and "-" in product
    if retry_with_alias:
        self.logger.debug(f"Try alternative product {product}")
        # Dropping "-parent" is a no-op when it is absent, so both
        # rewrites can be chained unconditionally.
        product = product.replace("-parent", "").replace("-", "_")
        matches = self.cve_db.get_vendor_product_pairs(product)

    if matches == []:
        return None, None

    vendor = matches[0]["vendor"]
    file_path = "".join(self.file_stack)
    self.logger.debug(f"{file_path} {product} {version} by {vendor}")
    return ProductInfo(vendor, product, version), file_path
197+
198+
def run_java_checker(self, filename, lines):
    """Process a Maven pom.xml file and yield product and dependency details.

    The POM's own coordinates are checked first: they are read from the
    ``<parent>`` element when one is present, otherwise from the project
    element itself. Every entry under the top-level ``<dependencies>``
    element is checked afterwards. Only versions that start with a digit
    are looked up, so values such as ``${project.version}`` are ignored.

    Missing or empty ``<artifactId>``/``<version>`` elements are skipped
    instead of raising, and a file that is not well-formed XML is logged
    and produces no results.

    Args:
        filename: Path of the pom.xml file to parse.
        lines: Strings extracted from the file. Unused here; accepted so
            the signature matches the other checker runners.

    Yields:
        ``(product_info, file_path)`` tuples, as returned by
        ``find_java_vendor``, for each product with a known vendor.
    """
    try:
        tree = ET.parse(filename)
    except ET.ParseError as error:
        # One malformed POM must not abort the scan of everything else.
        self.logger.debug(f"Unable to parse {filename}: {error}")
        return

    root = tree.getroot()
    # Namespaced tags look like "{namespace}tag"; keep the "{...}" prefix
    # (empty when the document has no namespace) to qualify child lookups.
    schema = root.tag[: root.tag.find("}") + 1]

    def child_text(element, tag):
        """Return the stripped text of a child element, or None if unusable."""
        child = element.find(schema + tag)
        if child is None or child.text is None:
            return None
        return child.text.strip() or None

    file_path = "".join(self.file_stack)

    # Parent tag is optional. When present, its coordinates are used.
    parent = root.find(schema + "parent")
    coordinates = root if parent is None else parent
    product = child_text(coordinates, "artifactId")
    version = child_text(coordinates, "version")

    # Check valid version identifier (i.e. starts with a digit)
    if version is not None and not version[0].isdigit():
        self.logger.debug(f"Invalid {version} detected in {filename}")
        version = None

    if product is not None and version is not None:
        # Use a separate name so a failed lookup does not overwrite the
        # file_path used in the dependency log messages below.
        product_info, found_path = self.find_java_vendor(product, version)
        if found_path is not None:
            yield product_info, found_path

    # Scan for any dependencies referenced in file
    dependencies = root.find(schema + "dependencies")
    if dependencies is not None:
        for dependency in dependencies.findall(schema + "dependency"):
            dep_product = child_text(dependency, "artifactId")
            dep_version = child_text(dependency, "version")
            if dep_product is None or dep_version is None:
                continue
            self.logger.debug(f"{file_path} {dep_product} {dep_version}")
            if dep_version[0].isdigit():
                # Valid version identifier
                product_info, found_path = self.find_java_vendor(
                    dep_product, dep_version
                )
                if found_path is not None:
                    yield product_info, found_path

    self.logger.debug(f"Done scanning file: {filename}")
245+
170246
def run_python_package_checkers(self, filename, lines):
171247
"""
172248
This generator runs only for python packages.

0 commit comments

Comments
 (0)