# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

import hashlib
import logging
import os
import sys
from base64 import b64encode
from tempfile import TemporaryDirectory

import pefile
from debloat.processor import process_pe

from api_app.analyzers_manager.classes import FileAnalyzer
from api_app.analyzers_manager.exceptions import AnalyzerRunException
from tests.mock_utils import MockUpResponse, if_mock_connections, patch

logger = logging.getLogger(__name__)
# NOTE(review): forcing DEBUG at module level overrides the project-wide
# logging configuration for this logger — confirm this is intentional.
logger.setLevel(logging.DEBUG)


def log_message(*args, end="\n", flush=False, **kwargs):
    """print()-compatible adapter so the ``debloat`` library logs through
    our logger.

    ``debloat`` expects a ``print``-like callable; this accepts the same
    ``end``/``flush`` keywords, joins the positional arguments into one
    message and forwards it to ``logger.info``.
    """
    message = " ".join(map(str, args))
    if end:
        message += end
    # Forward only keyword arguments that ``Logger.info`` actually accepts.
    # The original allow-list included keys such as "level", "msg", "args"
    # and "kwargs", none of which are valid keyword arguments for
    # ``Logger.info`` and would have raised ``TypeError`` at call time.
    valid_kwargs = {
        key: value
        for key, value in kwargs.items()
        if key in ("exc_info", "stack_info", "stacklevel", "extra")
    }
    logger.info(message, **valid_kwargs)
    # Emulate print(..., flush=True) if requested.
    if flush:
        for handler in logger.handlers:
            if hasattr(handler, "flush"):
                handler.flush()
                break
        else:
            # Fallback to stdout flush if no flushable handlers
            sys.stdout.flush()


class Debloat(FileAnalyzer):
    """Run the `debloat <https://github.com/Squiblydoo/debloat>`_ tool on a
    PE file and return the slimmed binary (base64-encoded) together with its
    hashes and the size reduction achieved."""

    def run(self):
        # Fast-load is enough: process_pe does the full parsing itself.
        try:
            binary = pefile.PE(self.filepath, fast_load=True)
        except pefile.PEFormatError as e:
            raise AnalyzerRunException(f"Invalid PE file: {e}") from e

        with TemporaryDirectory() as temp_dir:
            output_path = os.path.join(temp_dir, "debloated.exe")
            original_size = os.path.getsize(self.filepath)

            try:
                debloat_code = process_pe(
                    binary,
                    out_path=output_path,
                    last_ditch_processing=True,
                    cert_preservation=True,
                    log_message=log_message,
                    beginning_file_size=original_size,
                )
            except OSError as e:
                raise AnalyzerRunException(
                    f"File operation failed during Debloat processing: {e}"
                ) from e
            except ValueError as e:
                raise AnalyzerRunException(
                    f"Invalid parameter in Debloat processing: {e}"
                ) from e
            except AttributeError as e:
                raise AnalyzerRunException(
                    f"Debloat library error, possibly malformed PE object: {e}"
                ) from e

            # Lazy %-style args: message is only rendered if the level is on.
            logger.info(
                "Debloat processed %s with code %s", self.filepath, debloat_code
            )

            # A zero return code with no output file means debloat found
            # nothing to strip: report it as a non-error "no solution" result.
            if debloat_code == 0 and not os.path.exists(output_path):
                return {
                    "success": False,
                    "error": "No solution found",
                }

            if not os.path.exists(output_path) or not os.path.isfile(output_path):
                raise AnalyzerRunException(
                    "Debloat did not produce a valid output file"
                )

            debloated_size = os.path.getsize(output_path)
            # Guard against division by zero on an empty input file.
            size_reduction = (
                (original_size - debloated_size) / original_size * 100
                if original_size > 0
                else 0
            )

            with open(output_path, "rb") as f:
                output = f.read()
            # MD5 is kept alongside SHA-256 only for sample indexing /
            # cross-referencing, not for any security purpose.
            debloated_hash = hashlib.md5(output).hexdigest()
            debloated_sha256 = hashlib.sha256(output).hexdigest()

            encoded_output = b64encode(output).decode("utf-8")

            # Redundant with TemporaryDirectory cleanup, but frees the file
            # as early as possible and makes the intent explicit.
            os.remove(output_path)
            logger.debug("Cleaned up temporary file.")

            return {
                "success": True,
                "original_size": original_size,
                "debloated_size": debloated_size,
                "debloated_file": encoded_output,
                "size_reduction_percentage": size_reduction,
                "debloated_hash": debloated_hash,
                "debloated_sha256": debloated_sha256,
            }

    @classmethod
    def update(cls) -> bool:
        # Nothing to refresh for this analyzer.
        # NOTE(review): annotated ``-> bool`` but implicitly returns None —
        # confirm whether the framework distinguishes None from False here.
        pass

    @classmethod
    def _monkeypatch(cls, patches: list = None):
        patches = [
            if_mock_connections(
                patch(
                    # Patch the name *used* by this module, not the one in
                    # ``debloat.processor``: ``process_pe`` was bound here at
                    # import time via a from-import, so patching the origin
                    # module would have no effect on ``run``.
                    "api_app.analyzers_manager.file_analyzers.debloat.process_pe",
                    # NOTE(review): the real ``process_pe`` returns an int
                    # code and writes the output file itself; this mock does
                    # neither — confirm the mocked code path matches what the
                    # tests expect.
                    return_value=MockUpResponse(
                        {
                            "success": True,
                            "original_size": 3840392,
                            "debloated_file": "TVqQAAMAAAAEAAAA//",
                            "debloated_hash": "f7f92eadfb444e7fce27efa2007a955a",
                            "debloated_size": 813976,
                            "size_reduction_percentage": 78.80487200264973,
                            "debloated_sha256": "f7f92eadfb444e7fce27efa2007a955a",
                        },
                        200,
                    ),
                )
            ),
        ]
        return super()._monkeypatch(patches)
from django.db import migrations
from django.db.models.fields.related_descriptors import (
    ForwardManyToOneDescriptor,
    ForwardOneToOneDescriptor,
    ManyToManyDescriptor,
    ReverseManyToOneDescriptor,
    ReverseOneToOneDescriptor,
)

# Serialized AnalyzerConfig for the Debloat file analyzer (applied by
# ``migrate`` below following the standard IntelOwl plugin-migration template).
plugin = {
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": None,
        "module": "debloat.Debloat",
        "base_path": "api_app.analyzers_manager.file_analyzers",
    },
    "name": "Debloat",
    # Fixed: the original value was wrapped in an extra pair of literal
    # double quotes ('"Analyzer ..."'), which would be stored verbatim
    # in the description field.
    "description": "Analyzer for debloating PE files using the [Debloat](https://github.com/Squiblydoo/debloat) tool. Reduces file size for easier malware analysis.",
    "disabled": False,
    "soft_time_limit": 300,
    "routing_key": "default",
    "health_check_status": True,
    "type": "file",
    "docker_based": False,
    "maximum_tlp": "CLEAR",
    "observable_supported": [],
    "supported_filetypes": ["application/vnd.microsoft.portable-executable"],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "mapping_data_model": {},
    "model": "analyzers_manager.AnalyzerConfig",
}

# This analyzer declares no Parameter rows and no PluginConfig values.
params = []

values = []


def _get_real_obj(Model, field, value):
    """Resolve a serialized relation *value* into real model instance(s).

    Dicts are nested objects resolved via ``get_or_create``; integers are
    primary keys (except for PluginConfig, which is looked up by the plugin
    name); strings are ``name`` lookups. Many-to-many values are resolved
    element by element.
    """

    def _get_obj(Model, other_model, value):
        if isinstance(value, dict):
            real_vals = {}
            for key, real_val in value.items():
                real_vals[key] = _get_real_obj(other_model, key, real_val)
            value = other_model.objects.get_or_create(**real_vals)[0]
        # it is just the primary key serialized
        else:
            if isinstance(value, int):
                if Model.__name__ == "PluginConfig":
                    value = other_model.objects.get(name=plugin["name"])
                else:
                    value = other_model.objects.get(pk=value)
            else:
                value = other_model.objects.get(name=value)
        return value

    if (
        type(getattr(Model, field))
        in [
            ForwardManyToOneDescriptor,
            ReverseManyToOneDescriptor,
            ReverseOneToOneDescriptor,
            ForwardOneToOneDescriptor,
        ]
        and value
    ):
        other_model = getattr(Model, field).get_queryset().model
        value = _get_obj(Model, other_model, value)
    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
        other_model = getattr(Model, field).rel.model
        value = [_get_obj(Model, other_model, val) for val in value]
    return value


def _create_object(Model, data):
    """Create one *Model* row from serialized *data*.

    Returns True when an identical row already exists, False when a new row
    was created (many-to-many fields are set after the initial save).
    """
    mtm, no_mtm = {}, {}
    for field, value in data.items():
        value = _get_real_obj(Model, field, value)
        if type(getattr(Model, field)) is ManyToManyDescriptor:
            mtm[field] = value
        else:
            no_mtm[field] = value
    try:
        o = Model.objects.get(**no_mtm)
    except Model.DoesNotExist:
        o = Model(**no_mtm)
        o.full_clean()
        o.save()
        for field, value in mtm.items():
            attribute = getattr(o, field)
            if value is not None:
                attribute.set(value)
        return False
    return True


def migrate(apps, schema_editor):
    """Insert the Debloat AnalyzerConfig (plus its params/values, if any)."""
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin)
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """Delete the Debloat AnalyzerConfig created by ``migrate``."""
    # NOTE(review): ``migrate`` already pops "model" from the module-level
    # dict; if both directions run in the same process this second pop raises
    # KeyError. This mirrors the existing migration template — confirm.
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    # Non-atomic: the data operations manage their own consistency.
    atomic = False
    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0154_analyzer_config_bbot"),
    ]

    operations = [migrations.RunPython(migrate, reverse_migrate)]
a/docker/test.override.yml b/docker/test.override.yml index fd11751160..1fb7b8955f 100644 --- a/docker/test.override.yml +++ b/docker/test.override.yml @@ -1,43 +1,43 @@ -services: - uwsgi: - build: - context: .. - dockerfile: docker/Dockerfile - args: - REPO_DOWNLOADER_ENABLED: ${REPO_DOWNLOADER_ENABLED} - WATCHMAN: "true" - PYCTI_VERSION: ${PYCTI_VERSION:-5.10.0} - image: intelowlproject/intelowl:test - volumes: - - ../:/opt/deploy/intel_owl - environment: - - DEBUG=True - - DJANGO_TEST_SERVER=True - - DJANGO_WATCHMAN_TIMEOUT=60 - - daphne: - image: intelowlproject/intelowl:test - volumes: - - ../:/opt/deploy/intel_owl - - nginx: - build: - context: .. - dockerfile: docker/Dockerfile_nginx - image: intelowlproject/intelowl_nginx:test - volumes: - - ../configuration/nginx/django_server.conf:/etc/nginx/conf.d/default.conf - - celery_beat: - image: intelowlproject/intelowl:test - volumes: - - ../:/opt/deploy/intel_owl - environment: - - DEBUG=True - - celery_worker_default: - image: intelowlproject/intelowl:test - volumes: - - ../:/opt/deploy/intel_owl - environment: - - DEBUG=True +services: + uwsgi: + build: + context: .. + dockerfile: docker/Dockerfile + args: + REPO_DOWNLOADER_ENABLED: ${REPO_DOWNLOADER_ENABLED} + WATCHMAN: "true" + PYCTI_VERSION: ${PYCTI_VERSION:-5.10.0} + image: intelowlproject/intelowl:test + volumes: + - ../:/opt/deploy/intel_owl + environment: + - DEBUG=True + - DJANGO_TEST_SERVER=True + - DJANGO_WATCHMAN_TIMEOUT=60 + + daphne: + image: intelowlproject/intelowl:test + volumes: + - ../:/opt/deploy/intel_owl + + nginx: + build: + context: .. 
+ dockerfile: docker/Dockerfile_nginx + image: intelowlproject/intelowl_nginx:test + volumes: + - ../configuration/nginx/django_server.conf:/etc/nginx/conf.d/default.conf + + celery_beat: + image: intelowlproject/intelowl:test + volumes: + - ../:/opt/deploy/intel_owl + environment: + - DEBUG=True + + celery_worker_default: + image: intelowlproject/intelowl:test + volumes: + - ../:/opt/deploy/intel_owl + environment: + - DEBUG=True diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt index 222877bf59..84a1a09931 100644 --- a/requirements/project-requirements.txt +++ b/requirements/project-requirements.txt @@ -88,6 +88,7 @@ docxpy==0.8.5 pylnk3==0.4.2 androguard==3.4.0a1 # version >=4.x of androguard raises a dependency conflict with quark-engine==25.1.1 wad==0.4.6 +debloat==1.6.4 # httpx required for HTTP/2 support (Mullvad DNS rejects HTTP/1.1 with protocol errors) httpx[http2]==0.28.1