Skip to content

[Analyzer] Debloat #2806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 11, 2025
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions api_app/analyzers_manager/file_analyzers/debloat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

import hashlib
import logging
import os
import sys
from base64 import b64encode
from tempfile import TemporaryDirectory

import pefile
from debloat.processor import process_pe

from api_app.analyzers_manager.classes import FileAnalyzer
from api_app.analyzers_manager.exceptions import AnalyzerRunException
from tests.mock_utils import MockUpResponse, if_mock_connections, patch

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


# Custom logger to handle the debloat library's logging.
# debloat's process_pe expects a print()-like callable; this adapter joins
# the positional arguments into one message and routes it to `logger`.
def log_message(*args, end="\n", flush=False, **kwargs):
    """Print-compatible adapter that forwards debloat output to the logger.

    Args:
        *args: message fragments, joined with a single space (print() style).
        end: suffix appended to the message (print() semantics).
        flush: when True, flush the first flushable logger handler, falling
            back to a stdout flush — emulates ``print(..., flush=True)``.
        **kwargs: extra keywords; only those that ``Logger.info`` actually
            accepts are forwarded, the rest are silently dropped.
    """
    message = " ".join(map(str, args))
    if end:
        message += end
    # Forward only keywords Logger.info supports. Anything else (including
    # print()'s "file", or logging-internal names like "level"/"msg"/"args",
    # which collide with Logger._log's own parameters) would raise TypeError.
    valid_kwargs = {
        key: value
        for key, value in kwargs.items()
        if key in ("exc_info", "stack_info", "stacklevel", "extra")
    }
    logger.info(message, **valid_kwargs)
    # Emulate print(..., flush=True): only flush when actually requested.
    if flush:
        for handler in logger.handlers:
            if hasattr(handler, "flush"):
                handler.flush()
                break
        else:
            # Fallback to stdout flush if no flushable handlers
            sys.stdout.flush()


class Debloat(FileAnalyzer):
    """Debloat inflated PE files using the `debloat` library.

    Runs ``debloat.processor.process_pe`` on the sample and returns the
    reduced binary (base64-encoded) together with size and hash metadata,
    so the smaller file can be re-analyzed downstream.
    """

    def run(self):
        # Parse the sample as a PE. fast_load skips full directory parsing,
        # which is sufficient for debloat's processing.
        try:
            binary = pefile.PE(self.filepath, fast_load=True)
        except pefile.PEFormatError as e:
            raise AnalyzerRunException(f"Invalid PE file: {e}")

        with TemporaryDirectory() as temp_dir:
            output_path = os.path.join(temp_dir, "debloated.exe")
            original_size = os.path.getsize(self.filepath)

            try:
                debloat_code = process_pe(
                    binary,
                    out_path=output_path,
                    last_ditch_processing=True,
                    cert_preservation=True,
                    log_message=log_message,
                    beginning_file_size=original_size,
                )
            except OSError as e:
                raise AnalyzerRunException(
                    f"File operation failed during Debloat processing: {e}"
                )
            except ValueError as e:
                raise AnalyzerRunException(
                    f"Invalid parameter in Debloat processing: {e}"
                )
            except AttributeError as e:
                raise AnalyzerRunException(
                    f"Debloat library error, possibly malformed PE object: {e}"
                )
            finally:
                # pefile keeps the input file memory-mapped; always release
                # the handle, even when process_pe raises.
                binary.close()

            logger.info(f"Debloat processed {self.filepath} with code {debloat_code}")

            # A zero return code with no output file means debloat found no
            # bloat to strip: report a soft failure without raising.
            if debloat_code == 0 and not os.path.exists(output_path):
                return {
                    "success": False,
                    "error": "No solution found",
                }

            if not os.path.exists(output_path) or not os.path.isfile(output_path):
                raise AnalyzerRunException(
                    "Debloat did not produce a valid output file"
                )

            debloated_size = os.path.getsize(output_path)
            # Percentage of bytes removed; guard against a zero-byte input.
            size_reduction = (
                (original_size - debloated_size) / original_size * 100
                if original_size > 0
                else 0
            )

            with open(output_path, "rb") as f:
                output = f.read()
                # MD5 is reported for compatibility/lookup purposes only,
                # alongside SHA-256 — it is not used as a security primitive.
                debloated_hash = hashlib.md5(output).hexdigest()
                debloated_sha256 = hashlib.sha256(output).hexdigest()

            encoded_output = b64encode(output).decode("utf-8")

            # TemporaryDirectory() removes the whole tree on exit; deleting
            # the file now just frees disk space a little earlier.
            os.remove(output_path)
            logger.debug("Cleaned up temporary file.")

            return {
                "success": True,
                "original_size": original_size,
                "debloated_size": debloated_size,
                "debloated_file": encoded_output,
                "size_reduction_percentage": size_reduction,
                "debloated_hash": debloated_hash,
                "debloated_sha256": debloated_sha256,
            }

    @classmethod
    def update(cls) -> bool:
        # The analyzer has no external data sources to refresh; honor the
        # declared -> bool contract instead of implicitly returning None.
        return False

    @classmethod
    def _monkeypatch(cls, patches: list = None):
        # NOTE: the incoming `patches` argument is intentionally replaced —
        # this mirrors the _monkeypatch convention used by other analyzers,
        # where each class defines its own patch list and chains to super().
        patches = [
            if_mock_connections(
                patch(
                    "debloat.processor.process_pe",
                    return_value=MockUpResponse(
                        {
                            "success": True,
                            "original_size": 3840392,
                            "debloated_file": "TVqQAAMAAAAEAAAA//",
                            "debloated_hash": "f7f92eadfb444e7fce27efa2007a955a",
                            "debloated_size": 813976,
                            "size_reduction_percentage": 78.80487200264973,
                            "debloated_sha256": "f7f92eadfb444e7fce27efa2007a955a",
                        },
                        200,
                    ),
                )
            ),
        ]
        return super()._monkeypatch(patches)
125 changes: 125 additions & 0 deletions api_app/analyzers_manager/migrations/0155_analyzer_config_debloat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from django.db import migrations
from django.db.models.fields.related_descriptors import (
ForwardManyToOneDescriptor,
ForwardOneToOneDescriptor,
ManyToManyDescriptor,
ReverseManyToOneDescriptor,
ReverseOneToOneDescriptor,
)

# Serialized AnalyzerConfig row created by this data migration.
plugin = {
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": None,
        "module": "debloat.Debloat",
        "base_path": "api_app.analyzers_manager.file_analyzers",
    },
    "name": "Debloat",
    # Plain string: the previous value was accidentally wrapped in literal
    # double quotes, which would have been stored/displayed verbatim.
    "description": "Analyzer for debloating PE files using the [Debloat](https://github.com/Squiblydoo/debloat) tool. Reduces file size for easier malware analysis.",
    "disabled": False,
    "soft_time_limit": 300,
    "routing_key": "default",
    "health_check_status": True,
    "type": "file",
    "docker_based": False,
    "maximum_tlp": "CLEAR",
    "observable_supported": [],
    "supported_filetypes": ["application/vnd.microsoft.portable-executable"],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "mapping_data_model": {},
    "model": "analyzers_manager.AnalyzerConfig",
}

# No Parameter or PluginConfig rows are needed for this analyzer.
params = []

values = []


def _get_real_obj(Model, field, value):
    """Resolve a serialized field value into real ORM object(s).

    `value` may be a primary key (int), a natural key (name), a nested dict
    describing a related object, or a list of these for M2M fields. The
    related model is discovered from the descriptor found on `Model.field`;
    non-relational values are returned unchanged.
    """

    def _get_obj(Model, other_model, value):
        # A dict means the related object itself was serialized: resolve
        # its fields recursively, then get-or-create the row.
        if isinstance(value, dict):
            real_vals = {}
            for key, real_val in value.items():
                real_vals[key] = _get_real_obj(other_model, key, real_val)
            value = other_model.objects.get_or_create(**real_vals)[0]
        # it is just the primary key serialized
        else:
            if isinstance(value, int):
                if Model.__name__ == "PluginConfig":
                    # PluginConfig rows are bound to this plugin by name,
                    # not by the serialized pk (pks differ per install).
                    value = other_model.objects.get(name=plugin["name"])
                else:
                    value = other_model.objects.get(pk=value)
            else:
                # Non-int values are natural keys (the `name` field).
                value = other_model.objects.get(name=value)
        return value

    # Forward/reverse FK and one-to-one descriptors: resolve one object.
    if (
        type(getattr(Model, field))
        in [
            ForwardManyToOneDescriptor,
            ReverseManyToOneDescriptor,
            ReverseOneToOneDescriptor,
            ForwardOneToOneDescriptor,
        ]
        and value
    ):
        other_model = getattr(Model, field).get_queryset().model
        value = _get_obj(Model, other_model, value)
    # Many-to-many descriptor: resolve each element of the list.
    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
        other_model = getattr(Model, field).rel.model
        value = [_get_obj(Model, other_model, val) for val in value]
    return value


def _create_object(Model, data):
    """Get-or-create a `Model` row described by the serialized `data`.

    Returns True when a matching object already existed, False when a new
    one was created (M2M fields are only set on newly created rows).
    """
    many_to_many = {}
    plain = {}
    for field_name, raw_value in data.items():
        resolved = _get_real_obj(Model, field_name, raw_value)
        if type(getattr(Model, field_name)) is ManyToManyDescriptor:
            many_to_many[field_name] = resolved
        else:
            plain[field_name] = resolved
    try:
        Model.objects.get(**plain)
    except Model.DoesNotExist:
        # No existing row: validate, persist, then attach M2M relations.
        obj = Model(**plain)
        obj.full_clean()
        obj.save()
        for field_name, resolved in many_to_many.items():
            if resolved is not None:
                getattr(obj, field_name).set(resolved)
        return False
    return True


def migrate(apps, schema_editor):
    """Forward step: create the Debloat AnalyzerConfig and its params/values."""
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    # Idempotence guard: skip entirely if the config row is already present.
    if Model.objects.filter(name=plugin["name"]).exists():
        return
    already_existed = _create_object(Model, plugin)
    if already_existed:
        return
    # Only a freshly created config gets its parameters and values.
    for param in params:
        _create_object(Parameter, param)
    for value in values:
        _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """Backward step: delete the AnalyzerConfig created by this migration."""
    # migrate() also pops "model" from the shared `plugin` dict; supply a
    # default so a forward-then-backward run within one process cannot
    # raise KeyError on the second pop.
    python_path = plugin.pop("model", "analyzers_manager.AnalyzerConfig")
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    # Data migration: inserts the Debloat analyzer configuration row.
    # NOTE(review): atomic=False presumably matches the sibling
    # analyzer-config migrations' convention — confirm against them.
    atomic = False
    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0154_analyzer_config_bbot"),
    ]

    operations = [migrations.RunPython(migrate, reverse_migrate)]
86 changes: 43 additions & 43 deletions docker/test.override.yml
Original file line number Diff line number Diff line change
@@ -1,43 +1,43 @@
services:
uwsgi:
build:
context: ..
dockerfile: docker/Dockerfile
args:
REPO_DOWNLOADER_ENABLED: ${REPO_DOWNLOADER_ENABLED}
WATCHMAN: "true"
PYCTI_VERSION: ${PYCTI_VERSION:-5.10.0}
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl
environment:
- DEBUG=True
- DJANGO_TEST_SERVER=True
- DJANGO_WATCHMAN_TIMEOUT=60

daphne:
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl

nginx:
build:
context: ..
dockerfile: docker/Dockerfile_nginx
image: intelowlproject/intelowl_nginx:test
volumes:
- ../configuration/nginx/django_server.conf:/etc/nginx/conf.d/default.conf

celery_beat:
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl
environment:
- DEBUG=True

celery_worker_default:
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl
environment:
- DEBUG=True
services:
uwsgi:
build:
context: ..
dockerfile: docker/Dockerfile
args:
REPO_DOWNLOADER_ENABLED: ${REPO_DOWNLOADER_ENABLED}
WATCHMAN: "true"
PYCTI_VERSION: ${PYCTI_VERSION:-5.10.0}
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl
environment:
- DEBUG=True
- DJANGO_TEST_SERVER=True
- DJANGO_WATCHMAN_TIMEOUT=60
daphne:
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl
nginx:
build:
context: ..
dockerfile: docker/Dockerfile_nginx
image: intelowlproject/intelowl_nginx:test
volumes:
- ../configuration/nginx/django_server.conf:/etc/nginx/conf.d/default.conf
celery_beat:
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl
environment:
- DEBUG=True
celery_worker_default:
image: intelowlproject/intelowl:test
volumes:
- ../:/opt/deploy/intel_owl
environment:
- DEBUG=True
1 change: 1 addition & 0 deletions requirements/project-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ docxpy==0.8.5
pylnk3==0.4.2
androguard==3.4.0a1 # version >=4.x of androguard raises a dependency conflict with quark-engine==25.1.1
wad==0.4.6
debloat==1.6.4

# httpx required for HTTP/2 support (Mullvad DNS rejects HTTP/1.1 with protocol errors)
httpx[http2]==0.28.1
Expand Down
Loading