(malware) keep traces of failed malware analysis tasks

We want to keep failed tasks in the database, along with the reason why
they failed. The idea is to decide later what to do with them, for
example with tasks that have reached the maximum number of retries.
This commit is contained in:
Manuel Raynaud
2025-11-28 15:19:55 +01:00
parent 6947c146a4
commit a91ad0009e
7 changed files with 115 additions and 18 deletions

View File

@@ -8,6 +8,10 @@ and this project adheres to
## [Unreleased]
### Added
- ✨(backend) keep traces of failed malware analysis tasks
## [0.0.19] - 2025-11-21
### Changed

View File

@@ -102,7 +102,16 @@ class JCOPBackend(BaseBackend):
self, file_path: str, error_code: int, error_msg: str, status: ReportStatus | None = None, **kwargs
) -> None:
"""Handle a failed analysis."""
self.delete_detection(file_path)
try:
detection = MalwareDetection.objects.get(path=file_path)
except MalwareDetection.DoesNotExist:
logger.warning("Detection %s not found", file_path)
else:
detection.status = MalwareDetectionStatus.FAILED
detection.error_code = error_code
detection.error_msg = error_msg
detection.save(update_fields=["status", "error_code", "error_msg"])
self.launch_next_analysis()
self.callback(
file_path,

View File

@@ -0,0 +1,28 @@
# Generated by Django 5.2.3 on 2025-11-28 14:16
from django.db import migrations, models
class Migration(migrations.Migration):
    # Schema change that lets a MalwareDetection row record why it failed:
    # two new columns (error_code, error_msg) plus a "failed" status choice.

    dependencies = [
        ("malware_detection", "0002_alter_malwaredetection_parameters"),
    ]

    operations = [
        # Optional integer error code; NULL is allowed.
        migrations.AddField(
            model_name="malwaredetection",
            name="error_code",
            field=models.IntegerField(
                null=True,
                blank=True,
                help_text="error code for the detection",
            ),
        ),
        # Free-form error message; may be left blank.
        migrations.AddField(
            model_name="malwaredetection",
            name="error_msg",
            field=models.TextField(
                blank=True,
                help_text="error message for the detection",
            ),
        ),
        # Same column definition as before, with "failed" in the choices.
        migrations.AlterField(
            model_name="malwaredetection",
            name="status",
            field=models.CharField(
                max_length=255,
                default="pending",
                help_text="status of the detection",
                choices=[
                    ("pending", "Pending"),
                    ("processing", "Processing"),
                    ("failed", "Failed"),
                ],
            ),
        ),
    ]

View File

@@ -11,6 +11,7 @@ class MalwareDetectionStatus(models.TextChoices):
PENDING = "pending", "Pending"
PROCESSING = "processing", "Processing"
FAILED = "failed", "Failed"
class JsonUUIDEncoder(json.JSONEncoder):
@@ -60,6 +61,15 @@ class MalwareDetection(models.Model):
blank=True,
encoder=JsonUUIDEncoder,
)
error_code = models.IntegerField(
help_text="error code for the detection",
blank=True,
null=True,
)
error_msg = models.TextField(
help_text="error message for the detection",
blank=True,
)
def __str__(self):
"""Return a string representation of the model."""

View File

@@ -1,6 +1,7 @@
"""Module containing the tasks for the JCOP backend."""
import logging
from enum import IntEnum
import requests
from celery import shared_task
@@ -11,6 +12,14 @@ from ..exceptions import MalwareDetectionInvalidAuthenticationError
logger = logging.getLogger(__name__)
class MaxRetriesErrorCodes(IntEnum):
    """Integer error codes passed to ``failed_analysis`` once retries are exhausted."""

    # Request error while triggering a new analysis.
    TRIGGER_NEW_ANALYSIS = 9000
    # Timeout while triggering a new analysis.
    TRIGGER_NEW_ANALYSIS_TIMEOUT = 9001
    # Request error while fetching the analysis results.
    ANALYSE_FILE = 9002
@shared_task(
bind=True,
default_retry_delay=3,
@@ -29,7 +38,11 @@ def analyse_file_async(
should_retry = backend.check_analysis(file_path, file_hash=file_hash, **kwargs)
except requests.exceptions.RequestException as exc:
if self.request.retries >= self.max_retries:
backend.failed_analysis(file_path, "Max retries fetching results exceeded")
backend.failed_analysis(
file_path,
error_code=MaxRetriesErrorCodes.ANALYSE_FILE,
error_msg="Max retries fetching results exceeded",
)
return
self.retry(exc=exc)
@@ -49,13 +62,21 @@ def trigger_new_analysis(
backend.trigger_new_analysis(file_path, **kwargs)
except requests.exceptions.RequestException as exc:
if self.request.retries >= self.max_retries:
backend.failed_analysis(file_path, "Max retries triggering new analysis exceeded")
backend.failed_analysis(
file_path,
error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS,
error_msg="Max retries triggering new analysis exceeded",
)
return
self.retry(exc=exc)
return
except TimeoutError:
if self.request.retries >= self.max_retries:
backend.failed_analysis(file_path, "Max retries triggering new analysis exceeded")
backend.failed_analysis(
file_path,
error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS_TIMEOUT,
error_msg="Max retries triggering new analysis exceeded",
)
return
self.retry(exc=TimeoutError())
return

View File

@@ -298,7 +298,7 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
):
"""Test check_analysis with errors in the response."""
file_path, file_hash = jcop_generate_file_path
factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
# Mock the results endpoint
responses.add(
@@ -325,7 +325,10 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
},
**used_kwargs,
)
assert not MalwareDetection.objects.filter(path=file_path).exists()
malware_detection.refresh_from_db()
assert malware_detection.status == MalwareDetectionStatus.FAILED
assert malware_detection.error_code == 4001
assert malware_detection.error_msg == "error message"
@responses.activate
@@ -337,7 +340,7 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
):
"""Test check_analysis with errors in the response."""
file_path, file_hash = jcop_generate_file_path
factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)
# Mock the results endpoint
@@ -368,7 +371,10 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
},
**used_kwargs,
)
assert not MalwareDetection.objects.filter(path=file_path).exists()
malware_detection.refresh_from_db()
assert malware_detection.status == MalwareDetectionStatus.FAILED
assert malware_detection.error_code == 4001
assert malware_detection.error_msg == "error message"
next_record.refresh_from_db()
assert next_record.status == MalwareDetectionStatus.PROCESSING
@@ -597,7 +603,7 @@ def test_jcop_backend_trigger_new_analysis_unauthorized_complete_flow(
):
"""Test submission with invalid API key."""
file_path, _ = jcop_generate_file_path
factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)
# Mock the submit endpoint
@@ -628,7 +634,10 @@ def test_jcop_backend_trigger_new_analysis_unauthorized_complete_flow(
},
**used_kwargs,
)
assert not MalwareDetection.objects.filter(path=file_path).exists()
malware_detection.refresh_from_db()
assert malware_detection.status == MalwareDetectionStatus.FAILED
assert malware_detection.error_code == 401
assert malware_detection.error_msg == "Invalid API key"
next_record.refresh_from_db()
assert next_record.status == MalwareDetectionStatus.PROCESSING
@@ -693,7 +702,7 @@ def test_jcop_backend_trigger_new_analysis_file_too_large_complete_flow(
):
"""Test submission with file too large."""
file_path, _ = jcop_generate_file_path
factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)
# Mock the submit endpoint
responses.add(
@@ -719,7 +728,10 @@ def test_jcop_backend_trigger_new_analysis_file_too_large_complete_flow(
},
**used_kwargs,
)
assert not MalwareDetection.objects.filter(path=file_path).exists()
malware_detection.refresh_from_db()
assert malware_detection.status == MalwareDetectionStatus.FAILED
assert malware_detection.error_code == 413
assert malware_detection.error_msg == "File too large"
next_record.refresh_from_db()
assert next_record.status == MalwareDetectionStatus.PROCESSING
@@ -784,7 +796,7 @@ def test_jcop_backend_trigger_new_analysis_unknown_status_complete_flow(
):
"""Test submission with unknown status code."""
file_path, _ = jcop_generate_file_path
factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)
# Mock the submit endpoint
responses.add(
@@ -810,7 +822,10 @@ def test_jcop_backend_trigger_new_analysis_unknown_status_complete_flow(
},
**used_kwargs,
)
assert not MalwareDetection.objects.filter(path=file_path).exists()
malware_detection.refresh_from_db()
assert malware_detection.status == MalwareDetectionStatus.FAILED
assert malware_detection.error_code == 500
assert malware_detection.error_msg == "Unknown treatment"
next_record.refresh_from_db()
assert next_record.status == MalwareDetectionStatus.PROCESSING

View File

@@ -7,7 +7,7 @@ import requests
from celery.exceptions import Retry
from lasuite.malware_detection.exceptions import MalwareDetectionInvalidAuthenticationError
from lasuite.malware_detection.tasks.jcop import analyse_file_async, trigger_new_analysis
from lasuite.malware_detection.tasks.jcop import MaxRetriesErrorCodes, analyse_file_async, trigger_new_analysis
@pytest.fixture
@@ -72,7 +72,9 @@ def test_analyse_file_async_request_exception_max_retries(mock_backend):
analyse_file_async("file.txt")
backend.check_analysis.assert_called_once_with("file.txt", file_hash=None)
backend.failed_analysis.assert_called_once_with("file.txt", "Max retries fetching results exceeded")
backend.failed_analysis.assert_called_once_with(
"file.txt", error_code=MaxRetriesErrorCodes.ANALYSE_FILE, error_msg="Max retries fetching results exceeded"
)
def test_analyse_file_async_with_auth_error_no_retry(mock_backend):
@@ -140,7 +142,11 @@ def test_trigger_new_analysis_request_exception_max_retries(mock_backend):
trigger_new_analysis("file.txt")
backend.trigger_new_analysis.assert_called_once_with("file.txt")
backend.failed_analysis.assert_called_once_with("file.txt", "Max retries triggering new analysis exceeded")
backend.failed_analysis.assert_called_once_with(
"file.txt",
error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS,
error_msg="Max retries triggering new analysis exceeded",
)
def test_trigger_new_analysis_timeout_max_retries(mock_backend):
@@ -153,7 +159,11 @@ def test_trigger_new_analysis_timeout_max_retries(mock_backend):
trigger_new_analysis("file.txt")
backend.trigger_new_analysis.assert_called_once_with("file.txt")
backend.failed_analysis.assert_called_once_with("file.txt", "Max retries triggering new analysis exceeded")
backend.failed_analysis.assert_called_once_with(
"file.txt",
error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS_TIMEOUT,
error_msg="Max retries triggering new analysis exceeded",
)
def test_trigger_new_analysis_with_auth_error_no_retry(mock_backend):