✨(malware) keep traces of failed malware analysis tasks

We want to keep failed tasks in the database, along with the reason why they failed. The idea is to decide later what to do with them, for example with tasks that reached the maximum number of retries.
2026-04-25 17:15:14 +02:00 · 2025-11-28 15:19:55 +01:00
parent 6947c146a4
commit a91ad0009e
7 changed files with 115 additions and 18 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,10 @@ and this project adheres to

 ## [Unreleased]

+### Added
+
+- ✨(backend) keep traces of failed malware analysis tasks
+
 ## [0.0.19] - 2025-11-21

 ### Changed
--- a/src/lasuite/malware_detection/backends/jcop.py
+++ b/src/lasuite/malware_detection/backends/jcop.py
@@ -102,7 +102,16 @@ class JCOPBackend(BaseBackend):
        self, file_path: str, error_code: int, error_msg: str, status: ReportStatus | None = None, **kwargs
    ) -> None:
        """Handle a failed analysis."""
-        self.delete_detection(file_path)
+        try:
+            detection = MalwareDetection.objects.get(path=file_path)
+        except MalwareDetection.DoesNotExist:
+            logger.warning("Detection %s not found", file_path)
+        else:
+            detection.status = MalwareDetectionStatus.FAILED
+            detection.error_code = error_code
+            detection.error_msg = error_msg
+            detection.save(update_fields=["status", "error_code", "error_msg"])
+
        self.launch_next_analysis()
        self.callback(
            file_path,
--- a/src/lasuite/malware_detection/migrations/0003_malwaredetection_error_code_and_more.py
+++ b/src/lasuite/malware_detection/migrations/0003_malwaredetection_error_code_and_more.py
@@ -0,0 +1,28 @@
+# Generated by Django 5.2.3 on 2025-11-28 14:16
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('malware_detection', '0002_alter_malwaredetection_parameters'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='malwaredetection',
+            name='error_code',
+            field=models.IntegerField(blank=True, help_text='error code for the detection', null=True),
+        ),
+        migrations.AddField(
+            model_name='malwaredetection',
+            name='error_msg',
+            field=models.TextField(blank=True, help_text='error message for the detection'),
+        ),
+        migrations.AlterField(
+            model_name='malwaredetection',
+            name='status',
+            field=models.CharField(choices=[('pending', 'Pending'), ('processing', 'Processing'), ('failed', 'Failed')], default='pending', help_text='status of the detection', max_length=255),
+        ),
+    ]
--- a/src/lasuite/malware_detection/models.py
+++ b/src/lasuite/malware_detection/models.py
@@ -11,6 +11,7 @@ class MalwareDetectionStatus(models.TextChoices):

    PENDING = "pending", "Pending"
    PROCESSING = "processing", "Processing"
+    FAILED = "failed", "Failed"


 class JsonUUIDEncoder(json.JSONEncoder):
@@ -60,6 +61,15 @@ class MalwareDetection(models.Model):
        blank=True,
        encoder=JsonUUIDEncoder,
    )
+    error_code = models.IntegerField(
+        help_text="error code for the detection",
+        blank=True,
+        null=True,
+    )
+    error_msg = models.TextField(
+        help_text="error message for the detection",
+        blank=True,
+    )

    def __str__(self):
        """Return a string representation of the model."""
--- a/src/lasuite/malware_detection/tasks/jcop.py
+++ b/src/lasuite/malware_detection/tasks/jcop.py
@@ -1,6 +1,7 @@
 """Module containing the tasks for the JCOP backend."""

 import logging
+from enum import IntEnum

 import requests
 from celery import shared_task
@@ -11,6 +12,14 @@ from ..exceptions import MalwareDetectionInvalidAuthenticationError
 logger = logging.getLogger(__name__)


+class MaxRetriesErrorCodes(IntEnum):
+    """Error codes for max retries."""
+
+    TRIGGER_NEW_ANALYSIS = 9000
+    TRIGGER_NEW_ANALYSIS_TIMEOUT = 9001
+    ANALYSE_FILE = 9002
+
+
@shared_task(
    bind=True,
    default_retry_delay=3,
@@ -29,7 +38,11 @@ def analyse_file_async(
        should_retry = backend.check_analysis(file_path, file_hash=file_hash, **kwargs)
    except requests.exceptions.RequestException as exc:
        if self.request.retries >= self.max_retries:
-            backend.failed_analysis(file_path, "Max retries fetching results exceeded")
+            backend.failed_analysis(
+                file_path,
+                error_code=MaxRetriesErrorCodes.ANALYSE_FILE,
+                error_msg="Max retries fetching results exceeded",
+            )
            return
        self.retry(exc=exc)

@@ -49,13 +62,21 @@ def trigger_new_analysis(
        backend.trigger_new_analysis(file_path, **kwargs)
    except requests.exceptions.RequestException as exc:
        if self.request.retries >= self.max_retries:
-            backend.failed_analysis(file_path, "Max retries triggering new analysis exceeded")
+            backend.failed_analysis(
+                file_path,
+                error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS,
+                error_msg="Max retries triggering new analysis exceeded",
+            )
            return
        self.retry(exc=exc)
        return
    except TimeoutError:
        if self.request.retries >= self.max_retries:
-            backend.failed_analysis(file_path, "Max retries triggering new analysis exceeded")
+            backend.failed_analysis(
+                file_path,
+                error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS_TIMEOUT,
+                error_msg="Max retries triggering new analysis exceeded",
+            )
            return
        self.retry(exc=TimeoutError())
        return
--- a/tests/malware_detection/backends/test_jcop_backend.py
+++ b/tests/malware_detection/backends/test_jcop_backend.py
@@ -298,7 +298,7 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
 ):
    """Test check_analysis with errors in the response."""
    file_path, file_hash = jcop_generate_file_path
-    factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
+    malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)

    # Mock the results endpoint
    responses.add(
@@ -325,7 +325,10 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
        },
        **used_kwargs,
    )
-    assert not MalwareDetection.objects.filter(path=file_path).exists()
+    malware_detection.refresh_from_db()
+    assert malware_detection.status == MalwareDetectionStatus.FAILED
+    assert malware_detection.error_code == 4001
+    assert malware_detection.error_msg == "error message"


@responses.activate
@@ -337,7 +340,7 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
 ):
    """Test check_analysis with errors in the response."""
    file_path, file_hash = jcop_generate_file_path
-    factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
+    malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
    next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)

    # Mock the results endpoint
@@ -368,7 +371,10 @@ def test_jcop_backend_analyse_file_async_done_with_error_delete_detection_record
        },
        **used_kwargs,
    )
-    assert not MalwareDetection.objects.filter(path=file_path).exists()
+    malware_detection.refresh_from_db()
+    assert malware_detection.status == MalwareDetectionStatus.FAILED
+    assert malware_detection.error_code == 4001
+    assert malware_detection.error_msg == "error message"
    next_record.refresh_from_db()
    assert next_record.status == MalwareDetectionStatus.PROCESSING

@@ -597,7 +603,7 @@ def test_jcop_backend_trigger_new_analysis_unauthorized_complete_flow(
 ):
    """Test submission with invalid API key."""
    file_path, _ = jcop_generate_file_path
-    factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
+    malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
    next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)

    # Mock the submit endpoint
@@ -628,7 +634,10 @@ def test_jcop_backend_trigger_new_analysis_unauthorized_complete_flow(
        },
        **used_kwargs,
    )
-    assert not MalwareDetection.objects.filter(path=file_path).exists()
+    malware_detection.refresh_from_db()
+    assert malware_detection.status == MalwareDetectionStatus.FAILED
+    assert malware_detection.error_code == 401
+    assert malware_detection.error_msg == "Invalid API key"
    next_record.refresh_from_db()
    assert next_record.status == MalwareDetectionStatus.PROCESSING

@@ -693,7 +702,7 @@ def test_jcop_backend_trigger_new_analysis_file_too_large_complete_flow(
 ):
    """Test submission with file too large."""
    file_path, _ = jcop_generate_file_path
-    factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
+    malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
    next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)
    # Mock the submit endpoint
    responses.add(
@@ -719,7 +728,10 @@ def test_jcop_backend_trigger_new_analysis_file_too_large_complete_flow(
        },
        **used_kwargs,
    )
-    assert not MalwareDetection.objects.filter(path=file_path).exists()
+    malware_detection.refresh_from_db()
+    assert malware_detection.status == MalwareDetectionStatus.FAILED
+    assert malware_detection.error_code == 413
+    assert malware_detection.error_msg == "File too large"
    next_record.refresh_from_db()
    assert next_record.status == MalwareDetectionStatus.PROCESSING

@@ -784,7 +796,7 @@ def test_jcop_backend_trigger_new_analysis_unknown_status_complete_flow(
 ):
    """Test submission with unknown status code."""
    file_path, _ = jcop_generate_file_path
-    factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
+    malware_detection = factories.MalwareDetectionFactory(path=file_path, status=MalwareDetectionStatus.PROCESSING)
    next_record = factories.MalwareDetectionFactory(status=MalwareDetectionStatus.PENDING)
    # Mock the submit endpoint
    responses.add(
@@ -810,7 +822,10 @@ def test_jcop_backend_trigger_new_analysis_unknown_status_complete_flow(
        },
        **used_kwargs,
    )
-    assert not MalwareDetection.objects.filter(path=file_path).exists()
+    malware_detection.refresh_from_db()
+    assert malware_detection.status == MalwareDetectionStatus.FAILED
+    assert malware_detection.error_code == 500
+    assert malware_detection.error_msg == "Unknown treatment"
    next_record.refresh_from_db()
    assert next_record.status == MalwareDetectionStatus.PROCESSING

--- a/tests/malware_detection/tasks/test_jcop_tasks.py
+++ b/tests/malware_detection/tasks/test_jcop_tasks.py
@@ -7,7 +7,7 @@ import requests
 from celery.exceptions import Retry

 from lasuite.malware_detection.exceptions import MalwareDetectionInvalidAuthenticationError
-from lasuite.malware_detection.tasks.jcop import analyse_file_async, trigger_new_analysis
+from lasuite.malware_detection.tasks.jcop import MaxRetriesErrorCodes, analyse_file_async, trigger_new_analysis


@pytest.fixture
@@ -72,7 +72,9 @@ def test_analyse_file_async_request_exception_max_retries(mock_backend):
        analyse_file_async("file.txt")

    backend.check_analysis.assert_called_once_with("file.txt", file_hash=None)
-    backend.failed_analysis.assert_called_once_with("file.txt", "Max retries fetching results exceeded")
+    backend.failed_analysis.assert_called_once_with(
+        "file.txt", error_code=MaxRetriesErrorCodes.ANALYSE_FILE, error_msg="Max retries fetching results exceeded"
+    )


 def test_analyse_file_async_with_auth_error_no_retry(mock_backend):
@@ -140,7 +142,11 @@ def test_trigger_new_analysis_request_exception_max_retries(mock_backend):
        trigger_new_analysis("file.txt")

    backend.trigger_new_analysis.assert_called_once_with("file.txt")
-    backend.failed_analysis.assert_called_once_with("file.txt", "Max retries triggering new analysis exceeded")
+    backend.failed_analysis.assert_called_once_with(
+        "file.txt",
+        error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS,
+        error_msg="Max retries triggering new analysis exceeded",
+    )


 def test_trigger_new_analysis_timeout_max_retries(mock_backend):
@@ -153,7 +159,11 @@ def test_trigger_new_analysis_timeout_max_retries(mock_backend):
        trigger_new_analysis("file.txt")

    backend.trigger_new_analysis.assert_called_once_with("file.txt")
-    backend.failed_analysis.assert_called_once_with("file.txt", "Max retries triggering new analysis exceeded")
+    backend.failed_analysis.assert_called_once_with(
+        "file.txt",
+        error_code=MaxRetriesErrorCodes.TRIGGER_NEW_ANALYSIS_TIMEOUT,
+        error_msg="Max retries triggering new analysis exceeded",
+    )


 def test_trigger_new_analysis_with_auth_error_no_retry(mock_backend):