From a47c35195dfcd87c2b94d7670c82a73dabde0efd Mon Sep 17 00:00:00 2001 From: Manuel Raynaud Date: Wed, 6 May 2026 12:35:36 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B(backend)=20replace=20document=20cr?= =?UTF-8?q?eation=20table=20locks=20with=20retry=20strategy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have situation where the number of locks in the database can increase dangerously creating deadlock situation. To remove this situation we decided to change the strategy to manage document creation concurrency. We decided to use a retry strategy, trying to create the document multiple times while a usable path is found. To avoid having an inifinite loop, we use a max_attempts counter configurable using the setting TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS --- CHANGELOG.md | 5 +- docs/env.md | 1 + src/backend/core/api/serializers.py | 14 +-- src/backend/core/api/viewsets.py | 23 ++--- src/backend/core/models.py | 24 ++--- .../test_utils_create_tree_node_with_retry.py | 89 +++++++++++++++++++ src/backend/core/utils/treebeard.py | 62 +++++++++++++ src/backend/impress/settings.py | 6 ++ 8 files changed, 180 insertions(+), 44 deletions(-) create mode 100644 src/backend/core/tests/test_utils_create_tree_node_with_retry.py create mode 100644 src/backend/core/utils/treebeard.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b97f80c96..d9d85faa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,15 +20,14 @@ and this project adheres to - 🐛(frontend) Emoji menu doesn't display above comment box #2229 - 🐛(frontend) Block menu doesn't stay open on 1st line #2229 - 🐛(frontend) The "+" on the first line of a new doc doesn't work #2229 +- 🐛(backend) manage race condition between GET and PATCH content #2271 +- 🐛(backend) replace document creation table locks with retry strategy #2274 ### Security - 🔒️(frontend) sanitize color during collaboration #2270 -### Fixed - -- 🐛(backend) manage race condition between GET and PATCH content ## [v5.0.0] - 2026-04-08 diff --git a/docs/env.md b/docs/env.md index 4846274a2..8eb347b86 100644 --- a/docs/env.md +++ b/docs/env.md @@ -135,6 +135,7 @@ These are the environment variables you can set for the `impress-backend` contai | THEME_CUSTOMIZATION_CACHE_TIMEOUT | Cache duration for the customization settings | 86400 | | THEME_CUSTOMIZATION_FILE_PATH | Full path to the file customizing the theme. An example is provided in src/backend/impress/configuration/theme/default.json | BASE_DIR/impress/configuration/theme/default.json | | TRASHBIN_CUTOFF_DAYS | Trashbin cutoff | 30 | +| TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS | Number of attempts to create a document before failing. | 10 | | USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] | | USER_ONBOARDING_DOCUMENTS | A list of documents IDs for which a read-only access will be created for new s | [] | | USER_ONBOARDING_SANDBOX_DOCUMENT | ID of a template sandbox document that will be duplicated for new users | | diff --git a/src/backend/core/api/serializers.py b/src/backend/core/api/serializers.py index 5cfce9e95..80c8a6fa0 100644 --- a/src/backend/core/api/serializers.py +++ b/src/backend/core/api/serializers.py @@ -7,7 +7,6 @@ from base64 import b64decode from os.path import splitext from django.conf import settings -from django.db import connection, transaction from django.db.models import Q from django.utils.functional import lazy from django.utils.text import slugify @@ -24,6 +23,7 @@ from core.services.converter_services import ( ConversionError, Converter, ) +from core.utils.treebeard import create_tree_node_with_retry class UserSerializer(serializers.ModelSerializer): @@ -467,18 +467,12 @@ class ServerCreateDocumentSerializer(serializers.Serializer): {"content": ["Could not convert content"]} ) from err - with transaction.atomic(): - # locks the table to ensure safe concurrent access - with connection.cursor() as cursor: - cursor.execute( - f'LOCK TABLE "{models.Document._meta.db_table}" ' # noqa: SLF001 - "IN SHARE ROW EXCLUSIVE MODE;" - ) - - document = models.Document.add_root( + document = create_tree_node_with_retry( + lambda: models.Document.add_root( title=validated_data["title"], creator=user, ) + ) if user: # Associate the document with the pre-existing user diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 6b8ca0678..8237a5291 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -68,6 +68,7 @@ from core.services.search_indexers import ( ) from core.tasks.mail import send_ask_for_access_mail from core.utils.paths import filter_descendants +from core.utils.treebeard import create_tree_node_with_retry from core.utils.users import users_sharing_documents_with from core.utils.yjs import extract_attachments @@ -706,18 +707,12 @@ class DocumentViewSet( {"file": ["Could not convert file content"]} ) from err - with transaction.atomic(): - # locks the table to ensure safe concurrent access - with connection.cursor() as cursor: - cursor.execute( - f'LOCK TABLE "{models.Document._meta.db_table}" ' # noqa: SLF001 - "IN SHARE ROW EXCLUSIVE MODE;" - ) - - obj = models.Document.add_root( + obj = create_tree_node_with_retry( + lambda: models.Document.add_root( creator=self.request.user, **serializer.validated_data, ) + ) serializer.instance = obj models.DocumentAccess.objects.create( document=obj, @@ -1021,16 +1016,12 @@ class DocumentViewSet( ) serializer.is_valid(raise_exception=True) - with transaction.atomic(): - # "select_for_update" locks the table to ensure safe concurrent access - locked_parent = models.Document.objects.select_for_update().get( - pk=document.pk - ) - - child_document = locked_parent.add_child( + child_document = create_tree_node_with_retry( + lambda: document.add_child( creator=request.user, **serializer.validated_data, ) + ) # Set the created instance to the serializer serializer.instance = child_document diff --git a/src/backend/core/models.py b/src/backend/core/models.py index 511756d20..c23b0cfd7 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -19,7 +19,7 @@ from django.core.cache import cache from django.core.files.base import ContentFile from django.core.files.storage import default_storage from django.core.mail import send_mail -from django.db import connection, models, transaction +from django.db import models, transaction from django.db.models.functions import Left, Length from django.template.loader import render_to_string from django.utils import timezone @@ -39,6 +39,7 @@ from core.choices import ( RoleChoices, get_equivalent_link_definition, ) +from core.utils.treebeard import create_tree_node_with_retry from core.validators import sub_validator logger = getLogger(__name__) @@ -265,8 +266,6 @@ class User(AbstractBaseUser, BaseModel, auth_models.PermissionsMixin): duplicate the sandbox document for the user """ if settings.USER_ONBOARDING_SANDBOX_DOCUMENT: - # transaction.atomic is used in a context manager to avoid a transaction if - # the settings USER_ONBOARDING_SANDBOX_DOCUMENT is unused sandbox_id = settings.USER_ONBOARDING_SANDBOX_DOCUMENT try: template_document = Document.objects.get(id=sandbox_id) @@ -276,20 +275,15 @@ class User(AbstractBaseUser, BaseModel, auth_models.PermissionsMixin): sandbox_id, ) return - with transaction.atomic(): - # locks the table to ensure safe concurrent access - with connection.cursor() as cursor: - cursor.execute( - f'LOCK TABLE "{Document._meta.db_table}" ' # noqa: SLF001 - "IN SHARE ROW EXCLUSIVE MODE;" + sandbox_document = create_tree_node_with_retry( + lambda: Document.add_root( + title=template_document.title, + content=template_document.content, + attachments=template_document.attachments, + duplicated_from=template_document, + creator=self, ) - sandbox_document = Document.add_root( - title=template_document.title, - content=template_document.content, - attachments=template_document.attachments, - duplicated_from=template_document, - creator=self, ) DocumentAccess.objects.create( diff --git a/src/backend/core/tests/test_utils_create_tree_node_with_retry.py b/src/backend/core/tests/test_utils_create_tree_node_with_retry.py new file mode 100644 index 000000000..cc327b280 --- /dev/null +++ b/src/backend/core/tests/test_utils_create_tree_node_with_retry.py @@ -0,0 +1,89 @@ +"""Tests for the create_tree_node_with_retry utils.""" + +from unittest import mock + +from django.core.exceptions import ValidationError as DjangoValidationError +from django.db import IntegrityError + +import pytest + +from core.factories import UserFactory +from core.models import Document +from core.utils.treebeard import _is_tree_path_collision, create_tree_node_with_retry + +pytestmark = pytest.mark.django_db + + +@pytest.mark.parametrize( + "exc", + [ + DjangoValidationError({"path": "not unique"}), + IntegrityError("impress_document_path_key"), + ], +) +def test_utils_create_tree_node_with_retry_exceed_max_attempts(settings, exc): + """Test exceeding the max attempts should reraise the exception.""" + + settings.TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS = 2 + + create_fn = mock.MagicMock() + create_fn.side_effect = exc + + with ( + pytest.raises(exc.__class__), + mock.patch( + "core.utils.treebeard._is_tree_path_collision" + ) as mock__is_tree_path_collision, + ): + mock__is_tree_path_collision.side_effect = _is_tree_path_collision + create_tree_node_with_retry(create_fn) + + mock__is_tree_path_collision.assert_called() + assert mock__is_tree_path_collision.call_count == 2 + assert create_fn.call_count == 2 + + +@pytest.mark.parametrize( + "exc", + [ + DjangoValidationError({"foo": "bar"}), + IntegrityError("not handled"), + ], +) +def test_utils_create_tree_node_with_retry_exceed_exception_not_handled(settings, exc): + """Test with an exception not handled should return reraise it immediately.""" + + settings.TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS = 2 + + create_fn = mock.MagicMock() + create_fn.side_effect = exc + + with ( + pytest.raises(exc.__class__), + mock.patch( + "core.utils.treebeard._is_tree_path_collision" + ) as mock__is_tree_path_collision, + ): + mock__is_tree_path_collision.side_effect = _is_tree_path_collision + create_tree_node_with_retry(create_fn) + + mock__is_tree_path_collision.assert_called() + assert mock__is_tree_path_collision.call_count == 1 + assert create_fn.call_count == 1 + + +def test_utils_create_tree_node_with_retry_success(): + """Test executing successfully the create_fn callback.""" + + user = UserFactory() + + document = create_tree_node_with_retry( + lambda: Document.add_root( + creator=user, + title="success", + ) + ) + + assert isinstance(document, Document) + assert document.title == "success" + assert document.path is not None diff --git a/src/backend/core/utils/treebeard.py b/src/backend/core/utils/treebeard.py new file mode 100644 index 000000000..27f387ff6 --- /dev/null +++ b/src/backend/core/utils/treebeard.py @@ -0,0 +1,62 @@ +"""Treebeard path collision handling utilities.""" + +import logging +import time + +from django.conf import settings +from django.core.exceptions import ValidationError as DjangoValidationError +from django.db import IntegrityError, transaction + +logger = logging.getLogger(__name__) + + +def _is_tree_path_collision(exc): + """Return True when `exc` is caused by a Document.path uniqueness conflict. + + Treebeard computes the materialized path by reading the current siblings; + under concurrency two callers may compute the same value. Depending on + timing this surfaces either as: + + - `django.core.exceptions.ValidationError` raised by `full_clean()` / + `validate_unique()` before the INSERT (BaseModel.save calls full_clean), + with this message `{'path': ['Document with this Path already exists.']}` + - or `IntegrityError` from the database unique index when the validate + step misses the conflict. With this message: + duplicate key value violates unique constraint "impress_document_path_key" + DETAIL: Key (path)=(0000001) already exists. + + """ + if isinstance(exc, DjangoValidationError): + message_dict = getattr(exc, "message_dict", None) + if message_dict is not None: + return "path" in message_dict + return "path" in str(exc).lower() + + # search in the IntegrityError exception + return "impress_document_path_key" in str(exc).lower() + + +def create_tree_node_with_retry(create_fn): + """Run `create_fn` in a fresh atomic block, retrying on path collisions. + + The Document.path field carries a unique constraint, which is the source of + truth that prevents duplicate paths. On collision we let the failed + transaction roll back, and call `create_fn` again so treebeard recomputes + the path from the latest state. + """ + max_attempts = settings.TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS + for attempt in range(max_attempts): + try: + with transaction.atomic(): + return create_fn() + except (IntegrityError, DjangoValidationError) as exc: + if not _is_tree_path_collision(exc) or attempt == max_attempts - 1: + raise + logger.info( + "tree path collision on attempt %d/%d, retrying", + attempt + 1, + max_attempts, + ) + time.sleep(attempt * 0.1) + + raise RuntimeError("create_tree_node_with_retry exited without result") diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index dab41e666..28cec2190 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -1086,6 +1086,12 @@ class Base(Configuration): 60 * 60 * 24, environ_name="CONTENT_METADATA_CACHE_TIMEOUT", environ_prefix=None ) + TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS = values.IntegerValue( + 10, + environ_name="TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS", + environ_prefix=None, + ) + # pylint: disable=invalid-name @property def ENVIRONMENT(self):