🐛(backend) replace document creation table locks with retry strategy

We have situation where the number of locks in the database can increase
dangerously creating deadlock situation. To remove this situation we
decided to change the strategy to manage document creation concurrency.
We decided to use a retry strategy, trying to create the document
multiple times while a usable path is found. To avoid having an
inifinite loop, we use a max_attempts counter configurable using the
setting TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS
This commit is contained in:
Manuel Raynaud
2026-05-06 12:35:36 +02:00
parent 8f67b37d70
commit a47c35195d
8 changed files with 180 additions and 44 deletions

View File

@@ -20,15 +20,14 @@ and this project adheres to
- 🐛(frontend) Emoji menu doesn't display above comment box #2229
- 🐛(frontend) Block menu doesn't stay open on 1st line #2229
- 🐛(frontend) The "+" on the first line of a new doc doesn't work #2229
- 🐛(backend) manage race condition between GET and PATCH content #2271
- 🐛(backend) replace document creation table locks with retry strategy #2274
### Security
- 🔒️(frontend) sanitize color during collaboration #2270
### Fixed
- 🐛(backend) manage race condition between GET and PATCH content
## [v5.0.0] - 2026-04-08

View File

@@ -135,6 +135,7 @@ These are the environment variables you can set for the `impress-backend` contai
| THEME_CUSTOMIZATION_CACHE_TIMEOUT | Cache duration for the customization settings | 86400 |
| THEME_CUSTOMIZATION_FILE_PATH | Full path to the file customizing the theme. An example is provided in src/backend/impress/configuration/theme/default.json | BASE_DIR/impress/configuration/theme/default.json |
| TRASHBIN_CUTOFF_DAYS | Trashbin cutoff | 30 |
| TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS | Number of attempts to create a document before failing. | 10 |
| USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] |
| USER_ONBOARDING_DOCUMENTS | A list of documents IDs for which a read-only access will be created for new s | [] |
| USER_ONBOARDING_SANDBOX_DOCUMENT | ID of a template sandbox document that will be duplicated for new users | |

View File

@@ -7,7 +7,6 @@ from base64 import b64decode
from os.path import splitext
from django.conf import settings
from django.db import connection, transaction
from django.db.models import Q
from django.utils.functional import lazy
from django.utils.text import slugify
@@ -24,6 +23,7 @@ from core.services.converter_services import (
ConversionError,
Converter,
)
from core.utils.treebeard import create_tree_node_with_retry
class UserSerializer(serializers.ModelSerializer):
@@ -467,18 +467,12 @@ class ServerCreateDocumentSerializer(serializers.Serializer):
{"content": ["Could not convert content"]}
) from err
with transaction.atomic():
# locks the table to ensure safe concurrent access
with connection.cursor() as cursor:
cursor.execute(
f'LOCK TABLE "{models.Document._meta.db_table}" ' # noqa: SLF001
"IN SHARE ROW EXCLUSIVE MODE;"
)
document = models.Document.add_root(
document = create_tree_node_with_retry(
lambda: models.Document.add_root(
title=validated_data["title"],
creator=user,
)
)
if user:
# Associate the document with the pre-existing user

View File

@@ -68,6 +68,7 @@ from core.services.search_indexers import (
)
from core.tasks.mail import send_ask_for_access_mail
from core.utils.paths import filter_descendants
from core.utils.treebeard import create_tree_node_with_retry
from core.utils.users import users_sharing_documents_with
from core.utils.yjs import extract_attachments
@@ -706,18 +707,12 @@ class DocumentViewSet(
{"file": ["Could not convert file content"]}
) from err
with transaction.atomic():
# locks the table to ensure safe concurrent access
with connection.cursor() as cursor:
cursor.execute(
f'LOCK TABLE "{models.Document._meta.db_table}" ' # noqa: SLF001
"IN SHARE ROW EXCLUSIVE MODE;"
)
obj = models.Document.add_root(
obj = create_tree_node_with_retry(
lambda: models.Document.add_root(
creator=self.request.user,
**serializer.validated_data,
)
)
serializer.instance = obj
models.DocumentAccess.objects.create(
document=obj,
@@ -1021,16 +1016,12 @@ class DocumentViewSet(
)
serializer.is_valid(raise_exception=True)
with transaction.atomic():
# "select_for_update" locks the table to ensure safe concurrent access
locked_parent = models.Document.objects.select_for_update().get(
pk=document.pk
)
child_document = locked_parent.add_child(
child_document = create_tree_node_with_retry(
lambda: document.add_child(
creator=request.user,
**serializer.validated_data,
)
)
# Set the created instance to the serializer
serializer.instance = child_document

View File

@@ -19,7 +19,7 @@ from django.core.cache import cache
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.core.mail import send_mail
from django.db import connection, models, transaction
from django.db import models, transaction
from django.db.models.functions import Left, Length
from django.template.loader import render_to_string
from django.utils import timezone
@@ -39,6 +39,7 @@ from core.choices import (
RoleChoices,
get_equivalent_link_definition,
)
from core.utils.treebeard import create_tree_node_with_retry
from core.validators import sub_validator
logger = getLogger(__name__)
@@ -265,8 +266,6 @@ class User(AbstractBaseUser, BaseModel, auth_models.PermissionsMixin):
duplicate the sandbox document for the user
"""
if settings.USER_ONBOARDING_SANDBOX_DOCUMENT:
# transaction.atomic is used in a context manager to avoid a transaction if
# the settings USER_ONBOARDING_SANDBOX_DOCUMENT is unused
sandbox_id = settings.USER_ONBOARDING_SANDBOX_DOCUMENT
try:
template_document = Document.objects.get(id=sandbox_id)
@@ -276,20 +275,15 @@ class User(AbstractBaseUser, BaseModel, auth_models.PermissionsMixin):
sandbox_id,
)
return
with transaction.atomic():
# locks the table to ensure safe concurrent access
with connection.cursor() as cursor:
cursor.execute(
f'LOCK TABLE "{Document._meta.db_table}" ' # noqa: SLF001
"IN SHARE ROW EXCLUSIVE MODE;"
sandbox_document = create_tree_node_with_retry(
lambda: Document.add_root(
title=template_document.title,
content=template_document.content,
attachments=template_document.attachments,
duplicated_from=template_document,
creator=self,
)
sandbox_document = Document.add_root(
title=template_document.title,
content=template_document.content,
attachments=template_document.attachments,
duplicated_from=template_document,
creator=self,
)
DocumentAccess.objects.create(

View File

@@ -0,0 +1,89 @@
"""Tests for the create_tree_node_with_retry utils."""
from unittest import mock
from django.core.exceptions import ValidationError as DjangoValidationError
from django.db import IntegrityError
import pytest
from core.factories import UserFactory
from core.models import Document
from core.utils.treebeard import _is_tree_path_collision, create_tree_node_with_retry
pytestmark = pytest.mark.django_db
@pytest.mark.parametrize(
"exc",
[
DjangoValidationError({"path": "not unique"}),
IntegrityError("impress_document_path_key"),
],
)
def test_utils_create_tree_node_with_retry_exceed_max_attempts(settings, exc):
"""Test exceeding the max attempts should reraise the exception."""
settings.TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS = 2
create_fn = mock.MagicMock()
create_fn.side_effect = exc
with (
pytest.raises(exc.__class__),
mock.patch(
"core.utils.treebeard._is_tree_path_collision"
) as mock__is_tree_path_collision,
):
mock__is_tree_path_collision.side_effect = _is_tree_path_collision
create_tree_node_with_retry(create_fn)
mock__is_tree_path_collision.assert_called()
assert mock__is_tree_path_collision.call_count == 2
assert create_fn.call_count == 2
@pytest.mark.parametrize(
"exc",
[
DjangoValidationError({"foo": "bar"}),
IntegrityError("not handled"),
],
)
def test_utils_create_tree_node_with_retry_exceed_exception_not_handled(settings, exc):
"""Test with an exception not handled should return reraise it immediately."""
settings.TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS = 2
create_fn = mock.MagicMock()
create_fn.side_effect = exc
with (
pytest.raises(exc.__class__),
mock.patch(
"core.utils.treebeard._is_tree_path_collision"
) as mock__is_tree_path_collision,
):
mock__is_tree_path_collision.side_effect = _is_tree_path_collision
create_tree_node_with_retry(create_fn)
mock__is_tree_path_collision.assert_called()
assert mock__is_tree_path_collision.call_count == 1
assert create_fn.call_count == 1
def test_utils_create_tree_node_with_retry_success():
"""Test executing successfully the create_fn callback."""
user = UserFactory()
document = create_tree_node_with_retry(
lambda: Document.add_root(
creator=user,
title="success",
)
)
assert isinstance(document, Document)
assert document.title == "success"
assert document.path is not None

View File

@@ -0,0 +1,62 @@
"""Treebeard path collision handling utilities."""
import logging
import time
from django.conf import settings
from django.core.exceptions import ValidationError as DjangoValidationError
from django.db import IntegrityError, transaction
logger = logging.getLogger(__name__)
def _is_tree_path_collision(exc):
"""Return True when `exc` is caused by a Document.path uniqueness conflict.
Treebeard computes the materialized path by reading the current siblings;
under concurrency two callers may compute the same value. Depending on
timing this surfaces either as:
- `django.core.exceptions.ValidationError` raised by `full_clean()` /
`validate_unique()` before the INSERT (BaseModel.save calls full_clean),
with this message `{'path': ['Document with this Path already exists.']}`
- or `IntegrityError` from the database unique index when the validate
step misses the conflict. With this message:
duplicate key value violates unique constraint "impress_document_path_key"
DETAIL: Key (path)=(0000001) already exists.
"""
if isinstance(exc, DjangoValidationError):
message_dict = getattr(exc, "message_dict", None)
if message_dict is not None:
return "path" in message_dict
return "path" in str(exc).lower()
# search in the IntegrityError exception
return "impress_document_path_key" in str(exc).lower()
def create_tree_node_with_retry(create_fn):
"""Run `create_fn` in a fresh atomic block, retrying on path collisions.
The Document.path field carries a unique constraint, which is the source of
truth that prevents duplicate paths. On collision we let the failed
transaction roll back, and call `create_fn` again so treebeard recomputes
the path from the latest state.
"""
max_attempts = settings.TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS
for attempt in range(max_attempts):
try:
with transaction.atomic():
return create_fn()
except (IntegrityError, DjangoValidationError) as exc:
if not _is_tree_path_collision(exc) or attempt == max_attempts - 1:
raise
logger.info(
"tree path collision on attempt %d/%d, retrying",
attempt + 1,
max_attempts,
)
time.sleep(attempt * 0.1)
raise RuntimeError("create_tree_node_with_retry exited without result")

View File

@@ -1086,6 +1086,12 @@ class Base(Configuration):
60 * 60 * 24, environ_name="CONTENT_METADATA_CACHE_TIMEOUT", environ_prefix=None
)
TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS = values.IntegerValue(
10,
environ_name="TREEBEARD_PATH_COMPUTE_RETRY_MAX_ATTEMPTS",
environ_prefix=None,
)
# pylint: disable=invalid-name
@property
def ENVIRONMENT(self):