Files
docs/src/backend/core/tests/test_utils.py
Charles Englebert 0fca6db79c Integrate Find (#1834)
## Purpose

integrate Find to Docs

## Proposal

- [x]  add a `useSeachDocs` hook in charge of calling the search
endpoint.
- [x]  add an optional `path` param to the `search` route. This param
represents the parent document path in the case of a sub-document
(descendants) search.
- [x] return Indexer results directly without DB calls to retrieve the
Document objects. All the information necessary for display is indexed in
Find. We can skip the DB calls and improve performance.
- [x] ♻️ refactor react `DocSearchContent` components.
`DocSearchContent` and `DocSearchSubContent` are now merged into a single
component handling all search scenarios and relying on the single
`search` route.
- [x] 🔥remove pagination logic in the Indexer. Removing the DB calls
also removes the DRF queryset object which handles the pagination. Also
we consider pagination not to be necessary for search v1.
- [x] 🔥remove the `document/<document_id>/descendants` route. This route
is not used anymore. The logic for finding the descendants is moved to
the internal `_list_descendants` method. This method is based on the
parent `path` instead of the parent `id`, which has consequences for
user access management: relying on the path prevents the use
of the `self.get_object()` method, which used to handle the user access
logic.
- [x] handle fallback to the DRF-based title search when the indexer is
not configured, badly configured, or failing at run time.
- [x] handle language extension in `title` field. Find returns titles
with a language extension (ex: `{ title.fr: "rapport d'activité" }`
instead of `{ "title": "rapport d'activité" }`).
- [x] 🔧 add a `common.test` file to allow running the tests without
docker
- [x] ♻️ rename `SearchIndexer` -> `FindDocumentIndexer`. This class has
to do with Find in particular and the convention is more coherent with
`BaseDocumentIndexer`
- [x] ♻️ rename `SEARCH_INDEXER_URL` -> `INDEXING_URL` and
`SEARCH_INDEXER_QUERY_URL` -> `SEARCH_URL`. I found the original names
very confusing.
- [x] 🔧 update the environment variables to activate the
FindDocumentIndexer.
- [x] automate the generation of encryption key during bootstrap.
OIDC_STORE_REFRESH_TOKEN_KEY is a mandatory secret key. We cannot push
it to GitHub and we want any contributor to be able to run the app by
only running `make bootstrap`. We chose to generate it and write it
into `common.local` during bootstrap.

## External contributions

Thank you for your contribution! 🎉  

Please ensure the following items are checked before submitting your
pull request:
- [x] I have read and followed the [contributing
guidelines](https://github.com/suitenumerique/docs/blob/main/CONTRIBUTING.md)
- [x] I have read and agreed to the [Code of
Conduct](https://github.com/suitenumerique/docs/blob/main/CODE_OF_CONDUCT.md)
- [x] I have signed off my commits with `git commit --signoff` (DCO
compliance)
- [x] I have signed my commits with my SSH or GPG key (`git commit -S`)
- [x] My commit messages follow the required format: `<gitmoji>(type)
title description`
- [x] I have added a changelog entry under `## [Unreleased]` section (if
noticeable change)
- [x] I have added corresponding tests for new features or bug fixes (if
applicable)

---------

Signed-off-by: charles <charles.englebert@protonmail.com>
2026-03-17 17:32:03 +01:00

243 lines
8.3 KiB
Python

"""Test util base64_yjs_to_text."""
import base64
import uuid
from django.core.cache import cache
import pycrdt
import pytest
from core import factories, utils
pytestmark = pytest.mark.django_db
# Shared fixture: a base64 string that is an example of what is saved in the
# database. It was generated from the blocknote editor and contains
# the text \n# *Hello* \n- w**or**ld
# (i.e. an italic "Hello" heading followed by a bullet item "w**or**ld").
TEST_BASE64_STRING = (
    "AR717vLVDgAHAQ5kb2N1bWVudC1zdG9yZQMKYmxvY2tHcm91cAcA9e7y1Q4AAw5ibG9ja0NvbnRh"
    "aW5lcgcA9e7y1Q4BAwdoZWFkaW5nBwD17vLVDgIGBgD17vLVDgMGaXRhbGljAnt9hPXu8tUOBAVI"
    "ZWxsb4b17vLVDgkGaXRhbGljBG51bGwoAPXu8tUOAg10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y"
    "1Q4CBWxldmVsAX0BKAD17vLVDgECaWQBdyQwNGQ2MjM0MS04MzI2LTQyMzYtYTA4My00ODdlMjZm"
    "YWQyMzAoAPXu8tUOAQl0ZXh0Q29sb3IBdwdkZWZhdWx0KAD17vLVDgEPYmFja2dyb3VuZENvbG9y"
    "AXcHZGVmYXVsdIf17vLVDgEDDmJsb2NrQ29udGFpbmVyBwD17vLVDhADDmJ1bGxldExpc3RJdGVt"
    "BwD17vLVDhEGBAD17vLVDhIBd4b17vLVDhMEYm9sZAJ7fYT17vLVDhQCb3KG9e7y1Q4WBGJvbGQE"
    "bnVsbIT17vLVDhcCbGQoAPXu8tUOEQ10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y1Q4QAmlkAXck"
    "ZDM1MWUwNjgtM2U1NS00MjI2LThlYTUtYWJiMjYzMTk4ZTJhKAD17vLVDhAJdGV4dENvbG9yAXcH"
    "ZGVmYXVsdCgA9e7y1Q4QD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHSH9e7y1Q4QAw5ibG9ja0Nv"
    "bnRhaW5lcgcA9e7y1Q4eAwlwYXJhZ3JhcGgoAPXu8tUOHw10ZXh0QWxpZ25tZW50AXcEbGVmdCgA"
    "9e7y1Q4eAmlkAXckODk3MDBjMDctZTBlMS00ZmUwLWFjYTItODQ5MzIwOWE3ZTQyKAD17vLVDh4J"
    "dGV4dENvbG9yAXcHZGVmYXVsdCgA9e7y1Q4eD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHQA"
)
def test_utils_base64_yjs_to_text():
    """Extracting text from a stored Yjs document yields its plain-text content."""
    extracted = utils.base64_yjs_to_text(TEST_BASE64_STRING)

    assert extracted == "Hello w or ld"
def test_utils_base64_yjs_to_xml():
    """Converting a stored Yjs document to XML exposes its heading and list item."""
    xml = utils.base64_yjs_to_xml(TEST_BASE64_STRING)

    # The test accepts either attribute order on the heading element.
    heading_variants = (
        '<heading textAlignment="left" level="1"><italic>Hello</italic></heading>',
        '<heading level="1" textAlignment="left"><italic>Hello</italic></heading>',
    )
    assert any(variant in xml for variant in heading_variants)

    bullet_item = (
        '<bulletListItem textAlignment="left">w<bold>or</bold>ld</bulletListItem>'
    )
    assert bullet_item in xml
def test_utils_extract_attachments():
    """
    Attachment keys referenced through a "/media/" URL are extracted, whether
    the URL sits in an element attribute or in a text node.
    """
    document_id = uuid.uuid4()

    # Three attachment keys; only the first one belongs to `document_id`.
    image_key1 = f"{document_id!s}/attachments/{uuid.uuid4()!s}.png"
    image_key2 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"
    image_key3 = f"{uuid.uuid4()!s}/attachments/{uuid.uuid4()!s}.png"

    image_url1 = f"http://localhost/media/{image_key1:s}"
    # No "/media/" segment in this one.
    image_url2 = f"http://localhost/{image_key2:s}"
    image_url3 = f"http://localhost/media/{image_key3:s}"

    ydoc = pycrdt.Doc()
    children = [
        pycrdt.XmlElement("img", {"src": image_url1}),
        pycrdt.XmlElement("img", {"src": image_url2}),
        pycrdt.XmlElement("p", {}, [pycrdt.XmlText(image_url3)]),
    ]
    ydoc["document-store"] = pycrdt.XmlFragment(children)

    base64_string = base64.b64encode(ydoc.get_update()).decode("utf-8")

    # image_key2 is missing the "/media/" part and shouldn't get extracted
    expected_keys = [image_key1, image_key3]
    assert utils.extract_attachments(base64_string) == expected_keys
def test_utils_get_ancestor_to_descendants_map_single_path():
    """A single path is listed under each of its prefixes, itself included."""
    leaf = "000100020005"

    mapping = utils.get_ancestor_to_descendants_map([leaf], steplen=4)

    expected = {
        "0001": {leaf},
        "00010002": {leaf},
        "000100020005": {leaf},
    }
    assert mapping == expected
def test_utils_get_ancestor_to_descendants_map_multiple_paths():
    """Paths sharing a prefix are grouped together under that common prefix."""
    deep, shallow = "000100020005", "00010003"

    mapping = utils.get_ancestor_to_descendants_map([deep, shallow], steplen=4)

    expected = {
        "0001": {deep, shallow},
        "00010002": {deep},
        "000100020005": {deep},
        "00010003": {shallow},
    }
    assert mapping == expected
def test_utils_users_sharing_documents_with_cache_miss():
    """Cache miss: the result is returned and stored under the user's cache key."""
    user1, user2, user3 = (factories.UserFactory() for _ in range(3))
    shared_doc = factories.DocumentFactory()
    other_doc = factories.DocumentFactory()

    # user1 and user2 both have access to the same document...
    for member in (user1, user2):
        factories.UserDocumentAccessFactory(user=member, document=shared_doc)
    # ...while user3 only has access to a document user1 has no access to.
    factories.UserDocumentAccessFactory(user=user3, document=other_doc)

    cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
    cache.delete(cache_key)  # make sure nothing is cached before the call

    result = utils.users_sharing_documents_with(user1)

    assert user2.id in result
    assert cache.get(cache_key) == result
def test_utils_users_sharing_documents_with_cache_hit():
    """Cache hit: the payload already stored in the cache is returned as is."""
    user1 = factories.UserFactory()
    user2 = factories.UserFactory()
    doc1 = factories.DocumentFactory()
    for member in (user1, user2):
        factories.UserDocumentAccessFactory(user=member, document=doc1)

    # Seed the cache after the accesses exist, with a recognisable payload.
    cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
    seeded_payload = {user2.id: "2025-02-10"}
    cache.set(cache_key, seeded_payload, 86400)

    assert utils.users_sharing_documents_with(user1) == seeded_payload
def test_utils_users_sharing_documents_with_cache_invalidation_on_create():
    """Creating a DocumentAccess for a user clears that user's cached entry."""
    ttl = 86400

    user1 = factories.UserFactory()
    user2 = factories.UserFactory()
    doc1 = factories.DocumentFactory()

    # Seed user1's cache entry and check it is really there.
    cache_key = utils.get_users_sharing_documents_with_cache_key(user1)
    cache.set(cache_key, {}, ttl)
    assert cache.get(cache_key) is not None

    # Grant another user access to the document. The state of user1's cache
    # entry after this step is not asserted by this test.
    factories.UserDocumentAccessFactory(user=user2, document=doc1)

    # Re-seed user1's entry, then grant user1 access to the same document.
    cache.set(cache_key, {"test": "data"}, ttl)
    factories.UserDocumentAccessFactory(user=user1, document=doc1)

    # The entry for user1 must be gone.
    assert cache.get(cache_key) is None
def test_utils_users_sharing_documents_with_cache_invalidation_on_delete():
    """Deleting a user's DocumentAccess clears that user's cached entry."""
    owner = factories.UserFactory()
    peer = factories.UserFactory()
    document = factories.DocumentFactory()
    access = factories.UserDocumentAccessFactory(user=owner, document=document)

    # Seed the cache only once the access exists.
    cache_key = utils.get_users_sharing_documents_with_cache_key(owner)
    cache.set(cache_key, {peer.id: "2025-02-10"}, 86400)
    assert cache.get(cache_key) is not None

    access.delete()

    assert cache.get(cache_key) is None
def test_utils_users_sharing_documents_with_empty_result():
    """A user with no document access gets an empty dict, which is also cached."""
    lone_user = factories.UserFactory()
    cache_key = utils.get_users_sharing_documents_with_cache_key(lone_user)
    cache.delete(cache_key)

    assert utils.users_sharing_documents_with(lone_user) == {}
    # The empty result is stored in the cache as well.
    assert cache.get(cache_key) == {}
def test_utils_get_value_by_pattern_matching_key():
    """The value stored under the one key matching the pattern is returned."""
    payload = {"title.extension": "Bonjour", "id": 1, "content": "test"}

    matches = utils.get_value_by_pattern(payload, r"^title\.")

    assert set(matches) == {"Bonjour"}
def test_utils_get_value_by_pattern_multiple_matches():
    """Values of every key matching the pattern are returned."""
    payload = {
        "title.extension_1": "Bonjour",
        "title.extension_2": "Hello",
        "id": 1,
    }

    matches = utils.get_value_by_pattern(payload, r"^title\.")

    # Compared as a set, so the order of the returned values is not checked.
    assert set(matches) == {"Bonjour", "Hello"}
def test_utils_get_value_by_pattern_multiple_extensions():
    """A key carrying several chained extensions still matches the pattern."""
    payload = {"title.extension_1.extension_2": "Bonjour", "id": 1}

    matches = utils.get_value_by_pattern(payload, r"^title\.")

    assert set(matches) == {"Bonjour"}
def test_utils_get_value_by_pattern_no_match():
    """An empty list is returned when no key matches the pattern."""
    payload = {"name": "Test", "id": 1}

    matches = utils.get_value_by_pattern(payload, r"^title\.")

    assert matches == []