mirror of
https://github.com/suitenumerique/docs.git
synced 2026-04-25 17:15:01 +02:00
🚸(backend) make document search on title accent-insensitive
This should work in both cases: searching for "vélo" when the document title contains "velo", and searching for "velo" when the document title contains "vélo".
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
"""API filters for Impress' core application."""
|
||||
|
||||
import unicodedata
|
||||
|
||||
from django.db.models import CharField, Func
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
|
||||
import django_filters
|
||||
@@ -7,12 +10,64 @@ import django_filters
|
||||
from core import models
|
||||
|
||||
|
||||
class DocumentFilter(django_filters.FilterSet):
|
||||
def remove_accents(value):
    """
    Remove accents from a string (vélo -> velo).

    The string is decomposed (NFD) so that each accent becomes a separate
    combining character, the nonspacing marks (Unicode category "Mn") are
    dropped, and what remains is recomposed (NFC).

    Args:
        value: The text to strip of diacritics.

    Returns:
        The text without nonspacing combining marks, in composed form.
    """
    decomposed = unicodedata.normalize("NFD", value)
    stripped = "".join(
        char for char in decomposed if unicodedata.category(char) != "Mn"
    )
    # NFD also splits characters that carry no accent at all (for example
    # Hangul syllables into individual jamo). Recompose so that such text is
    # returned unchanged instead of in decomposed form.
    return unicodedata.normalize("NFC", stripped)
|
||||
|
||||
|
||||
# pylint: disable=abstract-method
|
||||
class Unaccent(Func):
    """
    PostgreSQL unaccent function wrapper for use in Django ORM queries.

    This allows you to annotate a field using the unaccented version of a
    text column, enabling accent-insensitive filtering.
    """

    # Name of the SQL function being wrapped.
    function = "unaccent"
    # The wrapped expression is cast to text inside the rendered SQL call.
    template = "unaccent(%(expressions)s::text)"
    # The result is exposed to the ORM as a character field.
    output_field = CharField()
|
||||
|
||||
|
||||
class AccentInsensitiveCharFilter(django_filters.CharFilter):
    """
    A custom CharFilter that performs accent-insensitive filtering.

    This filter uses PostgreSQL's extension `unaccent` function to remove diacritics (accents)
    from the column before applying the lookup expression (e.g., `icontains`). The searched
    value is stripped of its accents in Python with `remove_accents`.

    Case sensitivity is decided by the configured lookup expression: `icontains` ignores
    case, `contains` does not.
    """

    def filter(self, qs, value):
        """
        Apply the filter to the queryset using the unaccented version of the field.

        Args:
            qs: The queryset to filter.
            value: The value to search for in the unaccented field.

        Returns:
            The queryset unchanged when `value` is empty, otherwise the queryset
            annotated with `unaccented_<field_name>` and filtered (or excluded, when
            the filter was declared with `exclude=True`) on that annotation.
        """
        if not value:
            return qs

        # Honor the `distinct` option like the stock django-filter implementation.
        if self.distinct:
            qs = qs.distinct()

        annotated_field = f"unaccented_{self.field_name}"
        lookup = f"{annotated_field}__{self.lookup_expr}"
        annotated_qs = qs.annotate(**{annotated_field: Unaccent(self.field_name)})

        # NOTE(review): the column is unaccented by PostgreSQL while the value is
        # unaccented in Python; confirm both sides agree for letters that have no
        # Unicode decomposition (e.g. "ø", "ł").
        # `get_method` picks `exclude` or `filter` depending on the `exclude` option,
        # which a direct call to `.filter()` would silently ignore.
        return self.get_method(annotated_qs)(**{lookup: remove_accents(value)})
|
||||
|
||||
|
||||
class DocumentFilter(django_filters.FilterSet):
|
||||
"""
|
||||
Custom filter for filtering documents on title (accent and case insensitive).
|
||||
"""
|
||||
|
||||
title = AccentInsensitiveCharFilter(
|
||||
field_name="title", lookup_expr="icontains", label=_("Title")
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Install the PostgreSQL `unaccent` extension; reversing the migration drops it."""

    dependencies = [
        ("core", "0020_remove_is_public_add_field_attachments_and_duplicated_from"),
    ]

    operations = [
        migrations.RunSQL(
            sql="CREATE EXTENSION IF NOT EXISTS unaccent;",
            reverse_sql="DROP EXTENSION IF EXISTS unaccent;",
        ),
    ]
|
||||
@@ -7,6 +7,7 @@ from faker import Faker
|
||||
from rest_framework.test import APIClient
|
||||
|
||||
from core import factories
|
||||
from core.api.filters import remove_accents
|
||||
|
||||
fake = Faker()
|
||||
pytestmark = pytest.mark.django_db
|
||||
@@ -49,14 +50,16 @@ def test_api_documents_descendants_filter_unknown_field():
|
||||
[
|
||||
("Project Alpha", 1), # Exact match
|
||||
("project", 2), # Partial match (case-insensitive)
|
||||
("Guide", 1), # Word match within a title
|
||||
("Guide", 2), # Word match within a title
|
||||
("Special", 0), # No match (nonexistent keyword)
|
||||
("2024", 2), # Match by numeric keyword
|
||||
("", 5), # Empty string
|
||||
("", 6), # Empty string
|
||||
("velo", 1), # Accent-insensitive match (velo vs vélo)
|
||||
("bêta", 1), # Accent-insensitive match (bêta vs beta)
|
||||
],
|
||||
)
|
||||
def test_api_documents_descendants_filter_title(query, nb_results):
|
||||
"""Authenticated users should be able to search documents by their title."""
|
||||
"""Authenticated users should be able to search documents by their unaccented title."""
|
||||
user = factories.UserFactory()
|
||||
client = APIClient()
|
||||
client.force_login(user)
|
||||
@@ -70,6 +73,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
|
||||
"User Guide",
|
||||
"Financial Report 2024",
|
||||
"Annual Review 2024",
|
||||
"Guide du vélo urbain", # <-- Title with accent for accent-insensitive test
|
||||
]
|
||||
for title in titles:
|
||||
factories.DocumentFactory(title=title, parent=document)
|
||||
@@ -85,4 +89,7 @@ def test_api_documents_descendants_filter_title(query, nb_results):
|
||||
|
||||
# Ensure all results contain the query in their title
|
||||
for result in results:
|
||||
assert query.lower().strip() in result["title"].lower()
|
||||
assert (
|
||||
remove_accents(query).lower().strip()
|
||||
in remove_accents(result["title"]).lower()
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user