mirror of
https://github.com/suitenumerique/docs.git
synced 2026-04-26 01:25:05 +02:00
Compare commits
2 Commits
fix/link-p
...
feature/te
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
955b322a09 | ||
|
|
1ee8e5fdba |
@@ -9,6 +9,10 @@ and this project adheres to
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## Added
|
||||
|
||||
- ⚗️(backend) Extract text from base64 yjs document #270
|
||||
|
||||
|
||||
## [1.4.0] - 2024-09-17
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from django.core import mail
|
||||
|
||||
import pytest
|
||||
|
||||
from core.utils import email_invitation
|
||||
from core.utils import email_invitation, text_to_yjs_base64, yjs_base64_to_text
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
@@ -85,3 +85,34 @@ def test_utils__email_invitation_failed(mock_logger, _mock_send_mail):
|
||||
|
||||
assert email == "guest@example.com"
|
||||
assert isinstance(exception, smtplib.SMTPException)
|
||||
|
||||
|
||||
def test_yjs_base64_to_text():
    """
    Check yjs_base64_to_text against a document shaped like the saved ones.

    The base64 payload is an example of what is stored in the database. It was
    generated from the blocknote editor and holds an italic "Hello" heading
    followed by a bullet list item "world" whose "or" is bold.
    """
    saved_content = (
        "AR717vLVDgAHAQ5kb2N1bWVudC1zdG9yZQMKYmxvY2tHcm91cAcA9e7y1Q4AAw5ibG9ja0NvbnRh"
        "aW5lcgcA9e7y1Q4BAwdoZWFkaW5nBwD17vLVDgIGBgD17vLVDgMGaXRhbGljAnt9hPXu8tUOBAVI"
        "ZWxsb4b17vLVDgkGaXRhbGljBG51bGwoAPXu8tUOAg10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y"
        "1Q4CBWxldmVsAX0BKAD17vLVDgECaWQBdyQwNGQ2MjM0MS04MzI2LTQyMzYtYTA4My00ODdlMjZm"
        "YWQyMzAoAPXu8tUOAQl0ZXh0Q29sb3IBdwdkZWZhdWx0KAD17vLVDgEPYmFja2dyb3VuZENvbG9y"
        "AXcHZGVmYXVsdIf17vLVDgEDDmJsb2NrQ29udGFpbmVyBwD17vLVDhADDmJ1bGxldExpc3RJdGVt"
        "BwD17vLVDhEGBAD17vLVDhIBd4b17vLVDhMEYm9sZAJ7fYT17vLVDhQCb3KG9e7y1Q4WBGJvbGQE"
        "bnVsbIT17vLVDhcCbGQoAPXu8tUOEQ10ZXh0QWxpZ25tZW50AXcEbGVmdCgA9e7y1Q4QAmlkAXck"
        "ZDM1MWUwNjgtM2U1NS00MjI2LThlYTUtYWJiMjYzMTk4ZTJhKAD17vLVDhAJdGV4dENvbG9yAXcH"
        "ZGVmYXVsdCgA9e7y1Q4QD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHSH9e7y1Q4QAw5ibG9ja0Nv"
        "bnRhaW5lcgcA9e7y1Q4eAwlwYXJhZ3JhcGgoAPXu8tUOHw10ZXh0QWxpZ25tZW50AXcEbGVmdCgA"
        "9e7y1Q4eAmlkAXckODk3MDBjMDctZTBlMS00ZmUwLWFjYTItODQ5MzIwOWE3ZTQyKAD17vLVDh4J"
        "dGV4dENvbG9yAXcHZGVmYXVsdCgA9e7y1Q4eD2JhY2tncm91bmRDb2xvcgF3B2RlZmF1bHQA"
    )

    extracted = yjs_base64_to_text(saved_content)

    assert extracted == "Hello world"
|
||||
|
||||
|
||||
def test_text_to_yjs_base64():
    """
    Text encoded with text_to_yjs_base64 must be read back unchanged
    by yjs_base64_to_text.
    """
    encoded = text_to_yjs_base64("Hello world")

    decoded = yjs_base64_to_text(encoded)

    assert decoded == "Hello world"
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
Utilities for the core app.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import smtplib
|
||||
from logging import getLogger
|
||||
|
||||
@@ -12,6 +13,9 @@ from django.template.loader import render_to_string
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from django.utils.translation import override
|
||||
|
||||
import y_py as Y
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
@@ -38,3 +42,37 @@ def email_invitation(language, email, document_id):
|
||||
|
||||
except smtplib.SMTPException as exception:
|
||||
logger.error("invitation to %s was not sent: %s", email, exception)
|
||||
|
||||
|
||||
def yjs_base64_to_text(base64_string):
    """
    Extract the plain text from a base64-encoded Yjs document.

    The string is base64-decoded and applied as an update to a fresh Yjs
    document. The "document-store" XML element of that document is rendered
    to a string, parsed as markup, and its text nodes are returned joined by
    a single space with surrounding whitespace stripped.
    """
    update = bytearray(base64.b64decode(base64_string))

    ydoc = Y.YDoc()  # pylint: disable=E1101
    Y.apply_update(ydoc, update)  # pylint: disable=E1101

    # The string form of the root element is tag markup; BeautifulSoup is only
    # used to drop the tags and keep the text.
    markup = str(ydoc.get_xml_element("document-store"))
    parsed = BeautifulSoup(markup, "html.parser")

    return parsed.get_text(separator=" ").strip()
|
||||
|
||||
|
||||
def text_to_yjs_base64(text: str) -> str:
    """
    Convert plain text to a base64-encoded Yjs document.

    The text is stored as one XML text node inside a "paragraph" element that
    is appended to the "document-store" root element, i.e. the same root that
    yjs_base64_to_text reads from.

    Args:
        text: the plain text to store in the document.

    Returns:
        The document state, encoded as a Yjs update and then as a base64
        string.
    """
    doc = Y.YDoc()  # pylint: disable=E1101

    # Look the root element up before opening the transaction, as the y_py
    # usage examples do.
    # NOTE(review): newer y_py releases reportedly refuse root lookups while a
    # transaction is open - confirm before bumping the pinned version.
    document_store = doc.get_xml_element("document-store")

    # Insert the paragraph text into the document
    with doc.begin_transaction() as txn:
        paragraph = document_store.push_xml_element(txn, "paragraph")
        paragraph_text = paragraph.push_xml_text(txn)
        paragraph_text.push(txn, text)

    # Serialize the whole document state as a binary Yjs update
    update = Y.encode_state_as_update(doc)  # pylint: disable=E1101

    # Encode the result to base64
    return base64.b64encode(update).decode("utf-8")
|
||||
|
||||
@@ -12,7 +12,7 @@ from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from faker import Faker
|
||||
|
||||
from core import models
|
||||
from core import models, utils
|
||||
|
||||
from demo import defaults
|
||||
|
||||
@@ -127,17 +127,14 @@ def create_demo(stdout):
|
||||
|
||||
with Timeit(stdout, "Creating documents"):
|
||||
for _ in range(defaults.NB_OBJECTS["docs"]):
|
||||
queue.push(
|
||||
models.Document(
|
||||
title=fake.sentence(nb_words=4),
|
||||
link_reach=models.LinkReachChoices.AUTHENTICATED
|
||||
if random_true_with_probability(0.5)
|
||||
else random.choice(models.LinkReachChoices.values),
|
||||
)
|
||||
)
|
||||
|
||||
queue.flush()
|
||||
|
||||
models.Document(
|
||||
title=fake.sentence(nb_words=4),
|
||||
content=utils.text_to_yjs_base64(fake.text()),
|
||||
link_reach=models.LinkReachChoices.AUTHENTICATED
|
||||
if random_true_with_probability(0.5)
|
||||
else random.choice(models.LinkReachChoices.values),
|
||||
).save()
|
||||
|
||||
with Timeit(stdout, "Creating docs accesses"):
|
||||
docs_ids = list(models.Document.objects.values_list("id", flat=True))
|
||||
users_ids = list(models.User.objects.values_list("id", flat=True))
|
||||
|
||||
@@ -25,6 +25,7 @@ license = { file = "LICENSE" }
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"beautifulsoup4==4.12.3",
|
||||
"boto3==1.35.10",
|
||||
"Brotli==1.1.0",
|
||||
"celery[redis]==5.4.0",
|
||||
@@ -57,6 +58,7 @@ dependencies = [
|
||||
"WeasyPrint>=60.2",
|
||||
"whitenoise==6.7.0",
|
||||
"mozilla-django-oidc==4.0.1",
|
||||
"y-py==0.5.5",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
Reference in New Issue
Block a user