mirror of
https://github.com/mistralai/mistral-vibe
synced 2026-04-25 17:14:55 +02:00
Co-authored-by: Bastien <bastien.baret@gmail.com> Co-authored-by: Clément Sirieix <clement.sirieix@mistral.ai> Co-authored-by: Julien Legrand <72564015+JulienLGRD@users.noreply.github.com> Co-authored-by: Kim-Adeline Miguel <51720070+kimadeline@users.noreply.github.com> Co-authored-by: Mathias Gesbert <mathias.gesbert@mistral.ai> Co-authored-by: Pierre Rossinès <pierre.rossines@mistral.ai> Co-authored-by: Quentin <quentin.torroba@mistral.ai> Co-authored-by: Vincent G <10739306+VinceOPS@users.noreply.github.com> Co-authored-by: Mistral Vibe <vibe@mistral.ai>
117 lines
4.3 KiB
Python
117 lines
4.3 KiB
Python
from __future__ import annotations
|
||
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
from vibe.core.utils import get_server_url_from_api_base
|
||
import vibe.core.utils.io as io_utils
|
||
from vibe.core.utils.io import decode_safe, read_safe, read_safe_async
|
||
|
||
|
||
@pytest.mark.parametrize(
    ("api_base", "expected"),
    [
        # http(s) API bases: the version path segment is absent from the result.
        ("https://api.mistral.ai/v1", "https://api.mistral.ai"),
        ("https://on-prem.example.com/v1", "https://on-prem.example.com"),
        ("http://localhost:8080/v2", "http://localhost:8080"),
        # Inputs for which no server URL is expected: no scheme, non-HTTP scheme.
        ("not-a-url", None),
        ("ftp://example.com/v1", None),
    ],
)
def test_get_server_url_from_api_base(api_base, expected):
    """Check the server URL derived from each API base against its expectation."""
    server_url = get_server_url_from_api_base(api_base)
    assert server_url == expected
|
||
|
||
|
||
class TestReadSafe:
    """Tests for the synchronous ``read_safe`` helper (plus one ``decode_safe`` check)."""

    def test_reads_utf8(self, tmp_path: Path) -> None:
        """A UTF-8 file is returned unchanged by ``read_safe`` and ``decode_safe``."""
        f = tmp_path / "hello.txt"
        f.write_text("café\n", encoding="utf-8")
        assert read_safe(f).text == "café\n"
        assert decode_safe(f.read_bytes()).text == "café\n"

    def test_falls_back_on_non_utf8(
        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """With ``_encoding_from_best_match`` stubbed to None, a bad byte becomes U+FFFD."""
        f = tmp_path / "latin.txt"
        # \x81 invalid UTF-8 and undefined in CP1252 → U+FFFD on all platforms
        f.write_bytes(b"maf\x81\n")
        monkeypatch.setattr(io_utils, "_encoding_from_best_match", lambda _raw: None)
        result = read_safe(f)
        # Spelled as an escape so the expected replacement character cannot be
        # mangled by editors or transports that mishandle the raw code point.
        assert result.text == "maf\ufffd\n"

    def test_falls_back_to_detected_encoding(
        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """BOM-less UTF-16-LE content is still read back as the original text."""
        f = tmp_path / "utf16.txt"
        expected = "hello été\n"
        f.write_bytes(expected.encode("utf-16le"))
        # Pin the preferred locale encoding so the outcome does not depend on
        # the machine running the test.
        monkeypatch.setattr(
            io_utils.locale, "getpreferredencoding", lambda _do_setlocale: "utf-8"
        )

        assert read_safe(f).text == expected

    def test_raise_on_error_true_utf8_succeeds(self, tmp_path: Path) -> None:
        """``raise_on_error=True`` does not affect reading a valid UTF-8 file."""
        f = tmp_path / "hello.txt"
        f.write_text("café\n", encoding="utf-8")
        assert read_safe(f, raise_on_error=True).text == "café\n"

    def test_raise_on_error_true_non_utf8_raises(
        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """The same bad bytes are replaced when lenient and raise when strict."""
        f = tmp_path / "bad.txt"
        # Invalid UTF-8; with raise_on_error=True we use default encoding (strict), so decode errors propagate
        f.write_bytes(b"maf\x81\n")
        monkeypatch.setattr(io_utils, "_encoding_from_best_match", lambda _raw: None)
        assert read_safe(f, raise_on_error=False).text == "maf\ufffd\n"
        with pytest.raises(UnicodeDecodeError):
            read_safe(f, raise_on_error=True)

    def test_empty_file(self, tmp_path: Path) -> None:
        """An empty file yields an empty string."""
        f = tmp_path / "empty.txt"
        f.write_bytes(b"")
        assert read_safe(f).text == ""

    def test_binary_garbage_does_not_raise(self, tmp_path: Path) -> None:
        """Every possible byte value in one file still produces a ``str`` result."""
        f = tmp_path / "garbage.bin"
        f.write_bytes(bytes(range(256)))
        result = read_safe(f)
        assert isinstance(result.text, str)

    def test_file_not_found_raises(self, tmp_path: Path) -> None:
        """A missing path raises ``FileNotFoundError``."""
        with pytest.raises(FileNotFoundError):
            read_safe(tmp_path / "nope.txt")
|
||
|
||
|
||
class TestReadSafeResultEncoding:
    """Tests for the ``encoding`` attribute reported on the read result."""

    def test_reports_utf8_for_plain_utf8_file(self, tmp_path: Path) -> None:
        """A plain UTF-8 file is reported with encoding ``"utf-8"``."""
        f = tmp_path / "x.txt"
        f.write_text("ok\n", encoding="utf-8")
        got = read_safe(f)
        assert got.text == "ok\n"
        assert got.encoding == "utf-8"

    @pytest.mark.asyncio
    async def test_async_reports_utf16_when_bom_present(self, tmp_path: Path) -> None:
        """A file starting with a little-endian UTF-16 BOM is reported as ``utf-16-le``."""
        f = tmp_path / "u16.txt"
        # Build the bytes from an explicit little-endian BOM plus a utf-16-le
        # payload. A bare "utf-16" encode uses the host's native byte order, so
        # on a big-endian machine it would write a big-endian BOM and payload,
        # contradicting the utf-16-le expectation below.
        f.write_bytes(b"\xff\xfe" + "a\n".encode("utf-16-le"))
        got = await read_safe_async(f)
        assert got.encoding == "utf-16-le"
        # utf-16-le leaves the BOM as U+FEFF in the string (unlike utf-8-sig).
        assert got.text == "\ufeffa\n"
|
||
|
||
|
||
class TestReadSafeAsync:
    """Tests for the asynchronous ``read_safe_async`` helper."""

    @pytest.mark.asyncio
    async def test_raise_on_error_final_utf8_strict_or_replace(
        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """raise_on_error controls strict vs replace on the last UTF-8 fallback."""
        f = tmp_path / "bad.txt"
        # \x81 is not valid UTF-8; _encoding_from_best_match is stubbed to
        # return None for the duration of this test.
        f.write_bytes(b"maf\x81\n")
        monkeypatch.setattr(io_utils, "_encoding_from_best_match", lambda _raw: None)
        # Lenient mode: the bad byte is expected back as U+FFFD. Written as an
        # escape so the expected character survives any re-encoding of this file.
        assert (await read_safe_async(f, raise_on_error=False)).text == "maf\ufffd\n"
        # Strict mode: the decode error must propagate to the caller.
        with pytest.raises(UnicodeDecodeError):
            await read_safe_async(f, raise_on_error=True)
|