Files
serenity/Userland/Libraries/LibTextCodec/Encoder.h
Nico Weber 286872c135 LibTextCodec: Add Latin1Encoder for encoder_for_exact_name("iso-8859-1")
Latin1Encoder matches Latin1Decoder, and matches the first 256
codepoints of Unicode.

Note that on the web, iso-8859-1 actually gets you windows-1252:
https://encoding.spec.whatwg.org/#note-latin1-ascii

This is implemented in get_standardized_encoding(), which encoder_for()
calls. One has to call encoder_for_exact_name("iso-8859-1") to get this
new encoder.

That means neither the browser nor iconv will see this encoder.

(Maybe iconv should try calling encoder_for_exact_name() first, and
encoder_for() only if that doesn't return anything?)

This is a bit weird and subtle, but it matches the decoder, is
compatible with the web, and still allows code that does want actual
iso-8859-1 to get it.

See also #24594, especially the fourth commit, and #25119, which
picked up this design for the encoders as well.
2025-10-24 21:28:58 -04:00

98 lines
2.7 KiB
C++

/*
* Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Forward.h>
#include <AK/Function.h>
namespace TextCodec {
// Abstract base class for the encoders declared in this header.
class Encoder {
public:
    // Pure virtual entry point that every concrete encoder overrides.
    //
    // Takes a Utf8View plus two callbacks, all by value: `on_byte` accepts a u8 and
    // `on_error` accepts a u32. Both callbacks, and process() itself, return
    // ErrorOr<void>, so each of them can report failure.
    //
    // NOTE(review): presumably `on_byte` receives each encoded output byte and `on_error`
    // receives the code point that could not be encoded — confirm against the
    // implementations.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) = 0;

protected:
    // Protected: code outside the class hierarchy cannot destroy an object through the
    // Encoder base type.
    virtual ~Encoder() = default;
};
// Concrete, non-derivable Encoder; judging by its name it targets UTF-8.
// Only the declaration of process() lives in this header.
class UTF8Encoder final : public Encoder {
public:
    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for actual ISO-8859-1 (Latin-1): the counterpart of Latin1Decoder, covering the
// first 256 code points of Unicode.
//
// On the web the label "iso-8859-1" resolves to windows-1252, and encoder_for() follows
// that via get_standardized_encoding(). This class is therefore only reachable through
// encoder_for_exact_name("iso-8859-1").
class Latin1Encoder final : public Encoder {
public:
    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Concrete, non-derivable Encoder; judging by its name it targets EUC-JP.
// Only the declaration of process() lives in this header.
class EUCJPEncoder final : public Encoder {
public:
    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Concrete, non-derivable Encoder; judging by its name it targets ISO-2022-JP.
// Unlike its siblings in this header it declares a private state enum and a per-item helper.
class ISO2022JPEncoder final : public Encoder {
public:
    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;

private:
    // States the encoder can be in.
    // NOTE(review): presumably these mirror the "ISO-2022-JP encoder state" values of the
    // WHATWG Encoding Standard — confirm.
    enum class State {
        ASCII,
        Roman,
        jis0208,
    };

    // Handles a single u32 `item` given a State; the callbacks are passed by reference
    // rather than by value. Returns a State on success.
    // NOTE(review): presumably the returned State is the one to use for the next item —
    // confirm in the implementation.
    ErrorOr<State> process_item(u32 item, State, Function<ErrorOr<void>(u8)>& on_byte, Function<ErrorOr<void>(u32)>& on_error);
};
// Concrete, non-derivable Encoder; judging by its name it targets Shift_JIS.
// Only the declaration of process() lives in this header.
class ShiftJISEncoder final : public Encoder {
public:
    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Concrete, non-derivable Encoder; judging by its name it targets EUC-KR.
// Only the declaration of process() lives in this header.
class EUCKREncoder final : public Encoder {
public:
    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Concrete, non-derivable Encoder; judging by its name it targets Big5.
// Only the declaration of process() lives in this header.
class Big5Encoder final : public Encoder {
public:
    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Concrete, non-derivable Encoder that carries an IsGBK flag.
// NOTE(review): presumably IsGBK::Yes selects the GBK flavour of the gb18030 encoder —
// confirm in the implementation.
class GB18030Encoder final : public Encoder {
public:
    // Two-valued flag type used instead of a bare bool.
    enum class IsGBK {
        Yes,
        No,
    };

    // Usable as a default constructor; `is_gbk` defaults to IsGBK::No.
    // NOTE(review): not marked `explicit`, so an IsGBK value converts implicitly to a
    // GB18030Encoder — confirm whether that is intended.
    GB18030Encoder(IsGBK is_gbk = IsGBK::No);

    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;

private:
    // In-class default is IsGBK::No; presumably overwritten by the constructor argument
    // (the constructor is only declared here).
    IsGBK m_is_gbk { IsGBK::No };
};
// Encoder parameterised by a 128-entry translation table whose element type is the
// Integral template parameter `ArrayType` (u32 unless specified).
// NOTE(review): presumably the 128 entries describe the non-ASCII half of a single-byte
// encoding — confirm in the implementation of process().
template<Integral ArrayType = u32>
class SingleByteEncoder final : public Encoder {
public:
    // Takes the table by value and stores a copy of it in the encoder.
    // NOTE(review): not marked `explicit`, so an Array<ArrayType, 128> converts implicitly
    // to a SingleByteEncoder — confirm whether that is intended.
    SingleByteEncoder(Array<ArrayType, 128> translation_table)
        : m_translation_table(translation_table)
    {
    }

    // Overrides Encoder::process with the same parameter list.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;

private:
    // The encoder's own copy of the table passed to the constructor.
    Array<ArrayType, 128> m_translation_table;
};
// Looks up an encoder by its exact encoding name, bypassing the label standardization
// that encoder_for() applies; e.g. "iso-8859-1" yields the Latin1Encoder here.
// NOTE(review): presumably returns an empty Optional when no encoder has that name — confirm.
Optional<Encoder&> encoder_for_exact_name(StringView encoding);
// Looks up an encoder for `label` after passing it through get_standardized_encoding(),
// so web-compatible aliasing applies: "iso-8859-1" resolves to windows-1252 here rather
// than to the Latin1Encoder (use encoder_for_exact_name() for that).
// NOTE(review): presumably returns an empty Optional for unknown labels — confirm.
Optional<Encoder&> encoder_for(StringView label);
}