Files
serenity/Userland/Libraries/LibTextCodec/Encoder.h
Nico Weber 7154218094 LibTextCodec: Add UTF16BEEncoder, UTF16LEEncoder
Makes these work (and produce the same output as macOS system `iconv`):

    echo 😀 | Build/lagom/bin/iconv -f utf-8 -t utf-16be | xxd
    echo 😀 | Build/lagom/bin/iconv -f utf-8 -t utf-16le | xxd

This probably doesn't affect LibWeb, since over there TextEncoders
are usually created with the result of TextCodec::get_output_encoding(),
which maps utf-16be and utf-16le to utf-8.

(Originally I thought I'd use this to make the
ExtensionType::MultiByteCodedComment implementation in JBIG2Writer.cpp
look more similar to the ExtensionType::SingleByteCodedComment one,
but the TextCodec::Encoder interface doesn't make it easily possible
to reject surrogate pairs, so I'm not doing that part. Maybe we'll
add a ucs-2be encoder one day.)
2025-10-24 21:28:58 -04:00

108 lines
3.1 KiB
C++

/*
* Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Forward.h>
#include <AK/Function.h>
namespace TextCodec {
// Abstract interface implemented by every text encoder declared in this header.
class Encoder {
public:
    // Encodes the given UTF-8 view, reporting results through two callbacks:
    //   on_byte  - takes a u8; presumably invoked once per encoded output byte (confirm in the implementations).
    //   on_error - takes a u32; presumably the code point that could not be encoded (confirm in the implementations).
    // Both callbacks and this function itself can signal failure through ErrorOr<void>.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) = 0;

protected:
    // Protected: code outside the class hierarchy cannot destroy an encoder through an Encoder pointer.
    virtual ~Encoder() = default;
};
// Encoder for UTF-8 output (going by the class name); the conversion is defined out of line.
class UTF8Encoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for big-endian UTF-16 output (going by the class name); the conversion is defined out of line.
// NOTE(review): handling of code points outside the BMP is not visible from this header - check the definition.
class UTF16BEEncoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for little-endian UTF-16 output (going by the class name); the conversion is defined out of line.
// NOTE(review): handling of code points outside the BMP is not visible from this header - check the definition.
class UTF16LEEncoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for Latin-1 output (going by the class name); the conversion is defined out of line.
class Latin1Encoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for EUC-JP output (going by the class name); the conversion is defined out of line.
class EUCJPEncoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for ISO-2022-JP output (going by the class name); the conversion is defined out of line.
class ISO2022JPEncoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;

private:
    // States passed into and returned from process_item().
    // NOTE(review): presumably these name the character set currently selected in the output - confirm.
    enum class State {
        ASCII,
        Roman,
        jis0208,
    };

    // Handles a single u32 item given a State and returns a State on success.
    // NOTE(review): the returned State is presumably the one to use for the next item - confirm against the definition.
    // The callbacks are taken by reference here, unlike process(), which takes them by value.
    ErrorOr<State> process_item(u32 item, State, Function<ErrorOr<void>(u8)>& on_byte, Function<ErrorOr<void>(u32)>& on_error);
};
// Encoder for Shift_JIS output (going by the class name); the conversion is defined out of line.
class ShiftJISEncoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for EUC-KR output (going by the class name); the conversion is defined out of line.
class EUCKREncoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for Big5 output (going by the class name); the conversion is defined out of line.
class Big5Encoder final : public Encoder {
public:
    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
};
// Encoder for GB18030 output (going by the class name), with a flag for a GBK mode.
class GB18030Encoder final : public Encoder {
public:
    // Flag type for the constructor and the m_is_gbk member.
    // NOTE(review): presumably Yes restricts the encoder to GBK behaviour - confirm in the definition of process().
    enum class IsGBK {
        Yes,
        No,
    };

    // The argument defaults to IsGBK::No. The constructor is defined out of line;
    // presumably it stores is_gbk in m_is_gbk - verify.
    GB18030Encoder(IsGBK is_gbk = IsGBK::No);

    // Overrides Encoder::process().
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;

private:
    // Default member initializer is IsGBK::No, the same as the constructor's default argument.
    IsGBK m_is_gbk { IsGBK::No };
};
// Table-driven encoder parameterised on the element type of its translation table.
// ArrayType must satisfy Integral and defaults to u32.
// NOTE(review): the 128 table entries presumably describe the non-ASCII half of a
// single-byte encoding - confirm against the definition of process().
template<Integral ArrayType = u32>
class SingleByteEncoder final : public Encoder {
public:
    // Keeps its own copy of the 128-entry table: the parameter is taken by value and copied into the member.
    SingleByteEncoder(Array<ArrayType, 128> translation_table)
        : m_translation_table(translation_table)
    {
    }

    // Overrides Encoder::process(); no definition is provided in this class body.
    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;

private:
    // The table handed to the constructor.
    Array<ArrayType, 128> m_translation_table;
};
// Looks up an encoder from an encoding name. The result, when present, is a reference
// to an Encoder rather than an owned object.
// NOTE(review): presumably requires the exact encoding name and yields an empty
// Optional when nothing matches - confirm against the definition.
Optional<Encoder&> encoder_for_exact_name(StringView encoding);
// Looks up an encoder from a label. The result, when present, is a reference to an
// Encoder rather than an owned object.
// NOTE(review): presumably resolves the label to an encoding name before the lookup
// and yields an empty Optional for unknown labels - confirm against the definition.
Optional<Encoder&> encoder_for(StringView label);
}