mirror of
https://github.com/SerenityOS/serenity
synced 2026-05-13 02:16:39 +02:00
Makes these work (and produce the same output as macOS system `iconv`):
echo 😀 | Build/lagom/bin/iconv -f utf-8 -t utf-16be | xxd
echo 😀 | Build/lagom/bin/iconv -f utf-8 -t utf-16le | xxd
This probably doesn't affect LibWeb, since over there TextEncoders
are usually created with the result of TextCodec::get_output_encoding(),
which maps utf-16be and utf-16le to utf-8.
(Originally I thought I'd use this to make the
ExtensionType::MultiByteCodedComment implementation in JBIG2Writer.cpp
look more similar to the ExtensionType::SingleByteCodedComment one,
but the TextCodec::Encoder interface doesn't make it easily possible
to reject surrogate pairs, so I'm not doing that part. Maybe we'll
add a ucs-2be encoder one day.)
108 lines
3.1 KiB
C++
108 lines
3.1 KiB
C++
/*
|
|
* Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/Forward.h>
|
|
#include <AK/Function.h>
|
|
|
|
namespace TextCodec {
|
|
|
|
class Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) = 0;
|
|
|
|
protected:
|
|
virtual ~Encoder() = default;
|
|
};
|
|
|
|
class UTF8Encoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class UTF16BEEncoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class UTF16LEEncoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class Latin1Encoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class EUCJPEncoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class ISO2022JPEncoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
|
|
private:
|
|
enum class State {
|
|
ASCII,
|
|
Roman,
|
|
jis0208,
|
|
};
|
|
|
|
ErrorOr<State> process_item(u32 item, State, Function<ErrorOr<void>(u8)>& on_byte, Function<ErrorOr<void>(u32)>& on_error);
|
|
};
|
|
|
|
class ShiftJISEncoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class EUCKREncoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class Big5Encoder final : public Encoder {
|
|
public:
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
};
|
|
|
|
class GB18030Encoder final : public Encoder {
|
|
public:
|
|
enum class IsGBK {
|
|
Yes,
|
|
No,
|
|
};
|
|
|
|
GB18030Encoder(IsGBK is_gbk = IsGBK::No);
|
|
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
|
|
private:
|
|
IsGBK m_is_gbk { IsGBK::No };
|
|
};
|
|
template<Integral ArrayType = u32>
|
|
class SingleByteEncoder final : public Encoder {
|
|
public:
|
|
SingleByteEncoder(Array<ArrayType, 128> translation_table)
|
|
: m_translation_table(translation_table)
|
|
{
|
|
}
|
|
|
|
virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
|
|
|
|
private:
|
|
Array<ArrayType, 128> m_translation_table;
|
|
};
|
|
|
|
// Returns a reference to an Encoder for `encoding`, or an empty Optional.
// NOTE(review): presumably `encoding` must already be a canonical encoding name (no alias resolution),
// and an empty Optional means no encoder exists for it; confirm in the implementation.
Optional<Encoder&> encoder_for_exact_name(StringView encoding);
|
|
// Returns a reference to an Encoder for `label`, or an empty Optional.
// NOTE(review): presumably `label` may be any recognized encoding label/alias that is first resolved
// to an encoding name; confirm in the implementation.
Optional<Encoder&> encoder_for(StringView label);
|
|
|
|
}
|