Files
ladybird/AK/FlyString.h
Andreas Kling 0317007ee1 AK: Make short ASCII string literals compile-time constants
Make the _string, _fly_string, _utf16, and _utf16_fly_string UDL
operators constexpr, with a fast path for short (<= 7 byte) ASCII
literals that folds directly into an inline ShortString. Previously,
every "foo"_fly_string (and friends) involved an out-of-line call
into the string factory, even though the result is entirely known
at compile time.
2026-04-17 16:22:56 +02:00

160 lines
5.4 KiB
C++

/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/AllOf.h>
#include <AK/Error.h>
#include <AK/Format.h>
#include <AK/Optional.h>
#include <AK/Platform.h>
#include <AK/String.h>
#include <AK/Traits.h>
#include <AK/Types.h>
namespace AK {
class FlyString {
AK_MAKE_DEFAULT_MOVABLE(FlyString);
AK_MAKE_DEFAULT_COPYABLE(FlyString);
public:
FlyString() = default;
static ErrorOr<FlyString> from_utf8(StringView);
static FlyString from_utf8_without_validation(ReadonlyBytes);
[[nodiscard]] static constexpr FlyString from_ascii_short_string_without_validation(char const* data, size_t length)
{
VERIFY(length <= Detail::MAX_SHORT_STRING_BYTE_COUNT);
auto short_string = Detail::ShortString::create_with_byte_count(length);
for (size_t i = 0; i < length; ++i)
short_string.storage[i] = static_cast<u8>(data[i]);
return FlyString { Detail::StringBase { short_string } };
}
template<typename T>
requires(IsOneOf<RemoveCVReference<T>, ByteString, FlyString, String>)
static ErrorOr<String> from_utf8(T&&) = delete;
FlyString(String const&);
FlyString& operator=(String const&);
[[nodiscard]] bool is_empty() const { return m_data.byte_count() == 0; }
[[nodiscard]] unsigned hash() const { return m_data.hash(); }
[[nodiscard]] u32 ascii_case_insensitive_hash() const;
explicit operator String() const;
String to_string() const;
[[nodiscard]] Utf8View code_points() const;
[[nodiscard]] ReadonlyBytes bytes() const LIFETIME_BOUND { return m_data.bytes(); }
[[nodiscard]] StringView bytes_as_string_view() const LIFETIME_BOUND { return m_data.bytes(); }
[[nodiscard]] ALWAYS_INLINE bool operator==(FlyString const& other) const { return m_data.raw(Badge<FlyString> {}) == other.m_data.raw(Badge<FlyString> {}); }
[[nodiscard]] bool operator==(String const& other) const { return m_data == other; }
[[nodiscard]] bool operator==(StringView) const;
[[nodiscard]] bool operator==(char const*) const;
[[nodiscard]] int operator<=>(FlyString const& other) const;
[[nodiscard]] Detail::StringBase data(Badge<String>) const;
// This is primarily interesting to unit tests.
[[nodiscard]] static size_t number_of_fly_strings();
template<typename T>
requires(IsSame<RemoveCVReference<T>, StringView>)
static ErrorOr<String> from_deprecated_fly_string(T&&) = delete;
// Compare this FlyString against another string with ASCII caseless matching.
[[nodiscard]] bool equals_ignoring_ascii_case(FlyString const&) const;
[[nodiscard]] bool equals_ignoring_ascii_case(StringView) const;
[[nodiscard]] FlyString to_ascii_lowercase() const;
[[nodiscard]] FlyString to_ascii_uppercase() const;
[[nodiscard]] bool is_ascii() const { return bytes_as_string_view().is_ascii(); }
[[nodiscard]] bool starts_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
[[nodiscard]] bool ends_with_bytes(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
template<typename... Ts>
[[nodiscard]] ALWAYS_INLINE constexpr bool is_one_of(Ts&&... strings) const
{
return (... || this->operator==(forward<Ts>(strings)));
}
template<typename... Ts>
[[nodiscard]] ALWAYS_INLINE constexpr bool is_one_of_ignoring_ascii_case(Ts&&... strings) const
{
return (... || this->equals_ignoring_ascii_case(forward<Ts>(strings)));
}
private:
friend struct SentinelOptionalTraits<FlyString>;
explicit constexpr FlyString(nullptr_t)
: m_data(nullptr)
{
}
explicit constexpr FlyString(Detail::StringBase data)
: m_data(move(data))
{
}
Detail::StringBase m_data;
constexpr bool is_invalid() const { return m_data.raw(Badge<FlyString> {}) == 0; }
};
template<>
struct SentinelOptionalTraits<FlyString> {
static constexpr FlyString sentinel_value() { return FlyString(nullptr); }
static constexpr bool is_sentinel(FlyString const& value) { return value.is_invalid(); }
};
// Optional<FlyString> is implemented on top of SentinelOptional and reuses all of its constructors.
template<>
class Optional<FlyString> : public SentinelOptional<FlyString> {
public:
    using SentinelOptional<FlyString>::SentinelOptional;
};
// Hashing/equality traits for FlyString keys.
template<>
struct Traits<FlyString> : public DefaultTraits<FlyString> {
    static unsigned hash(FlyString const& fly_string);

    // Reports that equality checks on FlyString are not slow.
    static constexpr bool may_have_slow_equality_check()
    {
        return false;
    }
};
// Formatting support for FlyString, layered on top of the StringView formatter.
template<>
struct Formatter<FlyString> : Formatter<StringView> {
    ErrorOr<void> format(FormatBuilder& builder, FlyString const& fly_string);
};
// Traits that hash and compare FlyStrings with ASCII case-insensitive semantics.
// NOTE(review): this derives from Traits<String> although it operates on FlyString; confirm whether
// the String-based base is intentional before changing it.
struct ASCIICaseInsensitiveFlyStringTraits : public Traits<String> {
    static unsigned hash(FlyString const& string)
    {
        return string.ascii_case_insensitive_hash();
    }

    static bool equals(FlyString const& lhs, FlyString const& rhs)
    {
        return lhs.equals_ignoring_ascii_case(rhs);
    }
};
}
// User-defined literal producing an AK::FlyString, e.g. "foo"_fly_string.
// `cstring` points at the literal's characters and `length` is their count.
[[nodiscard]] ALWAYS_INLINE constexpr AK::FlyString operator""_fly_string(char const* cstring, size_t length)
{
    // OPTIMIZATION: Short ASCII strings become compile-time constants with no runtime validation or table lookup.
    if (length <= AK::Detail::MAX_SHORT_STRING_BYTE_COUNT
        && AK::all_of(cstring, cstring + length, AK::is_ascii)) {
        return AK::FlyString::from_ascii_short_string_without_validation(cstring, length);
    }

    // This operator lives at global scope, so every AK name (including Utf8View) is spelled with
    // its namespace instead of relying on USING_AK_GLOBALLY to make it visible.
    ASSERT(AK::Utf8View(AK::StringView(cstring, length)).validate());
    return AK::FlyString::from_utf8_without_validation({ cstring, length });
}
#if USING_AK_GLOBALLY
using AK::FlyString;
#endif