mirror of
https://github.com/SerenityOS/serenity
synced 2026-04-25 17:15:42 +02:00
AK: Implement reverse-order memory search
This is particularly useful for compression, where we want to search through the lookback buffer for the smallest possible distance [towards the end].
This commit is contained in:
64
AK/MemMem.h
64
AK/MemMem.h
@@ -101,6 +101,70 @@ requires(requires { (*haystack_begin).data(); (*haystack_begin).size(); })
|
||||
return {};
|
||||
}
|
||||
|
||||
template<typename HaystackIterT>
|
||||
inline Optional<size_t> memmem_reverse(HaystackIterT const& haystack_begin, HaystackIterT const& haystack_end, ReadonlyBytes needle)
|
||||
requires(requires { (*haystack_begin).data(); (*haystack_begin).size(); })
|
||||
{
|
||||
// Note: This is a simple inversion of our modified KMP algorithm that is used in AK::memmem.
|
||||
// Be aware that we keep the table values mostly positive and the indices refer to the number
|
||||
// of matched characters.
|
||||
// In short: We really only invert the array accesses into the needle and haystack.
|
||||
|
||||
auto prepare_kmp_partial_table = [&] {
|
||||
Vector<int, 64> table;
|
||||
table.try_resize(needle.size()).release_value_but_fixme_should_propagate_errors();
|
||||
|
||||
size_t position = 1;
|
||||
int candidate = 0;
|
||||
|
||||
table[0] = -1;
|
||||
while (position < needle.size()) {
|
||||
if (needle[needle.size() - 1 - position] == needle[needle.size() - 1 - candidate]) {
|
||||
table[position] = table[candidate];
|
||||
} else {
|
||||
table[position] = candidate;
|
||||
do {
|
||||
candidate = table[candidate];
|
||||
} while (candidate >= 0 && needle[needle.size() - 1 - candidate] != needle[needle.size() - 1 - position]);
|
||||
}
|
||||
++position;
|
||||
++candidate;
|
||||
}
|
||||
return table;
|
||||
};
|
||||
|
||||
auto table = prepare_kmp_partial_table();
|
||||
size_t total_haystack_index = 0;
|
||||
size_t current_haystack_index = 0;
|
||||
int needle_index = 0;
|
||||
auto haystack_it = haystack_begin;
|
||||
|
||||
while (haystack_it != haystack_end) {
|
||||
auto&& chunk = *haystack_it;
|
||||
if (current_haystack_index >= chunk.size()) {
|
||||
current_haystack_index = 0;
|
||||
++haystack_it;
|
||||
continue;
|
||||
}
|
||||
if (needle[needle.size() - 1 - needle_index] == chunk[chunk.size() - 1 - current_haystack_index]) {
|
||||
++needle_index;
|
||||
++current_haystack_index;
|
||||
++total_haystack_index;
|
||||
if ((size_t)needle_index == needle.size())
|
||||
return total_haystack_index;
|
||||
continue;
|
||||
}
|
||||
needle_index = table[needle_index];
|
||||
if (needle_index < 0) {
|
||||
++needle_index;
|
||||
++current_haystack_index;
|
||||
++total_haystack_index;
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
inline Optional<size_t> memmem_optional(void const* haystack, size_t haystack_length, void const* needle, size_t needle_length)
|
||||
{
|
||||
if (needle_length == 0)
|
||||
|
||||
@@ -76,6 +76,65 @@ TEST_CASE(kmp_two_chunks)
|
||||
EXPECT(!result_3.has_value());
|
||||
}
|
||||
|
||||
TEST_CASE(kmp_reverse_one_chunk)
{
    // Reverse search over a haystack that consists of exactly one chunk.
    // `value_or(9)` supplies 9 as the fallback whenever a search comes back empty.
    Array<u8, 8> data { 1, 0, 1, 2, 3, 4, 5, 0 };
    Array<Array<u8, 8>, 1> chunks { data };

    // Needle located in the interior of the chunk.
    Array<u8, 4> inner_needle { 2, 3, 4, 5 };
    auto inner_match = AK::memmem_reverse(chunks.begin(), chunks.end(), inner_needle);
    EXPECT_EQ(inner_match.value_or(9), 5u);

    // Needle located one byte closer to the front.
    Array<u8, 4> earlier_needle { 1, 2, 3, 4 };
    auto earlier_match = AK::memmem_reverse(chunks.begin(), chunks.end(), earlier_needle);
    EXPECT_EQ(earlier_match.value_or(9), 6u);

    // Needle that is the final four bytes of the chunk.
    Array<u8, 4> tail_needle { 3, 4, 5, 0 };
    auto tail_match = AK::memmem_reverse(chunks.begin(), chunks.end(), tail_needle);
    EXPECT_EQ(tail_match.value_or(9), 4u);

    // Needle that does not occur at all.
    Array<u8, 4> absent_needle { 3, 4, 5, 6 };
    auto absent_match = AK::memmem_reverse(chunks.begin(), chunks.end(), absent_needle);
    EXPECT(!absent_match.has_value());
}
|
||||
|
||||
TEST_CASE(kmp_reverse_two_chunks)
{
    // Same data as the one-chunk test, split in two; the chunks are handed over back-to-front.
    // `value_or(9)` supplies 9 as the fallback whenever a search comes back empty.
    Array<u8, 4> front_half { 1, 0, 1, 2 };
    Array<u8, 4> back_half { 3, 4, 5, 0 };
    Array<Array<u8, 4>, 2> chunks { back_half, front_half };

    // Needle that straddles the chunk boundary.
    Array<u8, 4> straddling_needle { 2, 3, 4, 5 };
    auto straddling_match = AK::memmem_reverse(chunks.begin(), chunks.end(), straddling_needle);
    EXPECT_EQ(straddling_match.value_or(9), 5u);

    // Needle that straddles the boundary one byte closer to the front.
    Array<u8, 4> earlier_needle { 1, 2, 3, 4 };
    auto earlier_match = AK::memmem_reverse(chunks.begin(), chunks.end(), earlier_needle);
    EXPECT_EQ(earlier_match.value_or(9), 6u);

    // Needle that is exactly the chunk passed first.
    Array<u8, 4> tail_needle { 3, 4, 5, 0 };
    auto tail_match = AK::memmem_reverse(chunks.begin(), chunks.end(), tail_needle);
    EXPECT_EQ(tail_match.value_or(9), 4u);

    // Needle that does not occur at all.
    Array<u8, 4> absent_needle { 3, 4, 5, 6 };
    auto absent_match = AK::memmem_reverse(chunks.begin(), chunks.end(), absent_needle);
    EXPECT(!absent_match.has_value());
}
|
||||
|
||||
TEST_CASE(kmp_match_order)
{
    // Both searches see the same eight bytes; only the chunk order differs per direction.
    Array<u8, 4> front_half { 1, 0, 1, 2 };
    Array<u8, 4> back_half { 3, 4, 5, 0 };
    Array<Array<u8, 4>, 2> forward_chunks { front_half, back_half };
    Array<Array<u8, 4>, 2> backward_chunks { back_half, front_half };

    // The value 0 occurs twice in the data.
    Array<u8, 1> zero_needle { 0 };
    auto zero_forward = AK::memmem(forward_chunks.begin(), forward_chunks.end(), zero_needle);
    auto zero_backward = AK::memmem_reverse(backward_chunks.begin(), backward_chunks.end(), zero_needle);
    EXPECT_EQ(zero_forward.value_or(9), 1u);
    EXPECT_EQ(zero_backward.value_or(9), 1u);

    // The value 1 occurs twice in the data as well.
    Array<u8, 1> one_needle { 1 };
    auto one_forward = AK::memmem(forward_chunks.begin(), forward_chunks.end(), one_needle);
    auto one_backward = AK::memmem_reverse(backward_chunks.begin(), backward_chunks.end(), one_needle);
    EXPECT_EQ(one_forward.value_or(9), 0u);
    EXPECT_EQ(one_backward.value_or(9), 6u);
}
|
||||
|
||||
TEST_CASE(timing_safe_compare)
|
||||
{
|
||||
ByteString data_set = "abcdefghijklmnopqrstuvwxyz123456789";
|
||||
|
||||
Reference in New Issue
Block a user