mirror of
https://github.com/SerenityOS/serenity
synced 2026-04-25 17:15:42 +02:00
AK: Search CircularBuffer matches from the end of memory
This is generally advantageous for compression algorithms.
This commit is contained in:
@@ -410,19 +410,19 @@ Optional<SearchableCircularBuffer::Match> SearchableCircularBuffer::find_copy_in
|
||||
// Try a plain memory search for smaller values.
|
||||
// Note: This overlaps with the hash search for chunks of size HASH_CHUNK_SIZE for the purpose of validation.
|
||||
if (minimum_length <= HASH_CHUNK_SIZE) {
|
||||
size_t haystack_offset_from_start = 0;
|
||||
size_t haystack_offset_from_end = 0;
|
||||
Vector<ReadonlyBytes, 2> haystack;
|
||||
|
||||
// Note: memmem_reverse expects memory chunks in the order that it should search in,
|
||||
// so haystack[0] needs to be the memory with the highest match priority.
|
||||
haystack.append(next_search_span(search_limit()));
|
||||
if (haystack[0].size() < search_limit())
|
||||
haystack.append(next_search_span(search_limit() - haystack[0].size()));
|
||||
haystack.prepend(next_search_span(search_limit() - haystack[0].size()));
|
||||
|
||||
// TODO: `memmem` searches the memory in "natural" order, which means that it finds matches with a greater distance first.
|
||||
// Hash-based searching finds the shortest distances first, which is most likely better for encoding and memory efficiency.
|
||||
// Look into creating a `memmem_reverse`, which starts searching from the end.
|
||||
auto memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
|
||||
auto memmem_match = AK::memmem_reverse(haystack.begin(), haystack.end(), needle);
|
||||
while (memmem_match.has_value()) {
|
||||
auto match_offset = memmem_match.release_value();
|
||||
auto corrected_match_distance = search_limit() - haystack_offset_from_start - match_offset;
|
||||
auto corrected_match_distance = haystack_offset_from_end + match_offset;
|
||||
|
||||
// Validate the match through the set-distance-based implementation and extend it to the largest size possible.
|
||||
auto maybe_new_match = find_copy_in_seekback(Array { corrected_match_distance }, min(maximum_length, HASH_CHUNK_SIZE), minimum_length);
|
||||
@@ -445,13 +445,13 @@ Optional<SearchableCircularBuffer::Match> SearchableCircularBuffer::find_copy_in
|
||||
// Trim away the already processed bytes from the haystack.
|
||||
// Running out of haystack to discard is fine; in this case we found a match at the largest
|
||||
// distance and therefore tried to advance past that.
|
||||
haystack_offset_from_start += size_to_discard;
|
||||
haystack_offset_from_end += size_to_discard;
|
||||
while (size_to_discard > 0 && haystack.size() > 0) {
|
||||
if (haystack[0].size() <= size_to_discard) {
|
||||
size_to_discard -= haystack[0].size();
|
||||
haystack.remove(0);
|
||||
} else {
|
||||
haystack[0] = haystack[0].slice(size_to_discard);
|
||||
haystack[0] = haystack[0].slice(0, haystack[0].size() - size_to_discard);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -460,7 +460,7 @@ Optional<SearchableCircularBuffer::Match> SearchableCircularBuffer::find_copy_in
|
||||
break;
|
||||
|
||||
// Try and find the next match.
|
||||
memmem_match = AK::memmem(haystack.begin(), haystack.end(), needle);
|
||||
memmem_match = AK::memmem_reverse(haystack.begin(), haystack.end(), needle);
|
||||
}
|
||||
|
||||
// If we found a match of size HASH_CHUNK_SIZE, we should have already found that using the hash search. Investigate.
|
||||
|
||||
@@ -342,15 +342,11 @@ TEST_CASE(find_copy_in_seekback)
|
||||
auto written_needle_bytes = buffer.write(needle);
|
||||
VERIFY(written_needle_bytes == needle.size());
|
||||
|
||||
// Note: As of now, the preference during a tie is determined by which algorithm found the match.
|
||||
// Hash-based matching finds the shortest distance first, while memmem finds the greatest distance first.
|
||||
// A matching TODO can be found in CircularBuffer.cpp.
|
||||
|
||||
{
|
||||
// Find the largest match with a length between 1 and 1 (all "A").
|
||||
auto match = buffer.find_copy_in_seekback(1, 1);
|
||||
EXPECT(match.has_value());
|
||||
EXPECT_EQ(match.value().distance, 11ul);
|
||||
EXPECT_EQ(match.value().distance, 2ul);
|
||||
EXPECT_EQ(match.value().length, 1ul);
|
||||
}
|
||||
|
||||
@@ -358,7 +354,7 @@ TEST_CASE(find_copy_in_seekback)
|
||||
// Find the largest match with a length between 1 and 2 (all "AB", everything smaller gets eliminated).
|
||||
auto match = buffer.find_copy_in_seekback(2, 1);
|
||||
EXPECT(match.has_value());
|
||||
EXPECT_EQ(match.value().distance, 11ul);
|
||||
EXPECT_EQ(match.value().distance, 2ul);
|
||||
EXPECT_EQ(match.value().length, 2ul);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user