LibRegex: Fix backreferences to undefined capture groups

Fixes the handling of backreferences whose referenced capture group is
undefined or has not participated in the match.

CharacterCompareType::NamedReference is added to distinguish numbered
(\1) from named (\k<name>) backreferences. Numbered backreferences look
up exactly the referenced group. Named backreferences search the groups
that share the referenced name (duplicates) for one that participated
in the match.
This commit is contained in:
aplefull
2025-07-23 20:48:34 +02:00
committed by Ali Mohammad Pur
parent 9b8f6b8108
commit c4eef822de
Notes: github-actions[bot] 2025-10-16 14:39:20 +00:00
5 changed files with 197 additions and 9 deletions

View File

@@ -131,6 +131,7 @@ static bool interpret_compares(Vector<CompareTypeAndValuePair> const& lhs, Stati
// We've transformed this into a series of ranges in flat_compares(), so bail out if we see it.
return false;
case CharacterCompareType::Reference:
case CharacterCompareType::NamedReference:
// We've handled this before coming here.
break;
case CharacterCompareType::Property:
@@ -512,6 +513,7 @@ static bool has_overlap(Vector<CompareTypeAndValuePair> const& lhs, Vector<Compa
// We've transformed this into a series of ranges in flat_compares(), so bail out if we see it.
return true;
case CharacterCompareType::Reference:
case CharacterCompareType::NamedReference:
// We've handled this before coming here.
break;
case CharacterCompareType::Property:
@@ -755,7 +757,7 @@ static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_preconditi
break;
if (any_of(compares, [&](auto& compare) {
return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference;
return compare.type == CharacterCompareType::AnyChar || compare.type == CharacterCompareType::Reference || compare.type == CharacterCompareType::NamedReference;
}))
return AtomicRewritePreconditionResult::NotSatisfied;
@@ -1835,6 +1837,7 @@ static LookupTableInsertionOutcome insert_into_lookup_table(RedBlackTree<ByteCod
case CharacterCompareType::And:
return LookupTableInsertionOutcome::FlushOnInsertion;
case CharacterCompareType::Reference:
case CharacterCompareType::NamedReference:
case CharacterCompareType::Property:
case CharacterCompareType::GeneralCategory:
case CharacterCompareType::Script: