better fuzzing

This commit is contained in:
senstella
2025-10-08 15:19:30 +09:00
parent 91b4d10cc5
commit 117a4c2dc9
6 changed files with 4180 additions and 192 deletions

View File

@@ -1,105 +1,89 @@
use nucleo_matcher::{
Config, Matcher,
pattern::{AtomKind, CaseMatching, Normalization, Pattern},
};
use std::sync::{Mutex, OnceLock};
static MATCHER: OnceLock<Mutex<Matcher>> = OnceLock::new();
use seal::pair::{AlignmentSet, InMemoryAlignmentMatrix, SmithWaterman, Step};
/// Locates the region of `code` delimited by the `start` and `end` snippets and
/// returns it as a `(start_line, end_line)` pair of line indices, or `None`
/// when no acceptable match is found.
///
/// NOTE(review): this listing comes from a diff hunk (`@@ -1,105 +1,89 @@`)
/// whose +/- markers are missing, so statements from two revisions of this
/// function appear interleaved: one built on `nucleo_matcher` per-line
/// patterns and one built on `seal` Smith-Waterman alignment. Which revision a
/// given line belongs to is inferred from the identifiers it uses — confirm
/// against the actual commit before relying on these notes.
pub fn match_lines(code: &str, start: &str, end: &str) -> Option<(usize, usize)> {
// (nucleo-based lines) Lazily create the shared `Matcher` stored in the
// `MATCHER` static and take its mutex.
let mut matcher = MATCHER
.get_or_init(|| Mutex::new(Matcher::new(Config::DEFAULT)))
.lock()
// (seal-based lines) The alignment is built over `char`s, so the positions
// used with it index into `code_chars`.
let code_chars: Vec<char> = code.chars().collect();
// Nothing can be matched when the code or either snippet is empty.
if code_chars.is_empty() || start.is_empty() || end.is_empty() {
return None;
}
// Scoring values handed to `SmithWaterman::new`; presumably
// match / mismatch / gap-open / gap-extend — TODO confirm against seal docs.
let strategy = SmithWaterman::new(3, -1, -1, -1);
// When `start` already ends with `end`, one alignment of `start` against the
// code supplies both boundaries.
if start.ends_with(end) {
let pattern_chars: Vec<char> = start.chars().collect();
let set: AlignmentSet<InMemoryAlignmentMatrix> =
AlignmentSet::new(code_chars.len(), pattern_chars.len(), strategy, |x, y| {
code_chars[x] == pattern_chars[y]
})
.ok()?;
let alignment = set.local_alignment();
// Remember the first and the last `Step::Align` position on the code side.
let mut start_char_pos = None;
let mut end_char_pos = None;
for step in alignment.steps() {
if let Step::Align { x, .. } = step {
if start_char_pos.is_none() {
start_char_pos = Some(x);
}
end_char_pos = Some(x);
}
}
// No aligned step at all means no match.
let start_char = start_char_pos?;
let end_char = end_char_pos?;
// NOTE(review): `start_char` / `end_char` look like char indices (the
// comparison closure indexes `code_chars`), but `code[..i]` slices by byte
// offset; with non-ASCII input this may select the wrong position or panic
// on a non-boundary — confirm.
// NOTE(review): when `start_char` is the first character of a line, the
// prefix ends with '\n' and `lines().count() - 1` is the previous line's
// index — verify this is intended.
let start_line = code[..start_char].lines().count().saturating_sub(1);
let end_line = code[..=end_char].lines().count().saturating_sub(1);
Some((start_line, end_line))
} else {
// Otherwise align `start` first, then look for `end` after it.
let start_chars: Vec<char> = start.chars().collect();
let start_set: AlignmentSet<InMemoryAlignmentMatrix> = AlignmentSet::new(
code_chars.len(),
start_chars.len(),
strategy.clone(),
|x, y| code_chars[x] == start_chars[y],
)
.ok()?;
// (nucleo-based lines) Split the code into lines for per-line matching.
let lines: Vec<&str> = code.lines().collect();
let n = lines.len();
let start_alignment = start_set.local_alignment();
// First aligned code position of the `start` snippet.
let start_char_pos = start_alignment
.steps()
.filter_map(|step| {
if let Step::Align { x, .. } = step {
Some(x)
} else {
None
}
})
.next()?;
// (nucleo-based lines) Each snippet is matched line by line.
let start_parts: Vec<&str> = start.split('\n').collect();
let end_parts: Vec<&str> = end.split('\n').collect();
let end_chars: Vec<char> = end.chars().collect();
// Search for `end` only from the start match onwards; positions found in
// `remaining_code` are therefore relative to `start_char_pos`.
let remaining_code: Vec<char> = code_chars[start_char_pos..].to_vec();
let end_set: AlignmentSet<InMemoryAlignmentMatrix> =
AlignmentSet::new(remaining_code.len(), end_chars.len(), strategy, |x, y| {
remaining_code[x] == end_chars[y]
})
.ok()?;
let start_len = start_parts.len();
let end_len = end_parts.len();
// NOTE(review): `.unwrap()` panics if `local_alignments()` yields nothing,
// whereas the neighbouring lookups bail out with `?` — confirm this cannot
// be empty.
let end_alignment = end_set.local_alignments().next().unwrap();
// Last aligned position of the `end` snippet within `remaining_code`.
let end_char_pos_relative = end_alignment
.steps()
.filter_map(|step| {
if let Step::Align { x, .. } = step {
Some(x)
} else {
None
}
})
.last()?;
if start_len == 0 || end_len == 0 || n == 0 {
return None;
// Convert the relative end position back into a position in the full code.
let end_char_pos = start_char_pos + end_char_pos_relative;
// NOTE(review): same char-index vs byte-offset and line-start concerns as in
// the `start.ends_with(end)` branch above.
let start_line = code[..start_char_pos].lines().count().saturating_sub(1);
let end_line = code[..=end_char_pos].lines().count().saturating_sub(1);
Some((start_line, end_line))
}
// (nucleo-based lines) `nei` holds the indices of lines that are not blank;
// candidate windows below are taken over these lines only.
let non_empty: Vec<bool> = lines.iter().map(|line| !line.trim().is_empty()).collect();
let nei: Vec<usize> = (0..n).filter(|&i| non_empty[i]).collect();
let m = nei.len();
// Fewer non-blank lines than snippet lines: no window can fit.
if m < start_len || m < end_len {
return None;
}
// One case-insensitive fuzzy pattern per snippet line.
let start_patterns: Vec<Pattern> = start_parts
.iter()
.map(|&part| {
Pattern::new(
part,
CaseMatching::Ignore,
Normalization::Smart,
AtomKind::Fuzzy,
)
})
.collect();
let end_patterns: Vec<Pattern> = end_parts
.iter()
.map(|&part| {
Pattern::new(
part,
CaseMatching::Ignore,
Normalization::Smart,
AtomKind::Fuzzy,
)
})
.collect();
// A start candidate is a window of `start_len` consecutive non-blank lines
// in which every line matches the corresponding `start` pattern; it is
// stored as (first line index, last line index).
let mut start_candidates: Vec<(usize, usize)> = vec![];
for p in 0..=m - start_len {
let matches_all = (0..start_len).all(|k| {
let line = lines[nei[p + k]];
!start_patterns[k]
.match_list(std::iter::once(line), &mut matcher)
.is_empty()
});
if matches_all {
start_candidates.push((nei[p], nei[p + start_len - 1]));
}
}
// End candidates are collected the same way using the `end` patterns.
let mut end_candidates: Vec<(usize, usize)> = vec![];
for p in 0..=m - end_len {
let matches_all = (0..end_len).all(|k| {
let line = lines[nei[p + k]];
!end_patterns[k]
.match_list(std::iter::once(line), &mut matcher)
.is_empty()
});
if matches_all {
end_candidates.push((nei[p], nei[p + end_len - 1]));
}
}
// Accept only an unambiguous pairing: every start candidate must have exactly
// one end candidate beginning after its last line, and at most one start
// candidate may produce a result; anything else yields `None`.
let mut result = None;
for &(s_start, s_end) in &start_candidates {
// `end_candidates` is in ascending order of start line, so this finds the
// first end candidate that begins after the start window.
let pos = end_candidates.partition_point(|&(e_start, _)| e_start <= s_end);
if end_candidates[pos..].len() == 1 {
let (_, e_end) = end_candidates[pos];
if result.is_some() {
return None;
}
result = Some((s_start, e_end));
} else {
return None;
}
}
result
}