better fuzzing
This commit is contained in:
176
src/matcher.rs
176
src/matcher.rs
@@ -1,105 +1,89 @@
|
||||
use nucleo_matcher::{
|
||||
Config, Matcher,
|
||||
pattern::{AtomKind, CaseMatching, Normalization, Pattern},
|
||||
};
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
|
||||
static MATCHER: OnceLock<Mutex<Matcher>> = OnceLock::new();
|
||||
use seal::pair::{AlignmentSet, InMemoryAlignmentMatrix, SmithWaterman, Step};
|
||||
|
||||
pub fn match_lines(code: &str, start: &str, end: &str) -> Option<(usize, usize)> {
|
||||
let mut matcher = MATCHER
|
||||
.get_or_init(|| Mutex::new(Matcher::new(Config::DEFAULT)))
|
||||
.lock()
|
||||
let code_chars: Vec<char> = code.chars().collect();
|
||||
|
||||
if code_chars.is_empty() || start.is_empty() || end.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let strategy = SmithWaterman::new(3, -1, -1, -1);
|
||||
|
||||
if start.ends_with(end) {
|
||||
let pattern_chars: Vec<char> = start.chars().collect();
|
||||
let set: AlignmentSet<InMemoryAlignmentMatrix> =
|
||||
AlignmentSet::new(code_chars.len(), pattern_chars.len(), strategy, |x, y| {
|
||||
code_chars[x] == pattern_chars[y]
|
||||
})
|
||||
.ok()?;
|
||||
|
||||
let alignment = set.local_alignment();
|
||||
let mut start_char_pos = None;
|
||||
let mut end_char_pos = None;
|
||||
|
||||
for step in alignment.steps() {
|
||||
if let Step::Align { x, .. } = step {
|
||||
if start_char_pos.is_none() {
|
||||
start_char_pos = Some(x);
|
||||
}
|
||||
end_char_pos = Some(x);
|
||||
}
|
||||
}
|
||||
|
||||
let start_char = start_char_pos?;
|
||||
let end_char = end_char_pos?;
|
||||
|
||||
let start_line = code[..start_char].lines().count().saturating_sub(1);
|
||||
let end_line = code[..=end_char].lines().count().saturating_sub(1);
|
||||
|
||||
Some((start_line, end_line))
|
||||
} else {
|
||||
let start_chars: Vec<char> = start.chars().collect();
|
||||
let start_set: AlignmentSet<InMemoryAlignmentMatrix> = AlignmentSet::new(
|
||||
code_chars.len(),
|
||||
start_chars.len(),
|
||||
strategy.clone(),
|
||||
|x, y| code_chars[x] == start_chars[y],
|
||||
)
|
||||
.ok()?;
|
||||
|
||||
let lines: Vec<&str> = code.lines().collect();
|
||||
let n = lines.len();
|
||||
let start_alignment = start_set.local_alignment();
|
||||
let start_char_pos = start_alignment
|
||||
.steps()
|
||||
.filter_map(|step| {
|
||||
if let Step::Align { x, .. } = step {
|
||||
Some(x)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()?;
|
||||
|
||||
let start_parts: Vec<&str> = start.split('\n').collect();
|
||||
let end_parts: Vec<&str> = end.split('\n').collect();
|
||||
let end_chars: Vec<char> = end.chars().collect();
|
||||
let remaining_code: Vec<char> = code_chars[start_char_pos..].to_vec();
|
||||
let end_set: AlignmentSet<InMemoryAlignmentMatrix> =
|
||||
AlignmentSet::new(remaining_code.len(), end_chars.len(), strategy, |x, y| {
|
||||
remaining_code[x] == end_chars[y]
|
||||
})
|
||||
.ok()?;
|
||||
|
||||
let start_len = start_parts.len();
|
||||
let end_len = end_parts.len();
|
||||
let end_alignment = end_set.local_alignments().next().unwrap();
|
||||
let end_char_pos_relative = end_alignment
|
||||
.steps()
|
||||
.filter_map(|step| {
|
||||
if let Step::Align { x, .. } = step {
|
||||
Some(x)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.last()?;
|
||||
|
||||
if start_len == 0 || end_len == 0 || n == 0 {
|
||||
return None;
|
||||
let end_char_pos = start_char_pos + end_char_pos_relative;
|
||||
|
||||
let start_line = code[..start_char_pos].lines().count().saturating_sub(1);
|
||||
let end_line = code[..=end_char_pos].lines().count().saturating_sub(1);
|
||||
|
||||
Some((start_line, end_line))
|
||||
}
|
||||
|
||||
let non_empty: Vec<bool> = lines.iter().map(|line| !line.trim().is_empty()).collect();
|
||||
let nei: Vec<usize> = (0..n).filter(|&i| non_empty[i]).collect();
|
||||
let m = nei.len();
|
||||
|
||||
if m < start_len || m < end_len {
|
||||
return None;
|
||||
}
|
||||
|
||||
let start_patterns: Vec<Pattern> = start_parts
|
||||
.iter()
|
||||
.map(|&part| {
|
||||
Pattern::new(
|
||||
part,
|
||||
CaseMatching::Ignore,
|
||||
Normalization::Smart,
|
||||
AtomKind::Fuzzy,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let end_patterns: Vec<Pattern> = end_parts
|
||||
.iter()
|
||||
.map(|&part| {
|
||||
Pattern::new(
|
||||
part,
|
||||
CaseMatching::Ignore,
|
||||
Normalization::Smart,
|
||||
AtomKind::Fuzzy,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut start_candidates: Vec<(usize, usize)> = vec![];
|
||||
|
||||
for p in 0..=m - start_len {
|
||||
let matches_all = (0..start_len).all(|k| {
|
||||
let line = lines[nei[p + k]];
|
||||
!start_patterns[k]
|
||||
.match_list(std::iter::once(line), &mut matcher)
|
||||
.is_empty()
|
||||
});
|
||||
if matches_all {
|
||||
start_candidates.push((nei[p], nei[p + start_len - 1]));
|
||||
}
|
||||
}
|
||||
|
||||
let mut end_candidates: Vec<(usize, usize)> = vec![];
|
||||
|
||||
for p in 0..=m - end_len {
|
||||
let matches_all = (0..end_len).all(|k| {
|
||||
let line = lines[nei[p + k]];
|
||||
!end_patterns[k]
|
||||
.match_list(std::iter::once(line), &mut matcher)
|
||||
.is_empty()
|
||||
});
|
||||
if matches_all {
|
||||
end_candidates.push((nei[p], nei[p + end_len - 1]));
|
||||
}
|
||||
}
|
||||
|
||||
let mut result = None;
|
||||
|
||||
for &(s_start, s_end) in &start_candidates {
|
||||
let pos = end_candidates.partition_point(|&(e_start, _)| e_start <= s_end);
|
||||
|
||||
if end_candidates[pos..].len() == 1 {
|
||||
let (_, e_end) = end_candidates[pos];
|
||||
if result.is_some() {
|
||||
return None;
|
||||
}
|
||||
result = Some((s_start, e_end));
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user