字符串匹配
字符串匹配
问题描述
给定文本串 和模式串
,要求找出
中所有与
相同的子串的起始位置。
KMP算法
算法思想
- 利用已匹配的信息,避免不必要的比较
- 构建next数组(失配函数),记录最长相等前后缀
- 失配时根据next数组回退模式串
时间复杂度
- 预处理时间:
- 匹配时间:
- 空间复杂度:
代码实现
class Solution {
private:
vector<int> buildNext(string& pattern) {
int m = pattern.length();
vector<int> next(m);
next[0] = -1;
int j = -1;
for (int i = 1; i < m; i++) {
while (j >= 0 && pattern[j + 1] != pattern[i]) {
j = next[j];
}
if (pattern[j + 1] == pattern[i]) j++;
next[i] = j;
}
return next;
}
public:
vector<int> kmpMatch(string& text, string& pattern) {
vector<int> positions;
int n = text.length(), m = pattern.length();
if (m == 0) return positions;
vector<int> next = buildNext(pattern);
int j = -1;
for (int i = 0; i < n; i++) {
while (j >= 0 && pattern[j + 1] != text[i]) {
j = next[j];
}
if (pattern[j + 1] == text[i]) j++;
if (j == m - 1) {
positions.push_back(i - m + 1);
j = next[j];
}
}
return positions;
}
};
class Solution {
private int[] buildNext(String pattern) {
int m = pattern.length();
int[] next = new int[m];
next[0] = -1;
int j = -1;
for (int i = 1; i < m; i++) {
while (j >= 0 && pattern.charAt(j + 1) != pattern.charAt(i)) {
j = next[j];
}
if (pattern.charAt(j + 1) == pattern.charAt(i)) j++;
next[i] = j;
}
return next;
}
public List<Integer> kmpMatch(String text, String pattern) {
List<Integer> positions = new ArrayList<>();
int n = text.length(), m = pattern.length();
if (m == 0) return positions;
int[] next = buildNext(pattern);
int j = -1;
for (int i = 0; i < n; i++) {
while (j >= 0 && pattern.charAt(j + 1) != text.charAt(i)) {
j = next[j];
}
if (pattern.charAt(j + 1) == text.charAt(i)) j++;
if (j == m - 1) {
positions.add(i - m + 1);
j = next[j];
}
}
return positions;
}
}
class Solution:
def buildNext(self, pattern: str) -> List[int]:
m = len(pattern)
next = [0] * m
next[0] = -1
j = -1
for i in range(1, m):
while j >= 0 and pattern[j + 1] != pattern[i]:
j = next[j]
if pattern[j + 1] == pattern[i]:
j += 1
next[i] = j
return next
def kmpMatch(self, text: str, pattern: str) -> List[int]:
positions = []
n, m = len(text), len(pattern)
if m == 0:
return positions
next = self.buildNext(pattern)
j = -1
for i in range(n):
while j >= 0 and pattern[j + 1] != text[i]:
j = next[j]
if pattern[j + 1] == text[i]:
j += 1
if j == m - 1:
positions.append(i - m + 1)
j = next[j]
return positions
next数组的含义
next数组记录了模式串中每个位置的最长相等前后缀长度:
- next[i] = k 表示 pattern[0...k] = pattern[i-k...i]
- next[0] = -1 作为特殊标记
- 失配时,j = next[j] 表示回退到最长相等前缀的末尾
应用场景
- 文本编辑器的查找功能
- DNA序列匹配
- 网络入侵检测
- 文件内容搜索
- 拼写检查
注意事项
- 处理空串的情况
- 字符串下标的处理
- next数组的初始化
- 匹配位置的计算
- 多次匹配的处理
牛客代码笔记-牛栋 文章被收录于专栏
汗牛充栋,学海无涯。<br/> 内含算法知识点讲解,以及牛客题库精选例题。<br/> 学习算法,从牛栋开始。