字符串匹配算法

  1. def brute_force(t: str, p: str):
  2. # 暴力破解
  3. i = 0
  4. while i < len(t):
  5. j = 0
  6. while j < len(p):
  7. if t[i + j] != p[j]:
  8. break
  9. j = j + 1
  10. if j == len(p):
  11. return i
  12. i = i + 1
  13. return False
  14. def brute_force1(s: str, p: str):
  15. # aaaab, aaab
  16. # 暴力破解, 很接近kmp+3
  17. i = 0
  18. j = 0
  19. temp = 0
  20. while i < len(s) and j < len(p): # 考虑去掉一重f循环, 当内外层循环控制变量会有某种联系时候
  21. if (s[i] == p[j]):
  22. j = j + 1
  23. temp = temp + 1
  24. else: # 当前匹配失败(j的更新没有考虑前后缀问题)
  25. j = 0 # 恢复j
  26. i = i - temp # 恢复i
  27. temp = 0
  28. i = i + 1
  29. if j == len(p):
  30. return i - len(p)
  31. else:
  32. return False
  33. def get_next(p: str) -> []:
  34. # 基于前缀后缀
  35. #
  36. next: list[int] = [0]
  37. count = 0
  38. i = 0
  39. j = 0
  40. s = p[1:]
  41. p = p[:len(p) - 1]
  42. while i < len(s):
  43. # i 控制的是 当前 串的后缀, j 是前缀。只考虑当前串, 后缀某个字符开始, 和字符串开头字符开始匹配, 取其最大, 称为最大公共字串
  44. if (s[i] == p[j]):
  45. j = j + 1
  46. next.append(j)
  47. else: #
  48. if j == 0: # s_i ! = p_j and j == 0 表示第一个就匹配不上, 必定没有公共缀
  49. next.append(j)
  50. else: # s_i != p_J and j != 0 可能从头还会匹配上, 保持i
  51. j = 0
  52. continue
  53. i = i + 1
  54. return next
  55. def kmp(s: str, p: str) -> bool:
  56. i = 0
  57. j = 0
  58. next = get_next(p)
  59. while i < len(s) and j < len(p):
  60. if (s[i] == p[j]):
  61. j = j + 1
  62. else: #
  63. j = next[j - 1] + 1
  64. i = i + 1
  65. if j == len(p):
  66. return i - len(p)
  67. else:
  68. return False
  69. if __name__ == '__main__':
  70. print(brute_force('helloworld', 'world'))
  71. print(brute_force1('helloworld', 'world'))
  72. print(kmp('helloworld', 'world'))
  73. s = 'aaaab'
  74. p = 'aaab'
  75. print(kmp(s, p))

总结

  • 双重循环, 考虑去掉一重
  • 从暴力开始优化