Python | 正規表現の性能とアンチパターン（上級向け）

Python

2022.03.192025.11.05

では、これまで作った Notebook を 完全版教材 に統合します。

危険パターン vs 安全パターンの表
処理時間計測（危険・安全）
爆発的バックトラッキングの 分岐図（Graphviz）
バックトラック試行回数の アニメーション（指数的増加）

すべて1つの Notebook で実行可能です。

完全版 Notebook サンプルコード

# ==========================
# セル1: 説明
# ==========================
"""
# 正規表現の性能と爆発的バックトラッキング 完全版

このノートブックでは次を学習できます：

1. 危険パターン vs 安全パターン
2. 実際の処理時間の比較
3. 爆発的バックトラッキングの分岐図
4. 試行回数の指数的増加のアニメーション

目的：Python 正規表現での「遅いパターン」を理解し、安全な書き方を学ぶ
"""

# ==========================
# セル1: 説明
# ==========================
"""
# 正規表現の性能と爆発的バックトラッキング 完全版

このノートブックでは次を学習できます：

1. 危険パターン vs 安全パターン
2. 実際の処理時間の比較
3. 爆発的バックトラッキングの分岐図
4. 試行回数の指数的増加のアニメーション

目的：Python 正規表現での「遅いパターン」を理解し、安全な書き方を学ぶ
"""

Python

# ==========================
# セル2: 危険パターンと安全パターン
# ==========================
patterns = [
    {"name":"多重ネスト", "danger": r"(a+)+$", "safe": r"a+$", "text": "a"*30+"b"},
    {"name":"選択の共通部分", "danger": r"(a|aa)+$", "safe": r"a+$", "text": "a"*30+"b"},
    {"name":"貪欲な任意文字", "danger": r"(.*a)+b", "safe": r"(.*?a)+b", "text": "a"*25+"b"},
    {"name":"オプションと量指定", "danger": r"(a+)?a{30}b", "safe": r"a{31}b", "text": "a"*31+"b"},
    {"name":"共通接頭辞OR", "danger": r"(a|b|ab)+c", "safe": r"[ab]+c", "text": "ababababx"},
    {"name":"数字多重ネスト", "danger": r"(\d+)*$", "safe": r"\d+$", "text": "1234567x"}
]

import pandas as pd
df = pd.DataFrame(patterns)
df[["name","danger","safe","text"]]

# ==========================
# セル2: 危険パターンと安全パターン
# ==========================
patterns = [
    {"name":"多重ネスト", "danger": r"(a+)+$", "safe": r"a+$", "text": "a"*30+"b"},
    {"name":"選択の共通部分", "danger": r"(a|aa)+$", "safe": r"a+$", "text": "a"*30+"b"},
    {"name":"貪欲な任意文字", "danger": r"(.*a)+b", "safe": r"(.*?a)+b", "text": "a"*25+"b"},
    {"name":"オプションと量指定", "danger": r"(a+)?a{30}b", "safe": r"a{31}b", "text": "a"*31+"b"},
    {"name":"共通接頭辞OR", "danger": r"(a|b|ab)+c", "safe": r"[ab]+c", "text": "ababababx"},
    {"name":"数字多重ネスト", "danger": r"(\d+)*$", "safe": r"\d+$", "text": "1234567x"}
]

import pandas as pd
df = pd.DataFrame(patterns)
df[["name","danger","safe","text"]]

Python

# ==========================
# セル3: 危険パターン処理時間
# ==========================
import re, time

print("=== 危険パターン ===")
for p in patterns:
    start = time.time()
    try:
        re.match(p['danger'], p['text'])
    except Exception as e:
        print(p["name"], "Error:", e)
    end = time.time()
    print(f"{p['name']}: {end-start:.5f} 秒")

# ==========================
# セル3: 危険パターン処理時間
# ==========================
import re, time

print("=== 危険パターン ===")
for p in patterns:
    start = time.time()
    try:
        re.match(p['danger'], p['text'])
    except Exception as e:
        print(p["name"], "Error:", e)
    end = time.time()
    print(f"{p['name']}: {end-start:.5f} 秒")

Python

# ==========================
# セル4: 安全パターン処理時間
# ==========================
print("=== 安全パターン ===")
for p in patterns:
    start = time.time()
    try:
        re.match(p['safe'], p['text'])
    except Exception as e:
        print(p["name"], "Error:", e)
    end = time.time()
    print(f"{p['name']}: {end-start:.5f} 秒")

# ==========================
# セル4: 安全パターン処理時間
# ==========================
print("=== 安全パターン ===")
for p in patterns:
    start = time.time()
    try:
        re.match(p['safe'], p['text'])
    except Exception as e:
        print(p["name"], "Error:", e)
    end = time.time()
    print(f"{p['name']}: {end-start:.5f} 秒")

Python

# ==========================
# セル5: 爆発的バックトラッキング 分岐図
# ==========================
from graphviz import Digraph
from IPython.display import Image

dot = Digraph(comment='Catastrophic Backtracking Example')
dot.node('S', 'Start')
dot.node('A1', 'a+ を1回目')
dot.node('A2', 'a+ を2回目')
dot.node('F', 'Fail (bで不一致)')
dot.node('M', 'Match (成功)')
dot.edges(['SA1', 'A1A2', 'A2F'])
dot.edge('A2', 'A1', label='バックトラック')

dot.render('backtracking_example', format='png', cleanup=True)
Image('backtracking_example.png')

# ==========================
# セル5: 爆発的バックトラッキング 分岐図
# ==========================
from graphviz import Digraph
from IPython.display import Image

dot = Digraph(comment='Catastrophic Backtracking Example')
dot.node('S', 'Start')
dot.node('A1', 'a+ を1回目')
dot.node('A2', 'a+ を2回目')
dot.node('F', 'Fail (bで不一致)')
dot.node('M', 'Match (成功)')
dot.edges(['SA1', 'A1A2', 'A2F'])
dot.edge('A2', 'A1', label='バックトラック')

dot.render('backtracking_example', format='png', cleanup=True)
Image('backtracking_example.png')

Python

# ==========================
# セル6: バックトラック試行回数 アニメーション
# ==========================
import matplotlib.pyplot as plt
import matplotlib.animation as animation

n_max = 15
x = list(range(1, n_max+1))
y = [2**(i-1) for i in x]  # 試行回数指数増加

fig, ax = plt.subplots()
line, = ax.plot([], [], 'ro-', lw=2)
ax.set_xlim(0, n_max+1)
ax.set_ylim(0, max(y)*1.1)
ax.set_xlabel('文字列長 n')
ax.set_ylabel('バックトラック試行回数')
ax.set_title('Catastrophic Backtrackingの試行回数増加')

def init():
    line.set_data([], [])
    return line,

def animate(i):
    line.set_data(x[:i+1], y[:i+1])
    return line,

ani = animation.FuncAnimation(fig, animate, frames=n_max, init_func=init, blit=True, repeat=False)
plt.show()

# ==========================
# セル6: バックトラック試行回数 アニメーション
# ==========================
import matplotlib.pyplot as plt
import matplotlib.animation as animation

n_max = 15
x = list(range(1, n_max+1))
y = [2**(i-1) for i in x]  # 試行回数指数増加

fig, ax = plt.subplots()
line, = ax.plot([], [], 'ro-', lw=2)
ax.set_xlim(0, n_max+1)
ax.set_ylim(0, max(y)*1.1)
ax.set_xlabel('文字列長 n')
ax.set_ylabel('バックトラック試行回数')
ax.set_title('Catastrophic Backtrackingの試行回数増加')

def init():
    line.set_data([], [])
    return line,

def animate(i):
    line.set_data(x[:i+1], y[:i+1])
    return line,

ani = animation.FuncAnimation(fig, animate, frames=n_max, init_func=init, blit=True, repeat=False)
plt.show()