Python | 正規表現テストスニペット(複数ケースを自動判定するコード)

Python
スポンサーリンク

以下は「実用的な正規表現全文集」に対応する自動テストスニペットです。各パターンに対して「マッチするケース」「マッチしないケース」を用意し、実行するとどのケースがマッチしたかをまとめて表示します。コピーしてそのまま実行してください(Python 3)。

import re
from textwrap import dedent

# Regex test table. Each entry maps a pattern name to:
#   "pattern"  - the compiled regular expression,
#   "match"    - sample strings expected to contain a match,
#   "nonmatch" - sample strings expected to contain no match.
# All pattern literals are raw strings (single ``r`` prefix) so that the
# backslashes reach the regex engine unchanged.
patterns = {
    "jp_phone_flexible": {
        "pattern": re.compile(r"(?x)\b(?:\(?0\d{1,4}\)?[ \-–.]*)?\d{1,4}(?:[ \-–.]*\d{1,4})*(?:[ \-–.]*(?:ext|ext\.|x)\s*\d{1,5})?\b"),
        "match": ["03-1234-5678", "(045) 123 4567", "0120 12 3456", "03.1234.5678 ext.123"],
        "nonmatch": ["abc-1234-5678", "123456", "+81-3-1234-5678"]
    },
    "jp_mobile_strict": {
        "pattern": re.compile(r"\b(?:0(?:70|80|90)|050)[ \-–.]?\d{4}[ \-–.]?\d{4}\b"),
        "match": ["090-1234-5678", "08012345678", "050 1234 5678"],
        "nonmatch": ["070-123-4567", "091-1234-5678"]
    },
    "e164": {
        "pattern": re.compile(r"\+[1-9]\d{1,14}\b"),
        "match": ["+819012345678", "+14155552671"],
        "nonmatch": ["0819012345678", "+012345"]
    },
    "intl_flexible": {
        "pattern": re.compile(r"(?x)\b\+?[0-9]{1,3}[ \-.\(]*\d{1,4}[ \-.\)]*(?:\d{1,4}[ \-.\)]*)+(?:[ \-.,]*(?:ext|x|ext\.)[ \-]?\d{1,6})?\b"),
        "match": ["+81 90-1234-5678", "+44 (20) 1234 5678 x123", "+1-415-555-2671 ext 45"],
        "nonmatch": ["90-1234-5678", "+81-abc-def-ghij"]
    },
    "email_idn": {
        "pattern": re.compile(r"[A-Za-z0-9.!#$%&'*+/=?^_`{|}~-]+@[A-Za-z0-9\-._~%]+(?:\.[A-Za-z0-9\-._~%]+)+"),
        "match": ["user@example.com", "user.name+tag@sub.example.co.jp", "user@xn--wgv71a119e"],
        "nonmatch": ["user@@example.com", "user@-example.com", '"quoted"@example']
    },
    "email_strict": {
        # Verbose mode ((?x)): the newlines and indentation inside the
        # triple-quoted literal are ignored by the regex engine.
        "pattern": re.compile(dedent(r"""(?x)
            ^[A-Za-z0-9](?:[A-Za-z0-9._%+-]{0,62}[A-Za-z0-9])?
            @
            (?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+
            [A-Za-z]{2,63}$
        """)),
        "match": ["a@example.com", "long_local-part_123@example-domain.co"],
        "nonmatch": [".startdot@example.com", "user@toolongtld.abcdefghijklmnop"]
    },
    "domain_idn": {
        "pattern": re.compile(r"\b(?:(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+(?:[A-Za-z]{2,63}|xn--[A-Za-z0-9-]+))\b"),
        "match": ["example.com", "sub.example.co.jp", "xn--wgv71a119e.jp"],
        "nonmatch": ["example..com", "-example.com"]
    },
    "ipv4_strict": {
        "pattern": re.compile(r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d{1,2})\.){3}(?:25[0-5]|2[0-4]\d|1?\d{1,2})\b"),
        "match": ["192.168.0.1", "0.0.0.0", "255.255.255.255"],
        "nonmatch": ["256.100.1.1", "01.02.03.04"]
    },
    "ipv6_simple": {
        "pattern": re.compile(r"(?xi)\b(?:[0-9A-F]{1,4}:){7}[0-9A-F]{1,4}\b|\b(?:[0-9A-F]{1,4}:){0,7}::(?:[0-9A-F]{1,4}:){0,7}[0-9A-F]{0,4}\b"),
        "match": ["2001:0db8:85a3:0000:0000:8a2e:0370:7334", "fe80::1", "::1"],
        "nonmatch": ["2001:db8:::1", "2001:db8:85a3:z:0:8a2e:0370:7334"]
    },
    "iso_datetime": {
        "pattern": re.compile(r"\b(19|20)\d{2}[-/\.](0[1-9]|1[0-2])[-/\.](0[1-9]|[12]\d|3[01])(?:[Tt ]([01]\d|2[0-3]):[0-5]\d(?::[0-5]\d)?)?(?:Z|[+\-](?:[01]\d|2[0-3]):[0-5]\d)?\b"),
        "match": ["2023-07-15", "2023/07/15 12:34", "2023-07-15T23:59:59Z", "2023-07-15T09:30+09:00"],
        "nonmatch": ["2023-13-01", "20230715"]
    },
    "currency": {
        "pattern": re.compile(r"\b(?:¥|\$|€|EUR)?\s*[+-]?\d{1,3}(?:[,\s]\d{3})*(?:\.\d+)?\b"),
        "match": ["$1,234.56", "¥ 1 234", "EUR1234.5"],
        "nonmatch": ["1,23,4", "$-"]
    },
    "cc_like": {
        "pattern": re.compile(r"\b(?:\d{4}[ \-]?){3}\d{4}\b|\b\d{15,16}\b"),
        "match": ["4111 1111 1111 1111", "4111111111111111", "3782-822463-10005"],
        "nonmatch": ["1234 567 890", "4111 1111 1111 111X"]
    },
    "json_key": {
        "pattern": re.compile(r'"\s*([A-Za-z0-9_]+)\s*"\s*:'),
        "match": ['"name":', '"user_id" :', '"age": 30'],
        "nonmatch": ['"complex key name":', "'single-quoted':"]
    },
    "url_http": {
        "pattern": re.compile(r"https?://(?P<host>[^:/\s]+)(?::(?P<port>\d{1,5}))?(?P<path>/[^\s]*)?"),
        "match": ["http://example.com", "https://example.com:8080/path/to/page?x=1&y=2", "https://xn--wgv71a119e.jp/"],
        "nonmatch": ["www.example.com", "http:/example.com"]
    },
    "win_path": {
        "pattern": re.compile(r"[A-Za-z]:\\(?:[^\\/:*?\"<>|\r\n]+\\)*[^\\/:*?\"<>|\r\n]*"),
        # A raw string literal cannot end with a backslash, so the sample
        # with a trailing "\" is written as an ordinary escaped string.
        "match": [r"C:\Users\Alice\Documents\file.txt", "D:\\a\\b\\c\\"],
        "nonmatch": ["C:Users\\Alice", r"C:\inva|id\name.txt"]
    },
    "jp_text": {
        "pattern": re.compile(r"[\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]+"),
        "match": ["こんにちは", "コンニチハ123", "東京都"],
        "nonmatch": ["12345", "!@#"]
    },
    "float_num": {
        "pattern": re.compile(r"[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:[eE][+-]?\d+)?"),
        "match": ["3.14", "-0.5", "1e10", ".75"],
        "nonmatch": ["..5", "1.2.3"]
    },
    "comma_amount": {
        "pattern": re.compile(r"\b\d{1,3}(?:,\d{3})*\b"),
        "match": ["1,234,567", "123", "0"],
        "nonmatch": ["12,34", "1,2345"]
    },
    "id_after_label": {
        "pattern": re.compile(r"(?<=ID:)\d+"),
        "match": ["ID:12345", "UserID:ID:6789"],  # note: second contains "ID:6789" substring
        "nonmatch": ["ID:abc", "ID:"]
    },
}

def test_patterns(specs):
    """Check every pattern in *specs* against its sample strings.

    Args:
        specs: Mapping of pattern name to a dict with the keys
            ``"pattern"`` (a compiled regex), ``"match"`` (strings expected
            to contain a match) and ``"nonmatch"`` (strings expected to
            contain no match).

    Returns:
        A list of ``(name, matches_ok, nonmatches_ok)`` tuples in the
        iteration order of *specs*. ``matches_ok`` and ``nonmatches_ok`` are
        lists of ``(sample, ok)`` pairs, where ``ok`` is True when the
        sample behaved as expected.
    """
    results = []
    for name, spec in specs.items():
        # ``search`` (not ``fullmatch``) is used: a sample counts as matching
        # when the pattern is found anywhere inside it.
        search = spec["pattern"].search
        matches_ok = [(s, bool(search(s))) for s in spec["match"]]
        nonmatches_ok = [(s, not search(s)) for s in spec["nonmatch"]]
        results.append((name, matches_ok, nonmatches_ok))
    return results


# The function name starts with "test_" but it takes a required argument, so
# pytest would collect it as a test and fail looking for a "specs" fixture.
# Opt it out of test collection; normal calls are unaffected.
test_patterns.__test__ = False

def print_report(results):
    """Print an OK/NG line for every sample of every pattern.

    Args:
        results: Iterable of ``(name, matches_ok, nonmatches_ok)`` tuples,
            where the last two items are lists of ``(sample, ok)`` pairs.
    """
    for pattern_name, expected_hits, expected_misses in results:
        print(f"\n=== {pattern_name} ===")
        sections = (
            (" match cases:", expected_hits),
            (" non-match cases:", expected_misses),
        )
        for heading, outcomes in sections:
            print(heading)
            for sample, passed in outcomes:
                status = "OK" if passed else "NG"
                print(f"  [{status}] {sample!r}")

# Script entry point: evaluate every pattern and print the OK/NG report.
if __name__ == "__main__":
    print_report(test_patterns(patterns))
Python

使い方メモ

  • 生文字列(r"…")はパターン定義内で既に使っています。コピー時に raw プレフィックスを落としたり、バックスラッシュを変更しないでください。
  • 各パターンは「典型的なマッチ/非マッチ」を示すテスト集合です。追加のケースは patterns[...] の "match"/"nonmatch" リストに足して試してください。
  • 実運用では「抽出後の正規化(ハイフン削除、IDNA 変換、int/datetime 変換など)」を必ず行ってから最終判定してください。

Python
スポンサーリンク
シェアする
@lifehackerをフォローする
スポンサーリンク
タイトルとURLをコピーしました