以下は「実用的な正規表現全文集」に対応する自動テストスニペットです。各パターンに対して「マッチするケース」「マッチしないケース」を用意しており、実行すると各ケースが期待どおりの結果になったかどうかを [OK] / [NG] でまとめて表示します。コピーしてそのまま実行してください(Python 3)。
import re
from textwrap import dedent
# Registry of regex test specs, keyed by a short pattern name.
# Each value is a dict with three keys:
#   "pattern"  - the compiled regular expression
#   "match"    - sample strings expected to be found by pattern.search()
#   "nonmatch" - sample strings expected NOT to be found by pattern.search()
#
# All patterns are raw strings (single ``r`` prefix) so backslashes reach the
# regex engine untouched.
#
# NOTE(review): the sample lists are kept verbatim. Because the harness uses
# search() rather than fullmatch(), some "nonmatch" samples are still found
# (e.g. the bare digit run "123456" satisfies jp_phone_flexible) and will be
# reported as NG. Confirm whether search semantics are intended.
patterns = {
    # Japanese phone number with optional area-code parentheses, free
    # separators and an optional extension suffix.
    "jp_phone_flexible": {
        "pattern": re.compile(r"(?x)\b(?:\(?0\d{1,4}\)?[ \-–.]*)?\d{1,4}(?:[ \-–.]*\d{1,4})*(?:[ \-–.]*(?:ext|ext\.|x)\s*\d{1,5})?\b"),
        "match": ["03-1234-5678", "(045) 123 4567", "0120 12 3456", "03.1234.5678 ext.123"],
        "nonmatch": ["abc-1234-5678", "123456", "+81-3-1234-5678"],
    },
    # 070/080/090/050 prefix followed by two 4-digit groups.
    "jp_mobile_strict": {
        "pattern": re.compile(r"\b(?:0(?:70|80|90)|050)[ \-–.]?\d{4}[ \-–.]?\d{4}\b"),
        "match": ["090-1234-5678", "08012345678", "050 1234 5678"],
        "nonmatch": ["070-123-4567", "091-1234-5678"],
    },
    # "+" followed by a non-zero digit and 1-14 further digits.
    "e164": {
        "pattern": re.compile(r"\+[1-9]\d{1,14}\b"),
        "match": ["+819012345678", "+14155552671"],
        "nonmatch": ["0819012345678", "+012345"],
    },
    # International number with loose separators and optional extension.
    "intl_flexible": {
        "pattern": re.compile(r"(?x)\b\+?[0-9]{1,3}[ \-.\(]*\d{1,4}[ \-.\)]*(?:\d{1,4}[ \-.\)]*)+(?:[ \-.,]*(?:ext|x|ext\.)[ \-]?\d{1,6})?\b"),
        "match": ["+81 90-1234-5678", "+44 (20) 1234 5678 x123", "+1-415-555-2671 ext 45"],
        "nonmatch": ["90-1234-5678", "+81-abc-def-ghij"],
    },
    # Permissive e-mail shape: local part, "@", dotted domain.
    "email_idn": {
        "pattern": re.compile(r"[A-Za-z0-9.!#$%&'*+/=?^_`{|}~-]+@[A-Za-z0-9\-._~%]+(?:\.[A-Za-z0-9\-._~%]+)+"),
        "match": ["user@example.com", "user.name+tag@sub.example.co.jp", "user@xn--wgv71a119e"],
        "nonmatch": ["user@@example.com", "user@-example.com", '"quoted"@example'],
    },
    # Anchored e-mail with length-limited local part, labels and TLD.
    "email_strict": {
        # The inline (?x) flag must stay at the very start of the string.
        # The indentation of the following lines is insignificant because
        # verbose mode ignores whitespace outside character classes.
        "pattern": re.compile(dedent(r"""(?x)
            ^[A-Za-z0-9](?:[A-Za-z0-9._%+-]{0,62}[A-Za-z0-9])?
            @
            (?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+
            [A-Za-z]{2,63}$
        """)),
        "match": ["a@example.com", "long_local-part_123@example-domain.co"],
        "nonmatch": [".startdot@example.com", "user@toolongtld.abcdefghijklmnop"],
    },
    # Dotted host name whose last label is alphabetic or starts with "xn--".
    "domain_idn": {
        "pattern": re.compile(r"\b(?:(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+(?:[A-Za-z]{2,63}|xn--[A-Za-z0-9-]+))\b"),
        "match": ["example.com", "sub.example.co.jp", "xn--wgv71a119e.jp"],
        "nonmatch": ["example..com", "-example.com"],
    },
    # Four dot-separated octets, each limited to 0-255.
    "ipv4_strict": {
        "pattern": re.compile(r"\b(?:(?:25[0-5]|2[0-4]\d|1?\d{1,2})\.){3}(?:25[0-5]|2[0-4]\d|1?\d{1,2})\b"),
        "match": ["192.168.0.1", "0.0.0.0", "255.255.255.255"],
        "nonmatch": ["256.100.1.1", "01.02.03.04"],
    },
    # Full eight-group form, or a form containing a "::" abbreviation.
    "ipv6_simple": {
        "pattern": re.compile(r"(?xi)\b(?:[0-9A-F]{1,4}:){7}[0-9A-F]{1,4}\b|\b(?:[0-9A-F]{1,4}:){0,7}::(?:[0-9A-F]{1,4}:){0,7}[0-9A-F]{0,4}\b"),
        "match": ["2001:0db8:85a3:0000:0000:8a2e:0370:7334", "fe80::1", "::1"],
        "nonmatch": ["2001:db8:::1", "2001:db8:85a3:z:0:8a2e:0370:7334"],
    },
    # Date (19xx/20xx) with optional time and optional "Z"/offset suffix.
    "iso_datetime": {
        "pattern": re.compile(r"\b(19|20)\d{2}[-/\.](0[1-9]|1[0-2])[-/\.](0[1-9]|[12]\d|3[01])(?:[Tt ]([01]\d|2[0-3]):[0-5]\d(?::[0-5]\d)?)?(?:Z|[+\-](?:[01]\d|2[0-3]):[0-5]\d)?\b"),
        "match": ["2023-07-15", "2023/07/15 12:34", "2023-07-15T23:59:59Z", "2023-07-15T09:30+09:00"],
        "nonmatch": ["2023-13-01", "20230715"],
    },
    # Optional currency marker, optional sign, grouped digits, optional fraction.
    "currency": {
        "pattern": re.compile(r"\b(?:¥|\$|€|EUR)?\s*[+-]?\d{1,3}(?:[,\s]\d{3})*(?:\.\d+)?\b"),
        "match": ["$1,234.56", "¥ 1 234", "EUR1234.5"],
        "nonmatch": ["1,23,4", "$-"],
    },
    # Four groups of four digits, or a bare run of 15-16 digits.
    "cc_like": {
        "pattern": re.compile(r"\b(?:\d{4}[ \-]?){3}\d{4}\b|\b\d{15,16}\b"),
        "match": ["4111 1111 1111 1111", "4111111111111111", "3782-822463-10005"],
        "nonmatch": ["1234 567 890", "4111 1111 1111 111X"],
    },
    # Double-quoted identifier-like key followed by a colon.
    "json_key": {
        "pattern": re.compile(r'"\s*([A-Za-z0-9_]+)\s*"\s*:'),
        "match": ['"name":', '"user_id" :', '"age": 30'],
        "nonmatch": ['"complex key name":', "'single-quoted':"],
    },
    # http/https URL with named groups: host, port, path.
    "url_http": {
        "pattern": re.compile(r"https?://(?P<host>[^:/\s]+)(?::(?P<port>\d{1,5}))?(?P<path>/[^\s]*)?"),
        "match": ["http://example.com", "https://example.com:8080/path/to/page?x=1&y=2", "https://xn--wgv71a119e.jp/"],
        "nonmatch": ["www.example.com", "http:/example.com"],
    },
    # Drive letter, ":\", then backslash-separated path components.
    "win_path": {
        "pattern": re.compile(r"[A-Za-z]:\\(?:[^\\/:*?\"<>|\r\n]+\\)*[^\\/:*?\"<>|\r\n]*"),
        # A raw string literal cannot end with a backslash, so the sample
        # with a trailing separator is written with escaped backslashes.
        "match": [r"C:\Users\Alice\Documents\file.txt", "D:\\a\\b\\c\\"],
        "nonmatch": ["C:Users\\Alice", r"C:\inva|id\name.txt"],
    },
    # One or more characters from the U+3040-U+30FF and U+4E00-U+9FFF ranges.
    "jp_text": {
        "pattern": re.compile(r"[\u3040-\u309F\u30A0-\u30FF\u4E00-\u9FFF]+"),
        "match": ["こんにちは", "コンニチハ123", "東京都"],
        "nonmatch": ["12345", "!@#"],
    },
    # Signed decimal number with optional exponent.
    "float_num": {
        "pattern": re.compile(r"[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:[eE][+-]?\d+)?"),
        "match": ["3.14", "-0.5", "1e10", ".75"],
        "nonmatch": ["..5", "1.2.3"],
    },
    # Digits grouped in threes by commas.
    "comma_amount": {
        "pattern": re.compile(r"\b\d{1,3}(?:,\d{3})*\b"),
        "match": ["1,234,567", "123", "0"],
        "nonmatch": ["12,34", "1,2345"],
    },
    # Digits immediately preceded by the literal "ID:" (lookbehind).
    "id_after_label": {
        "pattern": re.compile(r"(?<=ID:)\d+"),
        "match": ["ID:12345", "UserID:ID:6789"],  # note: second contains "ID:6789" substring
        "nonmatch": ["ID:abc", "ID:"],
    },
}
def test_patterns(specs):
    """Run every spec's sample strings through its compiled pattern.

    Args:
        specs: Mapping of spec name to a dict with the keys ``"pattern"``
            (an object exposing ``search``), ``"match"`` and ``"nonmatch"``
            (iterables of sample strings).

    Returns:
        A list of ``(name, matches_ok, nonmatches_ok)`` tuples in the
        iteration order of ``specs``. Each ``*_ok`` list holds
        ``(sample, ok)`` pairs: for a "match" sample ``ok`` is True when
        ``search`` found something, for a "nonmatch" sample ``ok`` is True
        when ``search`` found nothing.

    Raises:
        KeyError: If a spec lacks one of the three expected keys.
    """
    results = []
    for name, spec in specs.items():
        search = spec["pattern"].search
        # "match" samples pass when search() finds at least one occurrence.
        matches_ok = [(s, bool(search(s))) for s in spec["match"]]
        # "nonmatch" samples pass when search() finds nothing at all.
        nonmatches_ok = [(s, not search(s)) for s in spec["nonmatch"]]
        results.append((name, matches_ok, nonmatches_ok))
    return results


# The name starts with "test_", but this is a helper that takes a required
# argument, not a pytest test case. Opt out of pytest collection so that
# running this module under pytest does not fail on a missing "specs" fixture.
test_patterns.__test__ = False
def print_report(results):
    """Print an OK/NG line for every sample in ``results``.

    Args:
        results: Iterable of ``(name, matches_ok, nonmatches_ok)`` tuples,
            where each ``*_ok`` item is an iterable of ``(sample, ok)``
            pairs.

    For each entry a header with the name is printed, followed by the
    "match cases" section and then the "non-match cases" section. Every
    sample is shown via ``repr`` and tagged ``OK`` when its flag is truthy,
    ``NG`` otherwise.
    """
    for label, hit_rows, miss_rows in results:
        print(f"\n=== {label} ===")
        sections = ((" match cases:", hit_rows), (" non-match cases:", miss_rows))
        for heading, rows in sections:
            print(heading)
            for sample, passed in rows:
                status = "OK" if passed else "NG"
                print(f" [{status}] {sample!r}")
if __name__ == "__main__":
    # Script entry point: evaluate every spec in ``patterns`` and print the
    # per-sample OK/NG report.
    print_report(test_patterns(patterns))
Python使い方メモ
- 生文字列(r"...")はパターン定義内で既に使っています。コピー時に r プレフィックスを落としたり、バックスラッシュを変更したりしないでください。
- 各パターンには「典型的なマッチ/非マッチ」の例をテストケースとして添えています。追加のケースは patterns[...] の "match" / "nonmatch" リストに足して試してください。
- 実運用では「抽出後の正規化(ハイフン削除、IDNA 変換、int / datetime への変換など)」を必ず行ってから最終判定してください。
