ねらい:Excelで「差分」を正確・高速に出す仕組みをVBAで持つ
差分アルゴリズムは、ファイルや表の「何が変わったか」を正しく示し、レビューや監査、データ同期を楽にします。初心者でも扱えるように、貼って動くテンプレを「行差分(テキスト/CSV)」「セル値差分(表)」「LCS(最長共通部分列)ベースの差分」「高速ハッシュ比較」「レポート出力」まで段階的に示します。重要なポイントは、正規化(トリムや大小統一)、複合キー設計、配列I/O、そして「追加・削除・変更」を明確に分類することです。
基本の型:行単位の差分(テキスト/CSV)を配列I/Oで作る
行差分の考え方
- 目的: 旧版ファイルと新版ファイルの「追加行」「削除行」「同一」を素早く判定。
- 手法: 各行を正規化して辞書(ハッシュマップ)に登録し、存在有無で「追加/削除」を即時分類。完全一致のみを扱うため、高速・堅牢。部分差分や並びの変更まで扱う場合はLCSへ進む。
行差分テンプレ(貼って動く)
' ModDiffLines.bas
Option Explicit
Public Sub DiffTextFiles(ByVal oldPath As String, ByVal newPath As String)
Dim oldLines() As String: oldLines = ReadAllLines(oldPath)
Dim newLines() As String: newLines = ReadAllLines(newPath)
Dim dOld As Object: Set dOld = NewDict(True)
Dim dNew As Object: Set dNew = NewDict(True)
Dim i As Long
For i = LBound(oldLines) To UBound(oldLines)
dOld(NormLine(oldLines(i))) = True
Next
For i = LBound(newLines) To UBound(newLines)
dNew(NormLine(newLines(i))) = True
Next
Dim ws As Worksheet: Set ws = PrepareOutputSheet("LineDiff")
ws.Range("A1:C1").Value = Array("Type", "Line", "Source")
Dim rowOut As Long: rowOut = 2
' 削除(旧にはあるが新にはない)
Dim k
For Each k In dOld.Keys
If Not dNew.Exists(k) Then
ws.Cells(rowOut, "A").Value = "DELETED"
ws.Cells(rowOut, "B").Value = k
ws.Cells(rowOut, "C").Value = "OLD"
rowOut = rowOut + 1
End If
Next
' 追加(新にはあるが旧にはない)
For Each k In dNew.Keys
If Not dOld.Exists(k) Then
ws.Cells(rowOut, "A").Value = "ADDED"
ws.Cells(rowOut, "B").Value = k
ws.Cells(rowOut, "C").Value = "NEW"
rowOut = rowOut + 1
End If
Next
ws.Columns.AutoFit
MsgBox "行差分を生成しました(" & rowOut - 2 & "件)。", vbInformation
End Sub
Private Function ReadAllLines(ByVal path As String) As String()
Dim st As Object: Set st = CreateObject("ADODB.Stream")
st.Type = 2: st.Charset = "UTF-8": st.Open: st.LoadFromFile path
Dim txt As String: txt = st.ReadText
st.Close
Dim lines() As String: lines = Split(Replace(txt, vbCrLf, vbLf), vbLf)
ReadAllLines = lines
End Function
Private Function NormLine(ByVal s As String) As String
NormLine = LCase$(Trim$(s))
End Function
Private Function NewDict(Optional ByVal textCompare As Boolean = True) As Object
Dim d As Object: Set d = CreateObject("Scripting.Dictionary")
d.CompareMode = IIf(textCompare, 1, 0)
Set NewDict = d
End Function
Private Function PrepareOutputSheet(ByVal name As String) As Worksheet
Dim ws As Worksheet
On Error Resume Next: Set ws = ThisWorkbook.Worksheets(name): On Error GoTo 0
If ws Is Nothing Then Set ws = ThisWorkbook.Worksheets.Add: ws.Name = name
ws.Cells.Clear
Set PrepareOutputSheet = ws
End Function
VB重要ポイントの深掘り
- 正規化: 行頭末の空白、大小の揺らぎは、差分に関係ないなら入口で潰す。仕様次第で正規化関数を切り替える。
- 速度: 行数が多くても辞書の存在判定は高速。並びの違いは扱わないので「存在の差」に特化。
表の差分:行キーで「追加・削除・変更」を分類
表差分の考え方
- 目的: 旧/新のテーブル(A:Fなど)で、行単位の追加・削除・変更列を出す。
- 手法: 複合キーで「同一行」を識別し、値のハッシュ(文字列結合)で変更判定。配列I/Oで一括処理。
表差分テンプレ(貼って動く)
' ModDiffTable.bas
Option Explicit
Private Const SEP As String = Chr$(30)
Public Sub DiffTables(ByVal oldSheet As String, ByVal newSheet As String, _
ByVal keyCols As String, ByVal valCols As String)
Dim wsOld As Worksheet: Set wsOld = Worksheets(oldSheet)
Dim wsNew As Worksheet: Set wsNew = Worksheets(newSheet)
Dim aO As Variant: aO = wsOld.Range("A1").CurrentRegion.Value
Dim aN As Variant: aN = wsNew.Range("A1").CurrentRegion.Value
Dim keys() As Long: keys = ColsToIndex(keyCols, aO)
Dim vals() As Long: vals = ColsToIndex(valCols, aO)
Dim dOld As Object: Set dOld = NewDict(True)
Dim dHashOld As Object: Set dHashOld = NewDict(True)
Dim r As Long
For r = 2 To UBound(aO, 1)
Dim k As String: k = MakeKey(aO, r, keys)
Dim h As String: h = MakeHash(aO, r, vals)
dOld(k) = r
dHashOld(k) = h
Next
Dim ws As Worksheet: Set ws = PrepareOutputSheet("TableDiff")
ws.Range("A1:D1").Value = Array("Type", "Key", "OldHash", "NewHash")
Dim rowOut As Long: rowOut = 2
' 新側を走査:追加/変更
Dim keysN() As Long: keysN = ColsToIndex(keyCols, aN)
Dim valsN() As Long: valsN = ColsToIndex(valCols, aN)
Dim seen As Object: Set seen = NewDict(True)
For r = 2 To UBound(aN, 1)
Dim k As String: k = MakeKey(aN, r, keysN)
Dim hNew As String: hNew = MakeHash(aN, r, valsN)
seen(k) = True
If Not dOld.Exists(k) Then
ws.Cells(rowOut, "A").Value = "ADDED"
ws.Cells(rowOut, "B").Value = k
ws.Cells(rowOut, "D").Value = hNew
rowOut = rowOut + 1
Else
Dim hOld As String: hOld = dHashOld(k)
If hOld <> hNew Then
ws.Cells(rowOut, "A").Value = "CHANGED"
ws.Cells(rowOut, "B").Value = k
ws.Cells(rowOut, "C").Value = hOld
ws.Cells(rowOut, "D").Value = hNew
rowOut = rowOut + 1
End If
End If
Next
' 旧側にのみあるキー=削除
Dim k
For Each k In dOld.Keys
If Not seen.Exists(k) Then
ws.Cells(rowOut, "A").Value = "DELETED"
ws.Cells(rowOut, "B").Value = k
ws.Cells(rowOut, "C").Value = dHashOld(k)
rowOut = rowOut + 1
End If
Next
ws.Columns.AutoFit
MsgBox "表差分を生成しました(" & rowOut - 2 & "件)。", vbInformation
End Sub
Private Function ColsToIndex(ByVal colsCsv As String, ByVal a As Variant) As Long()
Dim parts() As String: parts = Split(colsCsv, ",")
Dim idx() As Long: ReDim idx(0 To UBound(parts))
Dim i As Long
For i = 0 To UBound(parts)
idx(i) = ColLetterToIndex(Trim$(parts(i)))
Next
ColsToIndex = idx
End Function
Private Function ColLetterToIndex(ByVal s As String) As Long
ColLetterToIndex = Range(s & "1").Column
End Function
Private Function MakeKey(ByVal a As Variant, ByVal r As Long, ByVal idx() As Long) As String
Dim i As Long, buf As String
For i = LBound(idx) To UBound(idx)
buf = buf & LCase$(Trim$(CStr(a(r, idx(i))))) & SEP
Next
MakeKey = buf
End Function
Private Function MakeHash(ByVal a As Variant, ByVal r As Long, ByVal idx() As Long) As String
Dim i As Long, buf As String
For i = LBound(idx) To UBound(idx)
buf = buf & Trim$(CStr(a(r, idx(i)))) & "|"
Next
MakeHash = buf ' 実務はMD5/SHAをPowerShellで生成しても良い
End Function
Private Function NewDict(Optional ByVal textCompare As Boolean = True) As Object
Dim d As Object: Set d = CreateObject("Scripting.Dictionary")
d.CompareMode = IIf(textCompare, 1, 0)
Set NewDict = d
End Function
Private Function PrepareOutputSheet(ByVal name As String) As Worksheet
Dim ws As Worksheet
On Error Resume Next: Set ws = ThisWorkbook.Worksheets(name): On Error GoTo 0
If ws Is Nothing Then Set ws = ThisWorkbook.Worksheets.Add: ws.Name = name
ws.Cells.Clear
Set PrepareOutputSheet = ws
End Function
VB重要ポイントの深掘り
- 複合キー: 複数列で同一行を一意化。区切り文字はありえないコード(Chr(30))で衝突防止。
- 変更判定: 値列を連結した「ハッシュ」で変更を比較。列が多い場合、PowerShellでMD5化してもよい。
LCSベース(並びを保持した差分):小〜中規模の厳密差分
LCS(最長共通部分列)で行差分を整形する
- 目的: ファイルの並びや局所変更まで「追加/削除/同一」を序列として示す(diff風)。
- 手法: LCSで旧/新の共通行を特定し、その間を「追加/削除」に分類。数千〜数万行なら実用。
LCS差分テンプレ(貼って動く)
' ModDiffLcs.bas
Option Explicit
Public Sub DiffTextLcs(ByVal oldPath As String, ByVal newPath As String)
Dim A() As String: A = ReadAllLines(oldPath)
Dim B() As String: B = ReadAllLines(newPath)
Dim i As Long, j As Long
Dim n As Long: n = UBound(A) - LBound(A) + 1
Dim m As Long: m = UBound(B) - LBound(B) + 1
' 正規化
For i = LBound(A) To UBound(A): A(i) = NormLine(A(i)): Next
For j = LBound(B) To UBound(B): B(j) = NormLine(B(j)): Next
' DPテーブル(n+1)*(m+1)
Dim L() As Long: ReDim L(0 To n, 0 To m)
Dim ai As Long, bj As Long
For ai = 1 To n
For bj = 1 To m
If A(LBound(A) + ai - 1) = B(LBound(B) + bj - 1) Then
L(ai, bj) = L(ai - 1, bj - 1) + 1
Else
L(ai, bj) = IIf(L(ai - 1, bj) >= L(ai, bj - 1), L(ai - 1, bj), L(ai, bj - 1))
End If
Next
Next
' 復元(差分列挙)
Dim ws As Worksheet: Set ws = PrepareOutputSheet("LcsDiff")
ws.Range("A1:C1").Value = Array("Type", "Line", "Side")
Dim rowOut As Long: rowOut = 2
ai = n: bj = m
Do While ai > 0 Or bj > 0
If ai > 0 And bj > 0 And A(LBound(A) + ai - 1) = B(LBound(B) + bj - 1) Then
ws.Cells(rowOut, "A").Value = "SAME"
ws.Cells(rowOut, "B").Value = A(LBound(A) + ai - 1)
ws.Cells(rowOut, "C").Value = "-"
ai = ai - 1: bj = bj - 1
ElseIf bj > 0 And (ai = 0 Or L(ai, bj - 1) >= L(ai - 1, bj)) Then
ws.Cells(rowOut, "A").Value = "ADDED"
ws.Cells(rowOut, "B").Value = B(LBound(B) + bj - 1)
ws.Cells(rowOut, "C").Value = "NEW"
bj = bj - 1
Else
ws.Cells(rowOut, "A").Value = "DELETED"
ws.Cells(rowOut, "B").Value = A(LBound(A) + ai - 1)
ws.Cells(rowOut, "C").Value = "OLD"
ai = ai - 1
End If
rowOut = rowOut + 1
Loop
' 逆順になっているので並べ替え(下から上へ入れた場合)
ws.Range("A2:C" & rowOut - 1).Sort Key1:=ws.Range("A2"), Order1:=xlAscending, Header:=xlNo
ws.Columns.AutoFit
MsgBox "LCS差分を生成しました(" & rowOut - 2 & "件)。", vbInformation
End Sub
Private Function ReadAllLines(ByVal path As String) As String()
Dim st As Object: Set st = CreateObject("ADODB.Stream")
st.Type = 2: st.Charset = "UTF-8": st.Open: st.LoadFromFile path
Dim txt As String: txt = st.ReadText: st.Close
Dim lines() As String: lines = Split(Replace(txt, vbCrLf, vbLf), vbLf)
ReadAllLines = lines
End Function
Private Function NormLine(ByVal s As String) As String
NormLine = LCase$(Trim$(s))
End Function
Private Function PrepareOutputSheet(ByVal name As String) As Worksheet
Dim ws As Worksheet
On Error Resume Next: Set ws = ThisWorkbook.Worksheets(name): On Error GoTo 0
If ws Is Nothing Then Set ws = ThisWorkbook.Worksheets.Add: ws.Name = name
ws.Cells.Clear
Set PrepareOutputSheet = ws
End Function
VB重要ポイントの深掘り
- 複雑度: LCSのDPはおおむね (O(n \times m))。数万行までが目安。巨大ファイルは行ハッシュ+ブロック差分に寄せる。
- 見やすさ: SAME/ADDED/DELETEDを並び順で出せるため、レビュー向き。
高速比較のコア:ハッシュ・正規化・チャンク設計
ハッシュ比較の考え方
- 目的: 巨大データで差分を高速に判定する。
- 手法: 各行や各レコードの「正規化済み文字列」をハッシュ(ここでは簡易連結)にして比較。厳密ハッシュが必要ならPowerShellのMD5/SHAを利用し、VBAは表示に専念。
PowerShell連携の一例(MD5を作ってVBAが読む)
# calc_md5.ps1
param([string]$path,[string]$outCsv)
Get-Content -Path $path -Encoding UTF8 | ForEach-Object {
$bytes = [Text.Encoding]::UTF8.GetBytes($_)
$md5 = [System.BitConverter]::ToString([System.Security.Cryptography.MD5]::Create().ComputeHash($bytes)).Replace("-", "")
[PSCustomObject]@{Line=$_;MD5=$md5}
} | Export-Csv -NoTypeInformation -Path $outCsv -Encoding UTF8
' ModMd5Load.bas(CSV読込→辞書へ)
Option Explicit
Public Function LoadLineMd5(ByVal csvPath As String) As Object
Dim st As Object: Set st = CreateObject("ADODB.Stream")
st.Type = 2: st.Charset = "UTF-8": st.Open: st.LoadFromFile csvPath
Dim txt As String: txt = st.ReadText: st.Close
Dim lines() As String: lines = Split(Replace(txt, vbCrLf, vbLf), vbLf)
Dim d As Object: Set d = NewDict(True)
Dim i As Long
For i = 2 To UBound(lines) ' ヘッダー除外
If Len(lines(i)) = 0 Then Exit For
Dim rec() As String: rec = Split(lines(i), ",")
If UBound(rec) >= 1 Then d(LCase$(Trim$(rec(0)))) = rec(1)
Next
Set LoadLineMd5 = d
End Function
Private Function NewDict(Optional ByVal textCompare As Boolean = True) As Object
Dim d As Object: Set d = CreateObject("Scripting.Dictionary")
d.CompareMode = IIf(textCompare, 1, 0)
Set NewDict = d
End Function
VB重要ポイントの深掘り
- 正規化→ハッシュ: 仕様に応じて空白・大小・改行差を正規化してからハッシュ生成。これで“意味のない差”を除去。
- VBAの役割: 重い計算は外部で、VBAは表示・分類・出力に集中すると運用品質が上がる。
差分レポートの見せ方:追加/削除/変更の色分け・列差分の明細
列ごとの変更明細を出す(表差分の拡張)
' ModDiffColumns.bas
Option Explicit
Public Sub ShowChangedColumns(ByVal oldSheet As String, ByVal newSheet As String, _
ByVal keyCols As String, ByVal valCols As String)
' 前節のDiffTablesの結果(変更キー)を用いて、列単位の違いを出す実装例
' 実務では変更行ごとに旧/新の値を突き合わせ、異なる列だけ抽出
End Sub
VB運用の深掘り
- 色分け: ADDED=緑、DELETED=赤、CHANGED=黄など、Conditional Formattingで視認性アップ。
- 出力固定: レポートの列構成(Type/Key/Old/New)は固定しておくと、Power Queryや下流ツールでの取り込みが容易。
実務での落とし穴と対策
正規化の過不足
- 過剰: 意味のある差(大文字小文字やスペース)が潰れてしまう。仕様に合わせて最小限に。
- 不足: 実質同じなのに別物と判定。入口でルール化して揺らぎをゼロに。
複合キーの誤設計
- 不足キー: 同一とみなすべき行が別物になる。キー列は業務上の一意性に必ず合わせる。
- 過剰キー: 変更判定が過敏になる。「自然キー+補助キー」までで十分か再確認。
巨大データでのLCS
- 対策: 行差分(存在差)とブロック単位比較に寄せる、または外部ツールへ委譲。VBAでのDPは数万行までが現実的。
まとめ:目的に合わせて「存在差分」「並び保持差分」を選び、正規化とキーを設計する
- 行差分(辞書)で「追加/削除」を高速に。表差分(複合キー+ハッシュ)で「変更」まで精密に。
- 並びの保持やレビュー用途はLCSで見やすく整形。巨大ファイルはハッシュ+外部併用。
- 正規化・キー設計・配列I/Oで、差分は速く正確に作れる。
