From d259d67fd59eaaf3cd545a7e9188dff1adba48fe Mon Sep 17 00:00:00 2001 From: leo Date: Thu, 29 Jul 2021 20:42:44 +0800 Subject: [PATCH] optimize: simplify DiffPlex algorithm --- src/Commands/Diff.cs | 71 ++-------- src/Models/TextCompare.cs | 277 ++++++++++++++++++++++++++++++++++++++ src/SourceGit.csproj | 3 - src/SourceGit_48.csproj | 1 - 4 files changed, 287 insertions(+), 65 deletions(-) create mode 100644 src/Models/TextCompare.cs diff --git a/src/Commands/Diff.cs b/src/Commands/Diff.cs index 263bde30..9fdd4f9e 100644 --- a/src/Commands/Diff.cs +++ b/src/Commands/Diff.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Linq; using System.Text.RegularExpressions; namespace SourceGit.Commands { @@ -8,44 +9,12 @@ namespace SourceGit.Commands { /// public class Diff : Command { private static readonly Regex REG_INDICATOR = new Regex(@"^@@ \-(\d+),?\d* \+(\d+),?\d* @@"); - private static readonly string WORD_SEPS = " \t+-*/=!:;.'\"/?|&#@%`<>()[]{}\\"; - private Models.TextChanges changes = new Models.TextChanges(); private List deleted = new List(); private List added = new List(); - private Chunker chunker = new Chunker(); private int oldLine = 0; private int newLine = 0; - public class Chunker : DiffPlex.IChunker { - public string[] Chunk(string text) { - var start = 0; - var size = text.Length; - var chunks = new List(); - - for (int i = 0; i < size; i++) { -#if NET48 - var ch = text.Substring(i, 1); - if (WORD_SEPS.Contains(ch)) { - if (start != i) chunks.Add(text.Substring(start, i - start)); - chunks.Add(ch); - start = i + 1; - } -#else - var ch = text[i]; - if (WORD_SEPS.Contains(ch)) { - if (start != i) chunks.Add(text.Substring(start, i - start)); - chunks.Add(text.Substring(i, 1)); - start = i + 1; - } -#endif - } - - if (start < size) chunks.Add(text.Substring(start)); - return chunks.ToArray(); - } - } - public Diff(string repo, string args) { Cwd = repo; Args = $"diff --ignore-cr-at-eol --unified=4 {args}"; @@ -104,7 +73,7 @@ namespace SourceGit.Commands { } private void ProcessChanges() { - if (deleted.Count > 0) { + if (deleted.Any()) { if (added.Count == deleted.Count) { for (int i = added.Count - 1; i >= 0; i--) { var left = deleted[i]; @@ -112,36 +81,16 @@ namespace SourceGit.Commands { if (left.Content.Length > 1024 || right.Content.Length > 1024) continue; - var result = DiffPlex.Differ.Instance.CreateDiffs(left.Content, right.Content, false, false, chunker); - if (result.DiffBlocks.Count > 4) continue; + var chunks = Models.TextCompare.Process(left.Content, right.Content); + if (chunks.Count > 4) continue; - foreach (var block in result.DiffBlocks) { - if (block.DeleteCountA > 0) { - var startPos = 0; - for (int j = 0; j < block.DeleteStartA; j++) { - startPos += result.PiecesOld[j].Length; - } - - var deleteNum = 0; - for (int j = 0; j < block.DeleteCountA; j++) { - deleteNum += result.PiecesOld[j + block.DeleteStartA].Length; - } - - left.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, deleteNum)); + foreach (var chunk in chunks) { + if (chunk.DeletedCount > 0) { + left.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.DeletedStart, chunk.DeletedCount)); } - if (block.InsertCountB > 0) { - var startPos = 0; - for (int j = 0; j < block.InsertStartB; j++) { - startPos += result.PiecesNew[j].Length; - } - - var addedNum = 0; - for (int j = 0; j < block.InsertCountB; j++) { - addedNum += result.PiecesNew[j + block.InsertStartB].Length; - } - - right.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, addedNum)); + if (chunk.AddedCount > 0) { + right.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.AddedStart, chunk.AddedCount)); } } } @@ -151,7 +100,7 @@ namespace SourceGit.Commands { deleted.Clear(); } - if (added.Count > 0) { + if (added.Any()) { changes.Lines.AddRange(added); added.Clear(); } diff --git a/src/Models/TextCompare.cs b/src/Models/TextCompare.cs new file mode 100644 index 00000000..e5ff129a --- /dev/null +++ b/src/Models/TextCompare.cs @@ -0,0 +1,277 @@ +using System.Collections.Generic; + +namespace SourceGit.Models { + + /// + /// 字串差异对比,改写自DiffPlex + /// + public class TextCompare { + private static readonly HashSet SEPS = new HashSet(" \t+-*/=!,:;.'\"/?|&#@%`<>()[]{}\\".ToCharArray()); + + /// + /// 差异信息 + /// + public class Different { + public int DeletedStart { get; set; } + public int DeletedCount { get; set; } + public int AddedStart { get; set; } + public int AddedCount { get; set; } + + public Different(int dp, int dc, int ap, int ac) { + DeletedStart = dp; + DeletedCount = dc; + AddedStart = ap; + AddedCount = ac; + } + } + + /// + /// 分片 + /// + public class Chunk { + public int Hash; + public bool Modified; + public int Start; + public int Size; + + public Chunk(int hash, int start, int size) { + Hash = hash; + Modified = false; + Start = start; + Size = size; + } + } + + /// + /// 区间修改状态 + /// + public enum Edit { + None, + DeletedRight, + DeletedLeft, + AddedRight, + AddedLeft, + } + + /// + /// 当前区间检测结果 + /// + public class EditResult { + public Edit State; + public int DeleteStart; + public int DeleteEnd; + public int AddStart; + public int AddEnd; + } + + /// + /// 对比字串 + /// + /// + /// + /// + public static List Process(string oldValue, string newValue) { + var hashes = new Dictionary(); + var chunksOld = MakeChunks(hashes, oldValue); + var chunksNew = MakeChunks(hashes, newValue); + var sizeOld = chunksOld.Count; + var sizeNew = chunksNew.Count; + var max = sizeOld + sizeNew + 2; + var forward = new int[max]; + var reverse = new int[max]; + CheckModified(chunksOld, 0, sizeOld, chunksNew, 0, sizeNew, forward, reverse); + + var ret = new List(); + var posOld = 0; + var posNew = 0; + do { + while (posOld < sizeOld && posNew < sizeNew && !chunksOld[posOld].Modified && !chunksNew[posNew].Modified) { + posOld++; + posNew++; + } + + var beginOld = posOld; + var beginNew = posNew; + var countOld = 0; + var countNew = 0; + for (; posOld < sizeOld && chunksOld[posOld].Modified; posOld++) countOld += chunksOld[posOld].Size; + for (; posNew < sizeNew && chunksNew[posNew].Modified; posNew++) countNew += chunksNew[posNew].Size; + + if (countOld + countNew > 0) { + ret.Add(new Different( + countOld > 0 ? chunksOld[beginOld].Start : 0, + countOld, + countNew > 0 ? chunksNew[beginNew].Start : 0, + countNew)); + } + } while (posOld < sizeOld && posNew < sizeNew); + + return ret; + } + + private static List MakeChunks(Dictionary hashes, string text) { + var start = 0; + var size = text.Length; + var chunks = new List(); + + for (int i = 0; i < size; i++) { + var ch = text[i]; + if (SEPS.Contains(ch)) { + if (start != i) AddChunk(chunks, hashes, text.Substring(start, i - start), start); + AddChunk(chunks, hashes, text.Substring(i, 1), i); + start = i + 1; + } + } + + if (start < size) AddChunk(chunks, hashes, text.Substring(start), start); + return chunks; + } + + private static void CheckModified(List chunksOld, int startOld, int endOld, List chunksNew, int startNew, int endNew, int[] forward, int[] reverse) { + while (startOld < endOld && startNew < endNew && chunksOld[startOld].Hash == chunksNew[startNew].Hash) { + startOld++; + startNew++; + } + + while (startOld < endOld && startNew < endNew && chunksOld[endOld - 1].Hash == chunksNew[endNew - 1].Hash) { + endOld--; + endNew--; + } + + var lenOld = endOld - startOld; + var lenNew = endNew - startNew; + if (lenOld > 0 && lenNew > 0) { + var rs = CheckModifiedEdit(chunksOld, startOld, endOld, chunksNew, startNew, endNew, forward, reverse); + if (rs.State == Edit.None) return; + + if (rs.State == Edit.DeletedRight && rs.DeleteStart - 1 > startOld) { + chunksOld[--rs.DeleteStart].Modified = true; + } else if (rs.State == Edit.DeletedLeft && rs.DeleteEnd < endOld) { + chunksOld[rs.DeleteEnd++].Modified = true; + } else if (rs.State == Edit.AddedRight && rs.AddStart - 1 > startNew) { + chunksNew[--rs.AddStart].Modified = true; + } else if (rs.State == Edit.AddedLeft && rs.AddEnd < endNew) { + chunksNew[rs.AddEnd++].Modified = true; + } + + CheckModified(chunksOld, startOld, rs.DeleteStart, chunksNew, startNew, rs.AddStart, forward, reverse); + CheckModified(chunksOld, rs.DeleteEnd, endOld, chunksNew, rs.AddEnd, endNew, forward, reverse); + } else if (lenOld > 0) { + for (int i = startOld; i < endOld; i++) chunksOld[i].Modified = true; + } else if (lenNew > 0) { + for (int i = startNew; i < endNew; i++) chunksNew[i].Modified = true; + } + } + + private static EditResult CheckModifiedEdit(List chunksOld, int startOld, int endOld, List chunksNew, int startNew, int endNew, int[] forward, int[] reverse) { + var lenOld = endOld - startOld; + var lenNew = endNew - startNew; + var max = lenOld + lenNew + 1; + var half = max / 2; + var delta = lenOld - lenNew; + var deltaEven = delta % 2 == 0; + var rs = new EditResult() { State = Edit.None }; + + forward[1 + half] = 0; + reverse[1 + half] = lenOld + 1; + + for (int i = 0; i <= half; i++) { + + // 正向 + for (int j = -i; j <= i; j += 2) { + var idx = j + half; + int o, n; + if (j == -i || (j != i && forward[idx - 1] < forward[idx + 1])) { + o = forward[idx + 1]; + rs.State = Edit.AddedRight; + } else { + o = forward[idx - 1] + 1; + rs.State = Edit.DeletedRight; + } + + n = o - j; + + var startX = o; + var startY = n; + while (o < lenOld && n < lenNew && chunksOld[o + startOld].Hash == chunksNew[n + startNew].Hash) { + o++; + n++; + } + + forward[idx] = o; + + if (!deltaEven && j - delta >= -i + 1 && j - delta <= i - 1) { + var revIdx = (j - delta) + half; + var revOld = reverse[revIdx]; + int revNew = revOld - j; + if (revOld <= o && revNew <= n) { + if (i == 0) { + rs.State = Edit.None; + } else { + rs.DeleteStart = startX + startOld; + rs.DeleteEnd = o + startOld; + rs.AddStart = startY + startNew; + rs.AddEnd = n + startNew; + } + return rs; + } + } + } + + // 反向 + for (int j = -i; j <= i; j += 2) { + var idx = j + half; + int o, n; + if (j == -i || (j != i && reverse[idx + 1] <= reverse[idx - 1])) { + o = reverse[idx + 1] - 1; + rs.State = Edit.DeletedLeft; + } else { + o = reverse[idx - 1]; + rs.State = Edit.AddedLeft; + } + + n = o - (j + delta); + + var endX = o; + var endY = n; + while (o > 0 && n > 0 && chunksOld[startOld + o - 1].Hash == chunksNew[startNew + n - 1].Hash) { + o--; + n--; + } + + reverse[idx] = o; + + if (deltaEven && j + delta >= -i && j + delta <= i) { + var forIdx = (j + delta) + half; + var forOld = forward[forIdx]; + int forNew = forOld - (j + delta); + if (forOld >= o && forNew >= n) { + if (i == 0) { + rs.State = Edit.None; + } else { + rs.DeleteStart = o + startOld; + rs.DeleteEnd = endX + startOld; + rs.AddStart = n + startNew; + rs.AddEnd = endY + startNew; + } + return rs; + } + } + } + } + + throw new System.Exception("SHOULD NEVER GET HERE"); + } + + private static void AddChunk(List chunks, Dictionary hashes, string data, int start) { + int hash; + if (hashes.TryGetValue(data, out hash)) { + chunks.Add(new Chunk(hash, start, data.Length)); + } else { + hash = hashes.Count; + hashes.Add(data, hash); + chunks.Add(new Chunk(hash, start, data.Length)); + } + } + } +} diff --git a/src/SourceGit.csproj b/src/SourceGit.csproj index b54e6a34..a1452ac3 100644 --- a/src/SourceGit.csproj +++ b/src/SourceGit.csproj @@ -17,7 +17,4 @@ true none - - - \ No newline at end of file diff --git a/src/SourceGit_48.csproj b/src/SourceGit_48.csproj index 49b05041..df0d55cf 100644 --- a/src/SourceGit_48.csproj +++ b/src/SourceGit_48.csproj @@ -21,6 +21,5 @@ - \ No newline at end of file