optimize<Diff>: simplify DiffPlex algorithm

This commit is contained in:
leo 2021-07-29 20:42:44 +08:00
parent 44873cf088
commit d259d67fd5
4 changed files with 287 additions and 65 deletions

View file

@ -1,5 +1,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
namespace SourceGit.Commands { namespace SourceGit.Commands {
@ -8,44 +9,12 @@ namespace SourceGit.Commands {
/// </summary> /// </summary>
public class Diff : Command { public class Diff : Command {
private static readonly Regex REG_INDICATOR = new Regex(@"^@@ \-(\d+),?\d* \+(\d+),?\d* @@"); private static readonly Regex REG_INDICATOR = new Regex(@"^@@ \-(\d+),?\d* \+(\d+),?\d* @@");
private static readonly string WORD_SEPS = " \t+-*/=!:;.'\"/?|&#@%`<>()[]{}\\";
private Models.TextChanges changes = new Models.TextChanges(); private Models.TextChanges changes = new Models.TextChanges();
private List<Models.TextChanges.Line> deleted = new List<Models.TextChanges.Line>(); private List<Models.TextChanges.Line> deleted = new List<Models.TextChanges.Line>();
private List<Models.TextChanges.Line> added = new List<Models.TextChanges.Line>(); private List<Models.TextChanges.Line> added = new List<Models.TextChanges.Line>();
private Chunker chunker = new Chunker();
private int oldLine = 0; private int oldLine = 0;
private int newLine = 0; private int newLine = 0;
/// <summary>
/// Chunker handed to DiffPlex: splits a line of text into word chunks, where every
/// character listed in WORD_SEPS ends the current word and is emitted as a
/// one-character chunk of its own.
/// </summary>
public class Chunker : DiffPlex.IChunker {
    /// <summary>
    /// Splits <paramref name="text"/> into words and separator characters.
    /// </summary>
    /// <param name="text">Text to split.</param>
    /// <returns>The chunks in their original order; concatenating them yields the input text.</returns>
    public string[] Chunk(string text) {
        var start = 0;
        var size = text.Length;
        var chunks = new List<string>();

        for (int i = 0; i < size; i++) {
            // string.IndexOf(char) is an ordinal search available on every target framework,
            // so no NET48-specific branch is needed for the separator test.
            if (WORD_SEPS.IndexOf(text[i]) < 0) continue;

            // Flush the word accumulated before this separator, then the separator itself.
            if (start != i) chunks.Add(text.Substring(start, i - start));
            chunks.Add(text.Substring(i, 1));
            start = i + 1;
        }

        // Trailing word after the last separator (or the whole text when there was none).
        if (start < size) chunks.Add(text.Substring(start));
        return chunks.ToArray();
    }
}
public Diff(string repo, string args) { public Diff(string repo, string args) {
Cwd = repo; Cwd = repo;
Args = $"diff --ignore-cr-at-eol --unified=4 {args}"; Args = $"diff --ignore-cr-at-eol --unified=4 {args}";
@ -104,7 +73,7 @@ namespace SourceGit.Commands {
} }
private void ProcessChanges() { private void ProcessChanges() {
if (deleted.Count > 0) { if (deleted.Any()) {
if (added.Count == deleted.Count) { if (added.Count == deleted.Count) {
for (int i = added.Count - 1; i >= 0; i--) { for (int i = added.Count - 1; i >= 0; i--) {
var left = deleted[i]; var left = deleted[i];
@ -112,36 +81,16 @@ namespace SourceGit.Commands {
if (left.Content.Length > 1024 || right.Content.Length > 1024) continue; if (left.Content.Length > 1024 || right.Content.Length > 1024) continue;
var result = DiffPlex.Differ.Instance.CreateDiffs(left.Content, right.Content, false, false, chunker); var chunks = Models.TextCompare.Process(left.Content, right.Content);
if (result.DiffBlocks.Count > 4) continue; if (chunks.Count > 4) continue;
foreach (var block in result.DiffBlocks) { foreach (var chunk in chunks) {
if (block.DeleteCountA > 0) { if (chunk.DeletedCount > 0) {
var startPos = 0; left.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.DeletedStart, chunk.DeletedCount));
for (int j = 0; j < block.DeleteStartA; j++) {
startPos += result.PiecesOld[j].Length;
} }
var deleteNum = 0; if (chunk.AddedCount > 0) {
for (int j = 0; j < block.DeleteCountA; j++) { right.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.AddedStart, chunk.AddedCount));
deleteNum += result.PiecesOld[j + block.DeleteStartA].Length;
}
left.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, deleteNum));
}
if (block.InsertCountB > 0) {
var startPos = 0;
for (int j = 0; j < block.InsertStartB; j++) {
startPos += result.PiecesNew[j].Length;
}
var addedNum = 0;
for (int j = 0; j < block.InsertCountB; j++) {
addedNum += result.PiecesNew[j + block.InsertStartB].Length;
}
right.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, addedNum));
} }
} }
} }
@ -151,7 +100,7 @@ namespace SourceGit.Commands {
deleted.Clear(); deleted.Clear();
} }
if (added.Count > 0) { if (added.Any()) {
changes.Lines.AddRange(added); changes.Lines.AddRange(added);
added.Clear(); added.Clear();
} }

277
src/Models/TextCompare.cs Normal file
View file

@ -0,0 +1,277 @@
using System.Collections.Generic;
namespace SourceGit.Models {
/// <summary>
/// Word-level comparison of two strings. The algorithm is adapted from DiffPlex.
/// </summary>
public class TextCompare {
    // Characters that end a word; each one is also emitted as a one-character chunk of its own.
    private static readonly HashSet<char> SEPS = new HashSet<char>(" \t+-*/=!,:;.'\"/?|&#@%`<>()[]{}\\".ToCharArray());

    /// <summary>
    /// One changed region, expressed as character ranges in the old and the new string.
    /// A start value is 0 when the matching count is 0.
    /// </summary>
    public class Different {
        public int DeletedStart { get; set; }
        public int DeletedCount { get; set; }
        public int AddedStart { get; set; }
        public int AddedCount { get; set; }

        /// <param name="dp">Character offset of the deleted range in the old string.</param>
        /// <param name="dc">Number of deleted characters.</param>
        /// <param name="ap">Character offset of the added range in the new string.</param>
        /// <param name="ac">Number of added characters.</param>
        public Different(int dp, int dc, int ap, int ac) {
            DeletedStart = dp;
            DeletedCount = dc;
            AddedStart = ap;
            AddedCount = ac;
        }
    }

    /// <summary>
    /// A word or a single separator character cut from one of the compared strings.
    /// </summary>
    public class Chunk {
        // Identifier shared by all chunks with identical text (assigned by AddChunk).
        public int Hash;
        // Set to true by the comparison when the chunk has no counterpart on the other side.
        public bool Modified;
        // Character offset of the chunk in its source string.
        public int Start;
        // Length of the chunk in characters.
        public int Size;

        public Chunk(int hash, int start, int size) {
            Hash = hash;
            Modified = false;
            Start = start;
            Size = size;
        }
    }

    /// <summary>
    /// Kind of the last step taken by the search in CheckModifiedEdit before it returned.
    /// </summary>
    public enum Edit {
        // Both searches met without taking any step: the ranges need no further splitting.
        None,
        // Forward search, last step advanced the old-side position.
        DeletedRight,
        // Reverse search, last step moved the old-side position back.
        DeletedLeft,
        // Forward search, last step kept the old-side position (the new side advanced).
        AddedRight,
        // Reverse search, last step kept the old-side position (the new side moved back).
        AddedLeft,
    }

    /// <summary>
    /// Result of one CheckModifiedEdit call: the last step kind and the chunk-index range
    /// on each side where the forward and reverse searches met.
    /// </summary>
    public class EditResult {
        public Edit State;
        public int DeleteStart;
        public int DeleteEnd;
        public int AddStart;
        public int AddEnd;
    }

    /// <summary>
    /// Compares two strings word by word.
    /// </summary>
    /// <param name="oldValue">Original text; null is treated as an empty string.</param>
    /// <param name="newValue">Changed text; null is treated as an empty string.</param>
    /// <returns>
    /// The changed regions from left to right. The list is empty when both strings
    /// split into the same chunk sequence.
    /// </returns>
    public static List<Different> Process(string oldValue, string newValue) {
        var hashes = new Dictionary<string, int>();
        // A null argument would otherwise fail inside MakeChunks with a NullReferenceException.
        var chunksOld = MakeChunks(hashes, oldValue ?? string.Empty);
        var chunksNew = MakeChunks(hashes, newValue ?? string.Empty);
        var sizeOld = chunksOld.Count;
        var sizeNew = chunksNew.Count;

        // Scratch buffers reused by every (recursive) CheckModifiedEdit call.
        var max = sizeOld + sizeNew + 2;
        var forward = new int[max];
        var reverse = new int[max];
        CheckModified(chunksOld, 0, sizeOld, chunksNew, 0, sizeNew, forward, reverse);

        var ret = new List<Different>();
        var posOld = 0;
        var posNew = 0;
        do {
            // Walk over chunks that are unmodified on both sides.
            while (posOld < sizeOld && posNew < sizeNew && !chunksOld[posOld].Modified && !chunksNew[posNew].Modified) {
                posOld++;
                posNew++;
            }

            var beginOld = posOld;
            var beginNew = posNew;
            var countOld = 0;
            var countNew = 0;

            // Consume the run of modified chunks on each side, summing its length in characters.
            for (; posOld < sizeOld && chunksOld[posOld].Modified; posOld++) {
                countOld += chunksOld[posOld].Size;
            }
            for (; posNew < sizeNew && chunksNew[posNew].Modified; posNew++) {
                countNew += chunksNew[posNew].Size;
            }

            if (countOld + countNew > 0) {
                ret.Add(new Different(
                    countOld > 0 ? chunksOld[beginOld].Start : 0,
                    countOld,
                    countNew > 0 ? chunksNew[beginNew].Start : 0,
                    countNew));
            }
        } while (posOld < sizeOld && posNew < sizeNew);

        return ret;
    }

    /// <summary>
    /// Splits <paramref name="text"/> into chunks: runs of non-separator characters, plus one
    /// chunk per separator character. <paramref name="hashes"/> is shared between both
    /// compared strings so that equal chunk texts receive equal Hash values.
    /// </summary>
    private static List<Chunk> MakeChunks(Dictionary<string, int> hashes, string text) {
        var start = 0;
        var size = text.Length;
        var chunks = new List<Chunk>();

        for (int i = 0; i < size; i++) {
            if (!SEPS.Contains(text[i])) continue;

            // Flush the word accumulated before this separator, then the separator itself.
            if (start != i) AddChunk(chunks, hashes, text.Substring(start, i - start), start);
            AddChunk(chunks, hashes, text.Substring(i, 1), i);
            start = i + 1;
        }

        // Trailing word after the last separator (or the whole text when there was none).
        if (start < size) AddChunk(chunks, hashes, text.Substring(start), start);
        return chunks;
    }

    /// <summary>
    /// Sets the Modified flag on the chunks of [startOld, endOld) and [startNew, endNew)
    /// that differ between the two sides. Works recursively: the range is split around
    /// the region reported by CheckModifiedEdit and each part is processed on its own.
    /// </summary>
    private static void CheckModified(List<Chunk> chunksOld, int startOld, int endOld, List<Chunk> chunksNew, int startNew, int endNew, int[] forward, int[] reverse) {
        // Drop the common prefix.
        while (startOld < endOld && startNew < endNew && chunksOld[startOld].Hash == chunksNew[startNew].Hash) {
            startOld++;
            startNew++;
        }

        // Drop the common suffix.
        while (startOld < endOld && startNew < endNew && chunksOld[endOld - 1].Hash == chunksNew[endNew - 1].Hash) {
            endOld--;
            endNew--;
        }

        var lenOld = endOld - startOld;
        var lenNew = endNew - startNew;

        if (lenOld > 0 && lenNew > 0) {
            var rs = CheckModifiedEdit(chunksOld, startOld, endOld, chunksNew, startNew, endNew, forward, reverse);
            if (rs.State == Edit.None) return;

            // Flag the chunk touched by the last search step and widen the reported range by it,
            // so that the recursive calls below do not visit that chunk again.
            if (rs.State == Edit.DeletedRight && rs.DeleteStart - 1 > startOld) {
                chunksOld[--rs.DeleteStart].Modified = true;
            } else if (rs.State == Edit.DeletedLeft && rs.DeleteEnd < endOld) {
                chunksOld[rs.DeleteEnd++].Modified = true;
            } else if (rs.State == Edit.AddedRight && rs.AddStart - 1 > startNew) {
                chunksNew[--rs.AddStart].Modified = true;
            } else if (rs.State == Edit.AddedLeft && rs.AddEnd < endNew) {
                chunksNew[rs.AddEnd++].Modified = true;
            }

            // Recurse into the parts before and after the reported range.
            CheckModified(chunksOld, startOld, rs.DeleteStart, chunksNew, startNew, rs.AddStart, forward, reverse);
            CheckModified(chunksOld, rs.DeleteEnd, endOld, chunksNew, rs.AddEnd, endNew, forward, reverse);
        } else if (lenOld > 0) {
            // Nothing left on the new side: every remaining old chunk was deleted.
            for (int i = startOld; i < endOld; i++) {
                chunksOld[i].Modified = true;
            }
        } else if (lenNew > 0) {
            // Nothing left on the old side: every remaining new chunk was added.
            for (int i = startNew; i < endNew; i++) {
                chunksNew[i].Modified = true;
            }
        }
    }

    /// <summary>
    /// Runs a forward search (from the range starts) and a reverse search (from the range
    /// ends) in alternation until the two overlap, and reports where they met.
    /// NOTE(review): this appears to mirror DiffPlex's edit-length calculation; confirm
    /// against the DiffPlex source before changing any index arithmetic here.
    /// </summary>
    /// <returns>
    /// An EditResult whose State is Edit.None when the searches met before taking any step;
    /// otherwise State is the kind of the last step and the Start/End fields hold absolute
    /// chunk indexes of the meeting range.
    /// </returns>
    /// <exception cref="System.InvalidOperationException">
    /// The searches never overlapped; this is not expected to happen.
    /// </exception>
    private static EditResult CheckModifiedEdit(List<Chunk> chunksOld, int startOld, int endOld, List<Chunk> chunksNew, int startNew, int endNew, int[] forward, int[] reverse) {
        var lenOld = endOld - startOld;
        var lenNew = endNew - startNew;
        var max = lenOld + lenNew + 1;
        var half = max / 2;
        var delta = lenOld - lenNew;
        var deltaEven = delta % 2 == 0;
        var rs = new EditResult() { State = Edit.None };

        // Seed values read by the first iteration of each search direction.
        forward[1 + half] = 0;
        reverse[1 + half] = lenOld + 1;

        for (int i = 0; i <= half; i++) {
            // Forward search
            for (int j = -i; j <= i; j += 2) {
                var idx = j + half;
                int o, n;
                if (j == -i || (j != i && forward[idx - 1] < forward[idx + 1])) {
                    o = forward[idx + 1];
                    rs.State = Edit.AddedRight;
                } else {
                    o = forward[idx - 1] + 1;
                    rs.State = Edit.DeletedRight;
                }
                n = o - j;

                var startX = o;
                var startY = n;

                // Follow the run of equal chunks.
                while (o < lenOld && n < lenNew && chunksOld[o + startOld].Hash == chunksNew[n + startNew].Hash) {
                    o++;
                    n++;
                }
                forward[idx] = o;

                if (!deltaEven && j - delta >= -i + 1 && j - delta <= i - 1) {
                    var revIdx = (j - delta) + half;
                    var revOld = reverse[revIdx];
                    int revNew = revOld - j;
                    if (revOld <= o && revNew <= n) {
                        if (i == 0) {
                            rs.State = Edit.None;
                        } else {
                            rs.DeleteStart = startX + startOld;
                            rs.DeleteEnd = o + startOld;
                            rs.AddStart = startY + startNew;
                            rs.AddEnd = n + startNew;
                        }
                        return rs;
                    }
                }
            }

            // Reverse search
            for (int j = -i; j <= i; j += 2) {
                var idx = j + half;
                int o, n;
                if (j == -i || (j != i && reverse[idx + 1] <= reverse[idx - 1])) {
                    o = reverse[idx + 1] - 1;
                    rs.State = Edit.DeletedLeft;
                } else {
                    o = reverse[idx - 1];
                    rs.State = Edit.AddedLeft;
                }
                n = o - (j + delta);

                var endX = o;
                var endY = n;

                // Follow the run of equal chunks backwards.
                while (o > 0 && n > 0 && chunksOld[startOld + o - 1].Hash == chunksNew[startNew + n - 1].Hash) {
                    o--;
                    n--;
                }
                reverse[idx] = o;

                if (deltaEven && j + delta >= -i && j + delta <= i) {
                    var forIdx = (j + delta) + half;
                    var forOld = forward[forIdx];
                    int forNew = forOld - (j + delta);
                    if (forOld >= o && forNew >= n) {
                        if (i == 0) {
                            rs.State = Edit.None;
                        } else {
                            rs.DeleteStart = o + startOld;
                            rs.DeleteEnd = endX + startOld;
                            rs.AddStart = n + startNew;
                            rs.AddEnd = endY + startNew;
                        }
                        return rs;
                    }
                }
            }
        }

        throw new System.InvalidOperationException("SHOULD NEVER GET HERE");
    }

    /// <summary>
    /// Appends a chunk for <paramref name="data"/> to <paramref name="chunks"/>. The chunk's
    /// Hash is looked up in <paramref name="hashes"/>; unseen text is registered with the
    /// next free number (the current dictionary size).
    /// </summary>
    private static void AddChunk(List<Chunk> chunks, Dictionary<string, int> hashes, string data, int start) {
        int hash;
        if (!hashes.TryGetValue(data, out hash)) {
            hash = hashes.Count;
            hashes.Add(data, hash);
        }
        chunks.Add(new Chunk(hash, start, data.Length));
    }
}
}

View file

@ -17,7 +17,4 @@
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance> <PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
<SatelliteResourceLanguages>none</SatelliteResourceLanguages> <SatelliteResourceLanguages>none</SatelliteResourceLanguages>
</PropertyGroup> </PropertyGroup>
<ItemGroup>
<PackageReference Include="DiffPlex" Version="1.7.0" />
</ItemGroup>
</Project> </Project>

View file

@ -21,6 +21,5 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" /> <PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
<PackageReference Include="DiffPlex" Version="1.7.0" />
</ItemGroup> </ItemGroup>
</Project> </Project>