mirror of
https://github.com/sourcegit-scm/sourcegit.git
synced 2025-01-11 23:57:21 -08:00
optimize<Diff>: simplify DiffPlex algorithm
This commit is contained in:
parent
44873cf088
commit
d259d67fd5
4 changed files with 287 additions and 65 deletions
|
@ -1,5 +1,6 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
namespace SourceGit.Commands {
|
namespace SourceGit.Commands {
|
||||||
|
@ -8,44 +9,12 @@ namespace SourceGit.Commands {
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class Diff : Command {
|
public class Diff : Command {
|
||||||
private static readonly Regex REG_INDICATOR = new Regex(@"^@@ \-(\d+),?\d* \+(\d+),?\d* @@");
|
private static readonly Regex REG_INDICATOR = new Regex(@"^@@ \-(\d+),?\d* \+(\d+),?\d* @@");
|
||||||
private static readonly string WORD_SEPS = " \t+-*/=!:;.'\"/?|&#@%`<>()[]{}\\";
|
|
||||||
|
|
||||||
private Models.TextChanges changes = new Models.TextChanges();
|
private Models.TextChanges changes = new Models.TextChanges();
|
||||||
private List<Models.TextChanges.Line> deleted = new List<Models.TextChanges.Line>();
|
private List<Models.TextChanges.Line> deleted = new List<Models.TextChanges.Line>();
|
||||||
private List<Models.TextChanges.Line> added = new List<Models.TextChanges.Line>();
|
private List<Models.TextChanges.Line> added = new List<Models.TextChanges.Line>();
|
||||||
private Chunker chunker = new Chunker();
|
|
||||||
private int oldLine = 0;
|
private int oldLine = 0;
|
||||||
private int newLine = 0;
|
private int newLine = 0;
|
||||||
|
|
||||||
/// <summary>
/// DiffPlex chunker that splits a line into words and separator characters.
/// Every character found in WORD_SEPS becomes a piece of its own; the text
/// between two separators becomes one piece.
/// </summary>
public class Chunker : DiffPlex.IChunker {
    /// <summary>
    /// Splits the given text into pieces.
    /// </summary>
    /// <param name="text">Text to split.</param>
    /// <returns>The pieces in their original order.</returns>
    public string[] Chunk(string text) {
        var pieces = new List<string>();
        var length = text.Length;
        var wordBegin = 0;

        for (var pos = 0; pos < length; pos++) {
            // The two builds only differ in how the separator lookup is spelled.
#if NET48
            var isSeparator = WORD_SEPS.Contains(text.Substring(pos, 1));
#else
            var isSeparator = WORD_SEPS.Contains(text[pos]);
#endif
            if (!isSeparator) continue;

            // Flush the word collected so far, then the separator itself.
            if (wordBegin != pos) pieces.Add(text.Substring(wordBegin, pos - wordBegin));
            pieces.Add(text.Substring(pos, 1));
            wordBegin = pos + 1;
        }

        // Trailing word after the last separator.
        if (wordBegin < length) pieces.Add(text.Substring(wordBegin));
        return pieces.ToArray();
    }
}
|
|
||||||
|
|
||||||
public Diff(string repo, string args) {
|
public Diff(string repo, string args) {
|
||||||
Cwd = repo;
|
Cwd = repo;
|
||||||
Args = $"diff --ignore-cr-at-eol --unified=4 {args}";
|
Args = $"diff --ignore-cr-at-eol --unified=4 {args}";
|
||||||
|
@ -104,7 +73,7 @@ namespace SourceGit.Commands {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void ProcessChanges() {
|
private void ProcessChanges() {
|
||||||
if (deleted.Count > 0) {
|
if (deleted.Any()) {
|
||||||
if (added.Count == deleted.Count) {
|
if (added.Count == deleted.Count) {
|
||||||
for (int i = added.Count - 1; i >= 0; i--) {
|
for (int i = added.Count - 1; i >= 0; i--) {
|
||||||
var left = deleted[i];
|
var left = deleted[i];
|
||||||
|
@ -112,36 +81,16 @@ namespace SourceGit.Commands {
|
||||||
|
|
||||||
if (left.Content.Length > 1024 || right.Content.Length > 1024) continue;
|
if (left.Content.Length > 1024 || right.Content.Length > 1024) continue;
|
||||||
|
|
||||||
var result = DiffPlex.Differ.Instance.CreateDiffs(left.Content, right.Content, false, false, chunker);
|
var chunks = Models.TextCompare.Process(left.Content, right.Content);
|
||||||
if (result.DiffBlocks.Count > 4) continue;
|
if (chunks.Count > 4) continue;
|
||||||
|
|
||||||
foreach (var block in result.DiffBlocks) {
|
foreach (var chunk in chunks) {
|
||||||
if (block.DeleteCountA > 0) {
|
if (chunk.DeletedCount > 0) {
|
||||||
var startPos = 0;
|
left.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.DeletedStart, chunk.DeletedCount));
|
||||||
for (int j = 0; j < block.DeleteStartA; j++) {
|
|
||||||
startPos += result.PiecesOld[j].Length;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var deleteNum = 0;
|
if (chunk.AddedCount > 0) {
|
||||||
for (int j = 0; j < block.DeleteCountA; j++) {
|
right.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.AddedStart, chunk.AddedCount));
|
||||||
deleteNum += result.PiecesOld[j + block.DeleteStartA].Length;
|
|
||||||
}
|
|
||||||
|
|
||||||
left.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, deleteNum));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (block.InsertCountB > 0) {
|
|
||||||
var startPos = 0;
|
|
||||||
for (int j = 0; j < block.InsertStartB; j++) {
|
|
||||||
startPos += result.PiecesNew[j].Length;
|
|
||||||
}
|
|
||||||
|
|
||||||
var addedNum = 0;
|
|
||||||
for (int j = 0; j < block.InsertCountB; j++) {
|
|
||||||
addedNum += result.PiecesNew[j + block.InsertStartB].Length;
|
|
||||||
}
|
|
||||||
|
|
||||||
right.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, addedNum));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -151,7 +100,7 @@ namespace SourceGit.Commands {
|
||||||
deleted.Clear();
|
deleted.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (added.Count > 0) {
|
if (added.Any()) {
|
||||||
changes.Lines.AddRange(added);
|
changes.Lines.AddRange(added);
|
||||||
added.Clear();
|
added.Clear();
|
||||||
}
|
}
|
||||||
|
|
277
src/Models/TextCompare.cs
Normal file
277
src/Models/TextCompare.cs
Normal file
|
@ -0,0 +1,277 @@
|
||||||
|
using System.Collections.Generic;
|
||||||
|
|
||||||
|
namespace SourceGit.Models {
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// String difference comparison, adapted from DiffPlex.
|
||||||
|
/// </summary>
|
||||||
|
public class TextCompare {
|
||||||
|
// Separator characters used by MakeChunks: each separator found in the text becomes a chunk of its own.
private static readonly HashSet<char> SEPS = new HashSet<char>(" \t+-*/=!,:;.'\"/?|&#@%`<>()[]{}\\".ToCharArray());
|
||||||
|
|
||||||
|
/// <summary>
/// Difference information: one deleted range and/or one added range.
/// </summary>
public class Different {
    /// <summary>Start of the deleted range in the old text.</summary>
    public int DeletedStart { get; set; }

    /// <summary>Length of the deleted range; callers treat 0 as "nothing deleted".</summary>
    public int DeletedCount { get; set; }

    /// <summary>Start of the added range in the new text.</summary>
    public int AddedStart { get; set; }

    /// <summary>Length of the added range; callers treat 0 as "nothing added".</summary>
    public int AddedCount { get; set; }

    /// <summary>
    /// Creates a difference entry.
    /// </summary>
    /// <param name="dp">Start of the deleted range.</param>
    /// <param name="dc">Length of the deleted range.</param>
    /// <param name="ap">Start of the added range.</param>
    /// <param name="ac">Length of the added range.</param>
    public Different(int dp, int dc, int ap, int ac) {
        this.DeletedStart = dp;
        this.DeletedCount = dc;
        this.AddedStart = ap;
        this.AddedCount = ac;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// A chunk (piece) of a text being compared.
/// </summary>
public class Chunk {
    /// <summary>Identifier of the chunk's content; compared instead of the string itself.</summary>
    public int Hash;

    /// <summary>Whether the chunk has been marked as modified; false right after construction.</summary>
    public bool Modified;

    /// <summary>Start position of the chunk in its text.</summary>
    public int Start;

    /// <summary>Size of the chunk.</summary>
    public int Size;

    /// <summary>
    /// Creates an unmodified chunk.
    /// </summary>
    /// <param name="hash">Content identifier.</param>
    /// <param name="start">Start position.</param>
    /// <param name="size">Size of the chunk.</param>
    public Chunk(int hash, int start, int size) {
        this.Hash = hash;
        this.Start = start;
        this.Size = size;
        this.Modified = false;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Modification state of a range, i.e. the kind of the last step taken
/// by CheckModifiedEdit before it returned.
/// </summary>
public enum Edit {
    /// <summary>No edit.</summary>
    None = 0,

    /// <summary>Set by the forward scan when it steps one chunk further in the old text.</summary>
    DeletedRight = 1,

    /// <summary>Set by the reverse scan when it steps one chunk back in the old text.</summary>
    DeletedLeft = 2,

    /// <summary>Set by the forward scan when it keeps its old-text position from the neighbouring diagonal.</summary>
    AddedRight = 3,

    /// <summary>Set by the reverse scan when it keeps its old-text position from the neighbouring diagonal.</summary>
    AddedLeft = 4,
}
|
||||||
|
|
||||||
|
/// <summary>
/// Detection result for the range currently being examined.
/// </summary>
public class EditResult {
    /// <summary>Kind of the last step taken; Edit.None when nothing has to be split.</summary>
    public Edit State;

    /// <summary>Index into the old chunk list where the reported span starts.</summary>
    public int DeleteStart;

    /// <summary>Index into the old chunk list where the reported span ends.</summary>
    public int DeleteEnd;

    /// <summary>Index into the new chunk list where the reported span starts.</summary>
    public int AddStart;

    /// <summary>Index into the new chunk list where the reported span ends.</summary>
    public int AddEnd;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Compares two strings and reports the ranges that differ.
/// </summary>
/// <param name="oldValue">The original text.</param>
/// <param name="newValue">The changed text.</param>
/// <returns>
/// One entry per run of modified chunks. Starts are taken from the first
/// modified chunk of the run and counts are the summed chunk sizes; a side
/// without modified chunks is reported with start 0 and count 0.
/// </returns>
public static List<Different> Process(string oldValue, string newValue) {
    // Both texts share one table so equal chunk strings receive equal ids.
    var ids = new Dictionary<string, int>();
    var oldChunks = MakeChunks(ids, oldValue);
    var newChunks = MakeChunks(ids, newValue);
    var oldTotal = oldChunks.Count;
    var newTotal = newChunks.Count;

    // Scratch buffers reused by every level of the recursive search.
    var bufferSize = oldTotal + newTotal + 2;
    var forward = new int[bufferSize];
    var reverse = new int[bufferSize];
    CheckModified(oldChunks, 0, oldTotal, newChunks, 0, newTotal, forward, reverse);

    var differences = new List<Different>();
    var oldIdx = 0;
    var newIdx = 0;

    do {
        // Step over chunks that are unmodified on both sides.
        while (oldIdx < oldTotal && newIdx < newTotal) {
            if (oldChunks[oldIdx].Modified || newChunks[newIdx].Modified) break;
            oldIdx++;
            newIdx++;
        }

        var oldFirst = oldIdx;
        var newFirst = newIdx;

        // Collect the run of modified chunks on the old side.
        var deletedLength = 0;
        while (oldIdx < oldTotal && oldChunks[oldIdx].Modified) {
            deletedLength += oldChunks[oldIdx].Size;
            oldIdx++;
        }

        // Collect the run of modified chunks on the new side.
        var addedLength = 0;
        while (newIdx < newTotal && newChunks[newIdx].Modified) {
            addedLength += newChunks[newIdx].Size;
            newIdx++;
        }

        if (deletedLength + addedLength > 0) {
            var deletedStart = deletedLength > 0 ? oldChunks[oldFirst].Start : 0;
            var addedStart = addedLength > 0 ? newChunks[newFirst].Start : 0;
            differences.Add(new Different(deletedStart, deletedLength, addedStart, addedLength));
        }
    } while (oldIdx < oldTotal && newIdx < newTotal);

    return differences;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Splits a text into chunks: every character contained in SEPS becomes a
/// chunk of its own and the text between separators becomes one chunk.
/// </summary>
/// <param name="hashes">Table mapping chunk strings to ids; passed on to AddChunk.</param>
/// <param name="text">Text to split.</param>
/// <returns>The chunks in text order.</returns>
private static List<Chunk> MakeChunks(Dictionary<string, int> hashes, string text) {
    var result = new List<Chunk>();
    var length = text.Length;
    var wordBegin = 0;

    for (var pos = 0; pos < length; pos++) {
        if (!SEPS.Contains(text[pos])) continue;

        // Flush the word collected so far, then the separator itself.
        var wordLength = pos - wordBegin;
        if (wordLength != 0) AddChunk(result, hashes, text.Substring(wordBegin, wordLength), wordBegin);
        AddChunk(result, hashes, text.Substring(pos, 1), pos);
        wordBegin = pos + 1;
    }

    // Trailing word after the last separator.
    if (wordBegin < length) AddChunk(result, hashes, text.Substring(wordBegin), wordBegin);
    return result;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Marks the modified chunks inside [startOld, endOld) of the old list and
/// [startNew, endNew) of the new list. Equal chunks at both ends are trimmed
/// first; what remains is split around the span reported by
/// CheckModifiedEdit and both halves are processed recursively.
/// </summary>
/// <param name="chunksOld">Chunks of the old text.</param>
/// <param name="startOld">First old index to examine.</param>
/// <param name="endOld">Old index one past the last one to examine.</param>
/// <param name="chunksNew">Chunks of the new text.</param>
/// <param name="startNew">First new index to examine.</param>
/// <param name="endNew">New index one past the last one to examine.</param>
/// <param name="forward">Scratch buffer handed to CheckModifiedEdit.</param>
/// <param name="reverse">Scratch buffer handed to CheckModifiedEdit.</param>
private static void CheckModified(List<Chunk> chunksOld, int startOld, int endOld, List<Chunk> chunksNew, int startNew, int endNew, int[] forward, int[] reverse) {
    // Trim the common prefix.
    while (startOld < endOld && startNew < endNew) {
        if (chunksOld[startOld].Hash != chunksNew[startNew].Hash) break;
        startOld++;
        startNew++;
    }

    // Trim the common suffix.
    while (startOld < endOld && startNew < endNew) {
        if (chunksOld[endOld - 1].Hash != chunksNew[endNew - 1].Hash) break;
        endOld--;
        endNew--;
    }

    var oldRemaining = endOld - startOld;
    var newRemaining = endNew - startNew;

    if (oldRemaining <= 0 || newRemaining <= 0) {
        // At most one side still has chunks; all of them are modified.
        for (var k = startOld; k < endOld; k++) chunksOld[k].Modified = true;
        for (var k = startNew; k < endNew; k++) chunksNew[k].Modified = true;
        return;
    }

    var edit = CheckModifiedEdit(chunksOld, startOld, endOld, chunksNew, startNew, endNew, forward, reverse);

    // Mark the chunk touched by the last step and move the matching boundary over it.
    switch (edit.State) {
        case Edit.None:
            return;
        case Edit.DeletedRight:
            if (edit.DeleteStart - 1 > startOld) {
                edit.DeleteStart--;
                chunksOld[edit.DeleteStart].Modified = true;
            }
            break;
        case Edit.DeletedLeft:
            if (edit.DeleteEnd < endOld) {
                chunksOld[edit.DeleteEnd].Modified = true;
                edit.DeleteEnd++;
            }
            break;
        case Edit.AddedRight:
            if (edit.AddStart - 1 > startNew) {
                edit.AddStart--;
                chunksNew[edit.AddStart].Modified = true;
            }
            break;
        case Edit.AddedLeft:
            if (edit.AddEnd < endNew) {
                chunksNew[edit.AddEnd].Modified = true;
                edit.AddEnd++;
            }
            break;
    }

    // Recurse into the part before and the part after the reported span.
    CheckModified(chunksOld, startOld, edit.DeleteStart, chunksNew, startNew, edit.AddStart, forward, reverse);
    CheckModified(chunksOld, edit.DeleteEnd, endOld, chunksNew, edit.AddEnd, endNew, forward, reverse);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Searches the range [startOld, endOld) x [startNew, endNew) with a forward
/// scan and a reverse scan that alternate, each extending runs of chunks with
/// equal Hash. Returns as soon as the two scans overlap.
/// </summary>
/// <param name="chunksOld">Chunks of the old text.</param>
/// <param name="startOld">First old index of the range.</param>
/// <param name="endOld">Old index one past the end of the range.</param>
/// <param name="chunksNew">Chunks of the new text.</param>
/// <param name="startNew">First new index of the range.</param>
/// <param name="endNew">New index one past the end of the range.</param>
/// <param name="forward">Scratch buffer of furthest old offsets reached by the forward scan, indexed by diagonal.</param>
/// <param name="reverse">Scratch buffer of furthest old offsets reached by the reverse scan, indexed by diagonal.</param>
/// <returns>
/// The kind of the last step taken and the span (as absolute chunk indices)
/// covered by the run of equal chunks where the scans met. State is
/// Edit.None, with all indices left at 0, when the overlap is found in
/// round 0.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// The scans never overlapped; this is not expected to happen.
/// </exception>
private static EditResult CheckModifiedEdit(List<Chunk> chunksOld, int startOld, int endOld, List<Chunk> chunksNew, int startNew, int endNew, int[] forward, int[] reverse) {
    var lenOld = endOld - startOld;
    var lenNew = endNew - startNew;
    var max = lenOld + lenNew + 1;
    var half = max / 2;
    var delta = lenOld - lenNew;
    var deltaEven = delta % 2 == 0;
    var rs = new EditResult() { State = Edit.None };

    // Seed the neighbours read by round 0 of each scan.
    forward[1 + half] = 0;
    reverse[1 + half] = lenOld + 1;

    for (int i = 0; i <= half; i++) {

        // Forward scan
        for (int j = -i; j <= i; j += 2) {
            var idx = j + half;
            int o, n;
            if (j == -i || (j != i && forward[idx - 1] < forward[idx + 1])) {
                o = forward[idx + 1];
                rs.State = Edit.AddedRight;
            } else {
                o = forward[idx - 1] + 1;
                rs.State = Edit.DeletedRight;
            }

            n = o - j;

            var startX = o;
            var startY = n;
            // Follow the run of equal chunks as far as it goes.
            while (o < lenOld && n < lenNew && chunksOld[o + startOld].Hash == chunksNew[n + startNew].Hash) {
                o++;
                n++;
            }

            forward[idx] = o;

            // With an odd delta the overlap is tested after a forward step.
            if (!deltaEven && j - delta >= -i + 1 && j - delta <= i - 1) {
                var revIdx = (j - delta) + half;
                var revOld = reverse[revIdx];
                int revNew = revOld - j;
                if (revOld <= o && revNew <= n) {
                    if (i == 0) {
                        rs.State = Edit.None;
                    } else {
                        rs.DeleteStart = startX + startOld;
                        rs.DeleteEnd = o + startOld;
                        rs.AddStart = startY + startNew;
                        rs.AddEnd = n + startNew;
                    }
                    return rs;
                }
            }
        }

        // Reverse scan
        for (int j = -i; j <= i; j += 2) {
            var idx = j + half;
            int o, n;
            if (j == -i || (j != i && reverse[idx + 1] <= reverse[idx - 1])) {
                o = reverse[idx + 1] - 1;
                rs.State = Edit.DeletedLeft;
            } else {
                o = reverse[idx - 1];
                rs.State = Edit.AddedLeft;
            }

            n = o - (j + delta);

            var endX = o;
            var endY = n;
            // Follow the run of equal chunks backwards as far as it goes.
            while (o > 0 && n > 0 && chunksOld[startOld + o - 1].Hash == chunksNew[startNew + n - 1].Hash) {
                o--;
                n--;
            }

            reverse[idx] = o;

            // With an even delta the overlap is tested after a reverse step.
            if (deltaEven && j + delta >= -i && j + delta <= i) {
                var forIdx = (j + delta) + half;
                var forOld = forward[forIdx];
                int forNew = forOld - (j + delta);
                if (forOld >= o && forNew >= n) {
                    if (i == 0) {
                        rs.State = Edit.None;
                    } else {
                        rs.DeleteStart = o + startOld;
                        rs.DeleteEnd = endX + startOld;
                        rs.AddStart = n + startNew;
                        rs.AddEnd = endY + startNew;
                    }
                    return rs;
                }
            }
        }
    }

    // A specific exception type instead of the base System.Exception;
    // callers catching Exception still see it.
    throw new System.InvalidOperationException("SHOULD NEVER GET HERE");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Appends a chunk for the given string. Strings already present in the
/// table reuse their id; a new string is registered with the current table
/// size as its id.
/// </summary>
/// <param name="chunks">List the new chunk is appended to.</param>
/// <param name="hashes">Table mapping chunk strings to ids.</param>
/// <param name="data">Content of the chunk.</param>
/// <param name="start">Start position of the chunk in its text.</param>
private static void AddChunk(List<Chunk> chunks, Dictionary<string, int> hashes, string data, int start) {
    int id;
    if (!hashes.TryGetValue(data, out id)) {
        // First occurrence of this string: register it.
        id = hashes.Count;
        hashes.Add(data, id);
    }

    chunks.Add(new Chunk(id, start, data.Length));
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,7 +17,4 @@
|
||||||
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
|
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
|
||||||
<SatelliteResourceLanguages>none</SatelliteResourceLanguages>
|
<SatelliteResourceLanguages>none</SatelliteResourceLanguages>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
|
||||||
<PackageReference Include="DiffPlex" Version="1.7.0" />
|
|
||||||
</ItemGroup>
|
|
||||||
</Project>
|
</Project>
|
|
@ -21,6 +21,5 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
|
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
|
||||||
<PackageReference Include="DiffPlex" Version="1.7.0" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
Loading…
Reference in a new issue