mirror of
https://github.com/sourcegit-scm/sourcegit.git
synced 2025-01-23 01:36:57 -08:00
optimize<Diff>: simplify DiffPlex algorithm
This commit is contained in:
parent
44873cf088
commit
d259d67fd5
4 changed files with 287 additions and 65 deletions
|
@ -1,5 +1,6 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace SourceGit.Commands {
|
||||
|
@ -8,44 +9,12 @@ namespace SourceGit.Commands {
|
|||
/// </summary>
|
||||
public class Diff : Command {
|
||||
private static readonly Regex REG_INDICATOR = new Regex(@"^@@ \-(\d+),?\d* \+(\d+),?\d* @@");
|
||||
private static readonly string WORD_SEPS = " \t+-*/=!:;.'\"/?|&#@%`<>()[]{}\\";
|
||||
|
||||
private Models.TextChanges changes = new Models.TextChanges();
|
||||
private List<Models.TextChanges.Line> deleted = new List<Models.TextChanges.Line>();
|
||||
private List<Models.TextChanges.Line> added = new List<Models.TextChanges.Line>();
|
||||
private Chunker chunker = new Chunker();
|
||||
private int oldLine = 0;
|
||||
private int newLine = 0;
|
||||
|
||||
public class Chunker : DiffPlex.IChunker {
    /// <summary>
    ///     Splits a string into pieces. Every character contained in WORD_SEPS becomes
    ///     its own one-character piece, and the text between two separators is emitted
    ///     unchanged, so concatenating the pieces reproduces the input.
    /// </summary>
    /// <param name="text">Text to split.</param>
    /// <returns>The pieces in their original order.</returns>
    public string[] Chunk(string text) {
        var pieces = new List<string>();
        var length = text.Length;
        var wordBegin = 0;

        for (int pos = 0; pos < length; pos++) {
            // Ordinal single-character lookup; not a separator means the current word keeps growing.
            if (WORD_SEPS.IndexOf(text[pos]) < 0) continue;

            // Flush the word collected so far (if any), then the separator itself.
            if (wordBegin != pos) pieces.Add(text.Substring(wordBegin, pos - wordBegin));
            pieces.Add(text.Substring(pos, 1));
            wordBegin = pos + 1;
        }

        // Trailing word after the last separator.
        if (wordBegin < length) pieces.Add(text.Substring(wordBegin));
        return pieces.ToArray();
    }
}
|
||||
|
||||
public Diff(string repo, string args) {
|
||||
Cwd = repo;
|
||||
Args = $"diff --ignore-cr-at-eol --unified=4 {args}";
|
||||
|
@ -104,7 +73,7 @@ namespace SourceGit.Commands {
|
|||
}
|
||||
|
||||
private void ProcessChanges() {
|
||||
if (deleted.Count > 0) {
|
||||
if (deleted.Any()) {
|
||||
if (added.Count == deleted.Count) {
|
||||
for (int i = added.Count - 1; i >= 0; i--) {
|
||||
var left = deleted[i];
|
||||
|
@ -112,36 +81,16 @@ namespace SourceGit.Commands {
|
|||
|
||||
if (left.Content.Length > 1024 || right.Content.Length > 1024) continue;
|
||||
|
||||
var result = DiffPlex.Differ.Instance.CreateDiffs(left.Content, right.Content, false, false, chunker);
|
||||
if (result.DiffBlocks.Count > 4) continue;
|
||||
var chunks = Models.TextCompare.Process(left.Content, right.Content);
|
||||
if (chunks.Count > 4) continue;
|
||||
|
||||
foreach (var block in result.DiffBlocks) {
|
||||
if (block.DeleteCountA > 0) {
|
||||
var startPos = 0;
|
||||
for (int j = 0; j < block.DeleteStartA; j++) {
|
||||
startPos += result.PiecesOld[j].Length;
|
||||
}
|
||||
|
||||
var deleteNum = 0;
|
||||
for (int j = 0; j < block.DeleteCountA; j++) {
|
||||
deleteNum += result.PiecesOld[j + block.DeleteStartA].Length;
|
||||
}
|
||||
|
||||
left.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, deleteNum));
|
||||
foreach (var chunk in chunks) {
|
||||
if (chunk.DeletedCount > 0) {
|
||||
left.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.DeletedStart, chunk.DeletedCount));
|
||||
}
|
||||
|
||||
if (block.InsertCountB > 0) {
|
||||
var startPos = 0;
|
||||
for (int j = 0; j < block.InsertStartB; j++) {
|
||||
startPos += result.PiecesNew[j].Length;
|
||||
}
|
||||
|
||||
var addedNum = 0;
|
||||
for (int j = 0; j < block.InsertCountB; j++) {
|
||||
addedNum += result.PiecesNew[j + block.InsertStartB].Length;
|
||||
}
|
||||
|
||||
right.Highlights.Add(new Models.TextChanges.HighlightRange(startPos, addedNum));
|
||||
if (chunk.AddedCount > 0) {
|
||||
right.Highlights.Add(new Models.TextChanges.HighlightRange(chunk.AddedStart, chunk.AddedCount));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -151,7 +100,7 @@ namespace SourceGit.Commands {
|
|||
deleted.Clear();
|
||||
}
|
||||
|
||||
if (added.Count > 0) {
|
||||
if (added.Any()) {
|
||||
changes.Lines.AddRange(added);
|
||||
added.Clear();
|
||||
}
|
||||
|
|
277
src/Models/TextCompare.cs
Normal file
277
src/Models/TextCompare.cs
Normal file
|
@ -0,0 +1,277 @@
|
|||
using System.Collections.Generic;
|
||||
|
||||
namespace SourceGit.Models {
|
||||
|
||||
/// <summary>
|
||||
/// String difference comparison, adapted from DiffPlex.
|
||||
/// </summary>
|
||||
public class TextCompare {
|
||||
// Separator characters: MakeChunks turns each occurrence into its own one-character chunk
// and cuts the surrounding text at that position. ('/' is listed twice in the literal; the
// set stores it once.)
private static readonly HashSet<char> SEPS = new HashSet<char>(" \t+-*/=!,:;.'\"/?|&#@%`<>()[]{}\\".ToCharArray());
|
||||
|
||||
/// <summary>
///     Describes one differing region: a span removed from the old text and/or a span
///     added to the new text.
/// </summary>
public class Different {
    /// <summary>
    ///     Stores the four values as given.
    /// </summary>
    /// <param name="dp">Value for <see cref="DeletedStart"/>.</param>
    /// <param name="dc">Value for <see cref="DeletedCount"/>.</param>
    /// <param name="ap">Value for <see cref="AddedStart"/>.</param>
    /// <param name="ac">Value for <see cref="AddedCount"/>.</param>
    public Different(int dp, int dc, int ap, int ac) {
        this.DeletedStart = dp;
        this.DeletedCount = dc;
        this.AddedStart = ap;
        this.AddedCount = ac;
    }

    /// <summary>Start position of the removed span.</summary>
    public int DeletedStart { get; set; }

    /// <summary>Length of the removed span; zero when nothing was removed.</summary>
    public int DeletedCount { get; set; }

    /// <summary>Start position of the added span.</summary>
    public int AddedStart { get; set; }

    /// <summary>Length of the added span; zero when nothing was added.</summary>
    public int AddedCount { get; set; }
}
|
||||
|
||||
/// <summary>
///     A piece of text identified by a hash id, together with its position and length.
/// </summary>
public class Chunk {
    /// <summary>Identifier of the chunk's content; chunks are compared through this value.</summary>
    public int Hash;

    /// <summary>Set to true once the chunk is found to be part of a difference.</summary>
    public bool Modified = false;

    /// <summary>Start position of the chunk.</summary>
    public int Start;

    /// <summary>Length of the chunk.</summary>
    public int Size;

    /// <summary>
    ///     Creates an unmodified chunk.
    /// </summary>
    /// <param name="hash">Value for <see cref="Hash"/>.</param>
    /// <param name="start">Value for <see cref="Start"/>.</param>
    /// <param name="size">Value for <see cref="Size"/>.</param>
    public Chunk(int hash, int start, int size) {
        this.Hash = hash;
        this.Start = start;
        this.Size = size;
    }
}
|
||||
|
||||
/// <summary>
///     Kind of the last edit step recorded while searching a range for differences.
/// </summary>
public enum Edit {
    /// <summary>No edit was needed.</summary>
    None = 0,

    /// <summary>Forward search, last step consumed a chunk of the old text.</summary>
    DeletedRight = 1,

    /// <summary>Reverse search, last step consumed a chunk of the old text.</summary>
    DeletedLeft = 2,

    /// <summary>Forward search, last step consumed a chunk of the new text.</summary>
    AddedRight = 3,

    /// <summary>Reverse search, last step consumed a chunk of the new text.</summary>
    AddedLeft = 4,
}
|
||||
|
||||
/// <summary>
///     Outcome of examining one pair of ranges: the kind of the last edit step and the
///     bounds of the run at which the search stopped.
/// </summary>
public class EditResult {
    /// <summary>Kind of the last edit step; <see cref="Edit.None"/> until a search sets it.</summary>
    public Edit State = Edit.None;

    /// <summary>Start bound in the old chunk list.</summary>
    public int DeleteStart;

    /// <summary>End bound in the old chunk list.</summary>
    public int DeleteEnd;

    /// <summary>Start bound in the new chunk list.</summary>
    public int AddStart;

    /// <summary>End bound in the new chunk list.</summary>
    public int AddEnd;
}
|
||||
|
||||
/// <summary>
///     Compares two strings chunk by chunk and reports the regions in which they differ.
/// </summary>
/// <param name="oldValue">Original text. A null value is treated as an empty string.</param>
/// <param name="newValue">Updated text. A null value is treated as an empty string.</param>
/// <returns>
///     The differing regions in text order. For each region, DeletedStart/DeletedCount give the
///     character offset and length of the removed span in <paramref name="oldValue"/>, and
///     AddedStart/AddedCount give the same for the added span in <paramref name="newValue"/>.
///     A side that has no change in a region is reported with start 0 and count 0.
///     The list is empty when no chunk differs.
/// </returns>
public static List<Different> Process(string oldValue, string newValue) {
    // Public entry point: normalise null so callers get "everything added/removed"
    // instead of a NullReferenceException from deep inside the chunking code.
    if (oldValue == null) oldValue = string.Empty;
    if (newValue == null) newValue = string.Empty;

    // Both texts share one dictionary so equal chunk contents receive equal hash ids.
    var hashes = new Dictionary<string, int>();
    var chunksOld = MakeChunks(hashes, oldValue);
    var chunksNew = MakeChunks(hashes, newValue);
    var sizeOld = chunksOld.Count;
    var sizeNew = chunksNew.Count;

    // Scratch buffers shared by every recursion level of CheckModified.
    var max = sizeOld + sizeNew + 2;
    var forward = new int[max];
    var reverse = new int[max];
    CheckModified(chunksOld, 0, sizeOld, chunksNew, 0, sizeNew, forward, reverse);

    var ret = new List<Different>();
    var posOld = 0;
    var posNew = 0;
    do {
        // Skip the run of chunks that are unmodified on both sides.
        while (posOld < sizeOld && posNew < sizeNew && !chunksOld[posOld].Modified && !chunksNew[posNew].Modified) {
            posOld++;
            posNew++;
        }

        // Collect the following run of modified chunks on each side, summing their lengths.
        var beginOld = posOld;
        var beginNew = posNew;
        var countOld = 0;
        var countNew = 0;
        for (; posOld < sizeOld && chunksOld[posOld].Modified; posOld++) countOld += chunksOld[posOld].Size;
        for (; posNew < sizeNew && chunksNew[posNew].Modified; posNew++) countNew += chunksNew[posNew].Size;

        if (countOld + countNew > 0) {
            // beginOld/beginNew may equal the list size when that side has no modified
            // chunk here, hence the guards before indexing.
            ret.Add(new Different(
                countOld > 0 ? chunksOld[beginOld].Start : 0,
                countOld,
                countNew > 0 ? chunksNew[beginNew].Start : 0,
                countNew));
        }
    } while (posOld < sizeOld && posNew < sizeNew);

    return ret;
}
|
||||
|
||||
/// <summary>
///     Cuts <paramref name="text"/> into chunks: every separator character (see SEPS) forms a
///     one-character chunk, and each run of text between separators forms one chunk.
/// </summary>
/// <param name="hashes">Content-to-id table handed on to AddChunk.</param>
/// <param name="text">Text to split.</param>
/// <returns>The chunks in text order, each carrying its character offset and length.</returns>
private static List<Chunk> MakeChunks(Dictionary<string, int> hashes, string text) {
    var result = new List<Chunk>();
    var length = text.Length;
    var wordBegin = 0;

    for (int pos = 0; pos < length; pos++) {
        // Ordinary characters just extend the current word.
        if (!SEPS.Contains(text[pos])) continue;

        // Emit the pending word (if any) and then the separator itself.
        if (pos != wordBegin) AddChunk(result, hashes, text.Substring(wordBegin, pos - wordBegin), wordBegin);
        AddChunk(result, hashes, text.Substring(pos, 1), pos);
        wordBegin = pos + 1;
    }

    // Whatever follows the last separator is the final chunk.
    if (wordBegin < length) AddChunk(result, hashes, text.Substring(wordBegin), wordBegin);
    return result;
}
|
||||
|
||||
/// <summary>
///     Flags the chunks that differ between chunksOld[startOld, endOld) and
///     chunksNew[startNew, endNew) by setting their Modified field. Works recursively:
///     common head and tail are dropped, the remainder is split by CheckModifiedEdit,
///     and both halves are examined in turn.
/// </summary>
/// <param name="chunksOld">Chunks of the old text.</param>
/// <param name="startOld">First old index to examine (inclusive).</param>
/// <param name="endOld">Last old index to examine (exclusive).</param>
/// <param name="chunksNew">Chunks of the new text.</param>
/// <param name="startNew">First new index to examine (inclusive).</param>
/// <param name="endNew">Last new index to examine (exclusive).</param>
/// <param name="forward">Scratch buffer passed through to CheckModifiedEdit.</param>
/// <param name="reverse">Scratch buffer passed through to CheckModifiedEdit.</param>
private static void CheckModified(List<Chunk> chunksOld, int startOld, int endOld, List<Chunk> chunksNew, int startNew, int endNew, int[] forward, int[] reverse) {
    // Drop the common head.
    for (; startOld < endOld && startNew < endNew; startOld++, startNew++) {
        if (chunksOld[startOld].Hash != chunksNew[startNew].Hash) break;
    }

    // Drop the common tail.
    for (; startOld < endOld && startNew < endNew; endOld--, endNew--) {
        if (chunksOld[endOld - 1].Hash != chunksNew[endNew - 1].Hash) break;
    }

    var oldLeft = endOld > startOld;
    var newLeft = endNew > startNew;

    if (!oldLeft && !newLeft) return;

    if (!newLeft) {
        // Only old chunks remain: all of them were removed.
        for (int k = startOld; k < endOld; k++) chunksOld[k].Modified = true;
        return;
    }

    if (!oldLeft) {
        // Only new chunks remain: all of them were added.
        for (int k = startNew; k < endNew; k++) chunksNew[k].Modified = true;
        return;
    }

    var split = CheckModifiedEdit(chunksOld, startOld, endOld, chunksNew, startNew, endNew, forward, reverse);

    // Mark the chunk touched by the last edit step and move the matching bound past it.
    switch (split.State) {
    case Edit.None:
        return;
    case Edit.DeletedRight:
        if (split.DeleteStart - 1 > startOld) {
            split.DeleteStart--;
            chunksOld[split.DeleteStart].Modified = true;
        }
        break;
    case Edit.DeletedLeft:
        if (split.DeleteEnd < endOld) {
            chunksOld[split.DeleteEnd].Modified = true;
            split.DeleteEnd++;
        }
        break;
    case Edit.AddedRight:
        if (split.AddStart - 1 > startNew) {
            split.AddStart--;
            chunksNew[split.AddStart].Modified = true;
        }
        break;
    case Edit.AddedLeft:
        if (split.AddEnd < endNew) {
            chunksNew[split.AddEnd].Modified = true;
            split.AddEnd++;
        }
        break;
    }

    // Examine what lies before and after the run reported by CheckModifiedEdit.
    CheckModified(chunksOld, startOld, split.DeleteStart, chunksNew, startNew, split.AddStart, forward, reverse);
    CheckModified(chunksOld, split.DeleteEnd, endOld, chunksNew, split.AddEnd, endNew, forward, reverse);
}
|
||||
|
||||
/// <summary>
///     Searches chunksOld[startOld, endOld) and chunksNew[startNew, endNew) from both ends at
///     once: a forward pass advancing from the start and a reverse pass retreating from the
///     end, each allowed one more edit step per round, until the two passes overlap.
/// </summary>
/// <param name="chunksOld">Chunks of the old text.</param>
/// <param name="startOld">First old index of the range (inclusive).</param>
/// <param name="endOld">Last old index of the range (exclusive).</param>
/// <param name="chunksNew">Chunks of the new text.</param>
/// <param name="startNew">First new index of the range (inclusive).</param>
/// <param name="endNew">Last new index of the range (exclusive).</param>
/// <param name="forward">Scratch buffer: furthest old-range offset reached by the forward pass, per diagonal.</param>
/// <param name="reverse">Scratch buffer: lowest old-range offset reached by the reverse pass, per diagonal.</param>
/// <returns>
///     The kind of the last edit step taken plus the bounds, as absolute chunk indices, of the
///     run of equal chunks followed by the pass that detected the overlap. When the overlap is
///     found in round 0, State is Edit.None and the bounds are left at their defaults.
/// </returns>
/// <exception cref="System.InvalidOperationException">
///     The passes never overlapped within the allowed number of rounds; this is not expected to happen.
/// </exception>
private static EditResult CheckModifiedEdit(List<Chunk> chunksOld, int startOld, int endOld, List<Chunk> chunksNew, int startNew, int endNew, int[] forward, int[] reverse) {
    var lenOld = endOld - startOld;
    var lenNew = endNew - startNew;
    var max = lenOld + lenNew + 1;
    // Diagonal j is stored at index j + half so negative diagonals stay inside the buffers.
    var half = max / 2;
    var delta = lenOld - lenNew;
    var deltaEven = delta % 2 == 0;
    var rs = new EditResult() { State = Edit.None };

    // Seed values read by round 0 of each pass.
    forward[1 + half] = 0;
    reverse[1 + half] = lenOld + 1;

    for (int i = 0; i <= half; i++) {

        // Forward pass
        for (int j = -i; j <= i; j += 2) {
            var idx = j + half;
            int o, n;
            if (j == -i || (j != i && forward[idx - 1] < forward[idx + 1])) {
                // Continue from the neighbouring diagonal above: a chunk of the new text is consumed.
                o = forward[idx + 1];
                rs.State = Edit.AddedRight;
            } else {
                // Continue from the neighbouring diagonal below: a chunk of the old text is consumed.
                o = forward[idx - 1] + 1;
                rs.State = Edit.DeletedRight;
            }

            n = o - j;

            // Follow the run of equal chunks as far as it goes.
            var startX = o;
            var startY = n;
            while (o < lenOld && n < lenNew && chunksOld[o + startOld].Hash == chunksNew[n + startNew].Hash) {
                o++;
                n++;
            }

            forward[idx] = o;

            // With an odd delta the overlap is checked here, against the reverse results of the previous round.
            if (!deltaEven && j - delta >= -i + 1 && j - delta <= i - 1) {
                var revIdx = (j - delta) + half;
                var revOld = reverse[revIdx];
                int revNew = revOld - j;
                if (revOld <= o && revNew <= n) {
                    if (i == 0) {
                        rs.State = Edit.None;
                    } else {
                        rs.DeleteStart = startX + startOld;
                        rs.DeleteEnd = o + startOld;
                        rs.AddStart = startY + startNew;
                        rs.AddEnd = n + startNew;
                    }
                    return rs;
                }
            }
        }

        // Reverse pass
        for (int j = -i; j <= i; j += 2) {
            var idx = j + half;
            int o, n;
            if (j == -i || (j != i && reverse[idx + 1] <= reverse[idx - 1])) {
                // Continue from the neighbouring diagonal above: a chunk of the old text is consumed.
                o = reverse[idx + 1] - 1;
                rs.State = Edit.DeletedLeft;
            } else {
                // Continue from the neighbouring diagonal below: a chunk of the new text is consumed.
                o = reverse[idx - 1];
                rs.State = Edit.AddedLeft;
            }

            n = o - (j + delta);

            // Follow the run of equal chunks backwards as far as it goes.
            var endX = o;
            var endY = n;
            while (o > 0 && n > 0 && chunksOld[startOld + o - 1].Hash == chunksNew[startNew + n - 1].Hash) {
                o--;
                n--;
            }

            reverse[idx] = o;

            // With an even delta the overlap is checked here, against the forward results of this round.
            if (deltaEven && j + delta >= -i && j + delta <= i) {
                var forIdx = (j + delta) + half;
                var forOld = forward[forIdx];
                int forNew = forOld - (j + delta);
                if (forOld >= o && forNew >= n) {
                    if (i == 0) {
                        rs.State = Edit.None;
                    } else {
                        rs.DeleteStart = o + startOld;
                        rs.DeleteEnd = endX + startOld;
                        rs.AddStart = n + startNew;
                        rs.AddEnd = endY + startNew;
                    }
                    return rs;
                }
            }
        }
    }

    // A specific exception type instead of the bare System.Exception; still caught by catch (Exception).
    throw new System.InvalidOperationException("SHOULD NEVER GET HERE");
}
|
||||
|
||||
/// <summary>
///     Appends a chunk for <paramref name="data"/> to <paramref name="chunks"/>. The chunk's
///     hash is the id already registered for that content in <paramref name="hashes"/>; unseen
///     content is registered first, using the current table size as its new id.
/// </summary>
/// <param name="chunks">List receiving the new chunk.</param>
/// <param name="hashes">Content-to-id table; extended when <paramref name="data"/> is new.</param>
/// <param name="data">Content of the chunk.</param>
/// <param name="start">Start position stored in the chunk.</param>
private static void AddChunk(List<Chunk> chunks, Dictionary<string, int> hashes, string data, int start) {
    int id;
    if (!hashes.TryGetValue(data, out id)) {
        id = hashes.Count;
        hashes.Add(data, id);
    }

    chunks.Add(new Chunk(id, start, data.Length));
}
|
||||
}
|
||||
}
|
|
@ -17,7 +17,4 @@
|
|||
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
|
||||
<SatelliteResourceLanguages>none</SatelliteResourceLanguages>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="DiffPlex" Version="1.7.0" />
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@ -21,6 +21,5 @@
|
|||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
|
||||
<PackageReference Include="DiffPlex" Version="1.7.0" />
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Reference in a new issue