一个简单字符串差异对比暴力算法实现
如题:请求出两个字符串的差异部分,并以不同的颜色区分显示到浏览器上。
1. 解题思路
1. 找出两字符串中相同的部分,标记;
2. 找出两字符串中不同的部分,标记;
3. 尽可能长的匹配相同部分;
4. 尽可能少的使用复杂度(所有算法的重要目标);
2. 算法实现
算法实现如下:(js实现)
DOCTYPE html>
DOCTYPE html>
<html>
<head>
<title>diff function testtitle>
<script type="text/javascript">
// 思路1: 使用双指针暴力解法
// 1. 先扫描a, 直到一个与b[j]相同的元素为止, 保存为aQueue
// 2. 再扫描b, 每次的查找范围为aQueue
// 3. 如果找到, 则进行接下来的最长匹配
// 4. 如果没有找到, 则让a进行最长匹配
// 5. 进入下一轮循环,直到a循环为止
// 时间复杂度: a:m, b:n, a1:m, b1:m(m+1)/2*n, ∴ O(m2n)
var logSwitch = 1;
function diffWithWhitespace(a, b) {
var aValues = a.split("");
var bValues = b.split("");
var alen = aValues.length;
var blen = bValues.length;
var i = 0, j = 0;
var equalLongest = [];
var aDiffLongest = [];
var aResult = [];
var bResult = [];
// 上一次比较结果, EQUAL:相等, ADIFF:A, BDIFF:B
var lastDiffResultType = "EQUAL";
while(i < alen || j < blen) {
if(aValues[i] == bValues[j]) {
// todo: 记录结果, 到a中,b中
if(lastDiffResultType != "EQUAL"
|| equalLongest.length == 0) {
equalLongest = [];
aDiffLongest = [];
lastDiffResultType = "EQUAL";
aResult.push({"item": equalLongest, "type":"equal"})
bResult.push({"item": equalLongest, "type":"equal"})
}
equalLongest.push(aValues[i]);
printLog("equal:" + aValues[i] + "
");
i++;
j++;
continue;
}
var i2 = i, j2 = j;
aDiffLongest = [];
while((i2) < alen
&& aValues[i2] != bValues[j2]) {
aDiffLongest.push(aValues[i2]);
++i2;
}
var aDiffTmp = [];
var bDiffTmp = [];
if(i2 > alen) {
// no equal find
// the last one
}
else if(i2 != alen && aValues[i2] == bValues[j]) {
aDiffLongest.push(aValues[i2]);
}
else{
if(j >= blen) {
aDiffTmp.push(aValues[i]);
// the last one
aResult.push({"item": aDiffTmp, "type":"diff"});
i++;
continue;
}
// a中未找到,全部退回到b中进行查找
bDiffTmp.push(bValues[j]);
bResult.push({"item": bDiffTmp, "type":"diff"})
// 去重相同项,也同时跳过上一相同的项
while(++j2 < blen
&& bValues[j2] == bValues[j]) {
bDiffTmp.push(bValues[j2]);
j = j2;
}
printLog("bdiff:" + bDiffTmp + "
");
j++;
lastDiffResultType = "BDIFF";
continue;
}
var curMaxStep = aDiffLongest.length;
var foundCloser = 0;
while(++j2 < blen && curMaxStep-- > 0) {
var i3 = 0;
for (;i3 < aDiffLongest.length; i3++) {
if(bValues[j2] == aDiffLongest[i3]) {
// 相同段
foundCloser = 1;
break;
}
}
if(foundCloser == 1) {
for (var c = i; c < i + i3; c++) {
aDiffTmp.push(aValues[c]);
}
for (var c = j ; c < j2; c++) {
bDiffTmp.push(bValues[c]);
}
if(aDiffTmp.length > 0) {
aResult.push({"item": aDiffTmp, "type":"diff"});
printLog("adiff:" + aDiffTmp + "");
}
if(bDiffTmp.length > 0) {
bResult.push({"item": bDiffTmp, "type":"diff"});
printLog("bdiff:" + bDiffTmp + "");
}
var eqItem = bValues[j2];
if(lastDiffResultType != "EQUAL"
|| equalLongest.length == 0) {
equalLongest = [];
aDiffLongest = [];
lastDiffResultType = "EQUAL";
aResult.push({"item": equalLongest, "type":"equal"})
bResult.push({"item": equalLongest, "type":"equal"})
}
equalLongest.push(eqItem);
printLog("equal:" + eqItem +"
");
aDiffLongest.splice(0, 1);
i = i + i3;
j = j2;
i++;
j++;
break;
}
else {
if(aDiffLongest.length == 0) {
lastDiffResultType = "BDIFF";
}
else{
lastDiffResultType = "ADIFF";
}
lastDiffResultType = "DIFF";
}
}
if(!foundCloser) {
for (var c = aDiffLongest.length - 1; c > 0; c--) {
aDiffTmp.push(aDiffLongest[0]);
aDiffLongest.splice(0, 1);
}
for (var c = j ; c < j2; c++) {
bDiffTmp.push(bValues[c]);
}
if(aDiffTmp.length > 0) {
aResult.push({"item": aDiffTmp, "type":"diff"})
printLog("adiff:" + aDiffTmp + "");
}
if(bDiffTmp.length > 0) {
bResult.push({"item": bDiffTmp, "type":"diff"});
printLog("bdiff:" + bDiffTmp + "");
}
var eqItem = aDiffLongest[0];
if(lastDiffResultType != "EQUAL"
|| equalLongest.length == 0) {
equalLongest = [];
aDiffLongest = [];
bDiffLongest = [];
lastDiffResultType = "EQUAL";
aResult.push({"item": equalLongest, "type":"equal"})
bResult.push({"item": equalLongest, "type":"equal"})
}
equalLongest.push(eqItem);
printLog("equal:" + eqItem +"
");
aDiffLongest.splice(0, 1);
i = i2 - i3;
j = j2;
i++;
j++;
lastDiffResultType = "ADIFF";
continue;
}
}
return {"a": aResult, "b":bResult};
}
turnOffLogSwitch();
var aText = "今天 是个 好天气";
var bText = "今天, 真是 一个 好阴天啊";
diffWithWhitespaceAndAppendBody(aText, bText);
aText = "ParseException";
bText = "RuntimeException";
diffWithWhitespaceAndAppendBody(aText, bText);
function diffWithWhitespaceAndAppendBody(a, b) {
printLog(aText);
printLog("");
printLog(bText);
printLog("compare result:
");
var diffResult = diffWithWhitespace(aText, bText);
document.write("A SIDE:
")
diffResult.a.forEach(function (r) {
writeDiffResult(r);
});
document.write("B SIDE:
")
diffResult.b.forEach(function (r) {
writeDiffResult(r);
});
}
function writeDiffResult(structText) {
var item = structText.item.join("");
if(structText.type != "equal") {
item = "" + item + "";
}
document.write(item);
}
function printLog(msg) {
if(logSwitch == 1) {
document.write(msg);
}
}
function turnOnLogSwitch() {
logSwitch = 1;
}
function turnOffLogSwitch() {
logSwitch = 0;
}
script>
head>
<body>
body>
html>
DOCTYPE html>
DOCTYPE html>
<html>
<head>
<title>diff function testtitle>
<script type="text/javascript">
// 思路1: 使用双指针暴力解法
// 1. 先扫描a, 直到一个与b[j]相同的元素为止, 保存为aQueue
// 2. 再扫描b, 每次的查找范围为aQueue
// 3. 如果找到, 则进行接下来的最长匹配
// 4. 如果没有找到, 则让a进行最长匹配
// 5. 进入下一轮循环,直到a循环为止
// 时间复杂度: a:m, b:n, a1:m, b1:m(m+1)/2*n, ∴ O(m2n)
var logSwitch = 1;
function diffWithWhitespace(a, b) {
var aValues = a.split("");
var bValues = b.split("");
var alen = aValues.length;
var blen = bValues.length;
var i = 0, j = 0;
var equalLongest = [];
var aDiffLongest = [];
var aResult = [];
var bResult = [];
// 上一次比较结果, EQUAL:相等, ADIFF:A, BDIFF:B
var lastDiffResultType = "EQUAL";
while(i < alen || j < blen) {
if(aValues[i] == bValues[j]) {
// todo: 记录结果, 到a中,b中
if(lastDiffResultType != "EQUAL"
|| equalLongest.length == 0) {
equalLongest = [];
aDiffLongest = [];
lastDiffResultType = "EQUAL";
aResult.push({"item": equalLongest, "type":"equal"})
bResult.push({"item": equalLongest, "type":"equal"})
}
equalLongest.push(aValues[i]);
printLog("equal:" + aValues[i] + "
");
i++;
j++;
continue;
}
var i2 = i, j2 = j;
aDiffLongest = [];
while((i2) < alen
&& aValues[i2] != bValues[j2]) {
aDiffLongest.push(aValues[i2]);
++i2;
}
var aDiffTmp = [];
var bDiffTmp = [];
if(i2 > alen) {
// no equal find
// the last one
}
else if(i2 != alen && aValues[i2] == bValues[j]) {
aDiffLongest.push(aValues[i2]);
}
else{
if(j >= blen) {
aDiffTmp.push(aValues[i]);
// the last one
aResult.push({"item": aDiffTmp, "type":"diff"});
i++;
continue;
}
// a中未找到,全部退回到b中进行查找
bDiffTmp.push(bValues[j]);
bResult.push({"item": bDiffTmp, "type":"diff"})
// 去重相同项,也同时跳过上一相同的项
while(++j2 < blen
&& bValues[j2] == bValues[j]) {
bDiffTmp.push(bValues[j2]);
j = j2;
}
printLog("bdiff:" + bDiffTmp + "
");
j++;
lastDiffResultType = "BDIFF";
continue;
}
var curMaxStep = aDiffLongest.length;
var foundCloser = 0;
while(++j2 < blen && curMaxStep-- > 0) {
var i3 = 0;
for (;i3 < aDiffLongest.length; i3++) {
if(bValues[j2] == aDiffLongest[i3]) {
// 相同段
foundCloser = 1;
break;
}
}
if(foundCloser == 1) {
for (var c = i; c < i + i3; c++) {
aDiffTmp.push(aValues[c]);
}
for (var c = j ; c < j2; c++) {
bDiffTmp.push(bValues[c]);
}
if(aDiffTmp.length > 0) {
aResult.push({"item": aDiffTmp, "type":"diff"});
printLog("adiff:" + aDiffTmp + "");
}
if(bDiffTmp.length > 0) {
bResult.push({"item": bDiffTmp, "type":"diff"});
printLog("bdiff:" + bDiffTmp + "");
}
var eqItem = bValues[j2];
if(lastDiffResultType != "EQUAL"
|| equalLongest.length == 0) {
equalLongest = [];
aDiffLongest = [];
lastDiffResultType = "EQUAL";
aResult.push({"item": equalLongest, "type":"equal"})
bResult.push({"item": equalLongest, "type":"equal"})
}
equalLongest.push(eqItem);
printLog("equal:" + eqItem +"
");
aDiffLongest.splice(0, 1);
i = i + i3;
j = j2;
i++;
j++;
break;
}
else {
if(aDiffLongest.length == 0) {
lastDiffResultType = "BDIFF";
}
else{
lastDiffResultType = "ADIFF";
}
lastDiffResultType = "DIFF";
}
}
if(!foundCloser) {
for (var c = aDiffLongest.length - 1; c > 0; c--) {
aDiffTmp.push(aDiffLongest[0]);
aDiffLongest.splice(0, 1);
}
for (var c = j ; c < j2; c++) {
bDiffTmp.push(bValues[c]);
}
if(aDiffTmp.length > 0) {
aResult.push({"item": aDiffTmp, "type":"diff"})
printLog("adiff:" + aDiffTmp + "");
}
if(bDiffTmp.length > 0) {
bResult.push({"item": bDiffTmp, "type":"diff"});
printLog("bdiff:" + bDiffTmp + "");
}
var eqItem = aDiffLongest[0];
if(lastDiffResultType != "EQUAL"
|| equalLongest.length == 0) {
equalLongest = [];
aDiffLongest = [];
bDiffLongest = [];
lastDiffResultType = "EQUAL";
aResult.push({"item": equalLongest, "type":"equal"})
bResult.push({"item": equalLongest, "type":"equal"})
}
equalLongest.push(eqItem);
printLog("equal:" + eqItem +"
");
aDiffLongest.splice(0, 1);
i = i2 - i3;
j = j2;
i++;
j++;
lastDiffResultType = "ADIFF";
continue;
}
}
return {"a": aResult, "b":bResult};
}
turnOffLogSwitch();
var aText = "今天 是个 好天气";
var bText = "今天, 真是 一个 好阴天啊";
diffWithWhitespaceAndAppendBody(aText, bText);
aText = "ParseException";
bText = "RuntimeException";
diffWithWhitespaceAndAppendBody(aText, bText);
function diffWithWhitespaceAndAppendBody(a, b) {
printLog(aText);
printLog("");
printLog(bText);
printLog("compare result:
");
var diffResult = diffWithWhitespace(aText, bText);
document.write("A SIDE:
")
diffResult.a.forEach(function (r) {
writeDiffResult(r);
});
document.write("B SIDE:
")
diffResult.b.forEach(function (r) {
writeDiffResult(r);
});
}
function writeDiffResult(structText) {
var item = structText.item.join("");
if(structText.type != "equal") {
item = "" + item + "";
}
document.write(item);
}
function printLog(msg) {
if(logSwitch == 1) {
document.write(msg);
}
}
function turnOnLogSwitch() {
logSwitch = 1;
}
function turnOffLogSwitch() {
logSwitch = 0;
}
script>
head>
<body>
body>
html>
算法属于暴力解法,简单使用了双指针法,没有太多技巧,需要进一步优化。
3. 一点闲话
需要注意的量,虽然样子很像最长公共子序列的命题,但却并不是一回事。供参考。
与beyond compare软件结果相比,还是不太准确,最长匹配这个原则还没有体现好。另外,对于多行型 的字符串比较,并没有给出参考,但一般的,多行会被当作整体处理,行与行之间都有单字符类的比较。