Add Diff Template Library 1.20.

This commit is contained in:
Bartosz Taudul 2022-12-23 18:47:10 +01:00
parent bd06d4e104
commit cbc85003f7
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3
8 changed files with 1543 additions and 0 deletions

706
dtl/Diff.hpp Normal file
View File

@ -0,0 +1,706 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_DIFF_H
#define DTL_DIFF_H
namespace dtl {
/**
* diff class template
* sequence must support random_access_iterator.
*/
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
class Diff
{
private :
dtl_typedefs(elem, sequence)
sequence A;
sequence B;
size_t M;
size_t N;
size_t delta;
size_t offset;
long long *fp;
long long editDistance;
Lcs< elem > lcs;
Ses< elem > ses;
editPath path;
editPathCordinates pathCordinates;
bool swapped;
bool huge;
bool trivial;
bool editDistanceOnly;
uniHunkVec uniHunks;
comparator cmp;
long long ox;
long long oy;
public :
Diff () {}
Diff (const sequence& a,
const sequence& b) : A(a), B(b), ses(false) {
init();
}
Diff (const sequence& a,
const sequence& b,
bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
init();
}
Diff (const sequence& a,
const sequence& b,
const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
init();
}
Diff (const sequence& a,
const sequence& b,
bool deleteFirst,
const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
init();
}
~Diff() {}
long long getEditDistance () const {
return editDistance;
}
Lcs< elem > getLcs () const {
return lcs;
}
elemVec getLcsVec () const {
return lcs.getSequence();
}
Ses< elem > getSes () const {
return ses;
}
uniHunkVec getUniHunks () const {
return uniHunks;
}
/* These should be deprecated */
bool isHuge () const {
return huge;
}
void onHuge () {
this->huge = true;
}
void offHuge () {
this->huge = false;
}
bool isUnserious () const {
return trivial;
}
void onUnserious () {
this->trivial = true;
}
void offUnserious () {
this->trivial = false;
}
void onOnlyEditDistance () {
this->editDistanceOnly = true;
}
/* These are the replacements for the above */
bool hugeEnabled () const {
return huge;
}
void enableHuge () {
this->huge = true;
}
void disableHuge () {
this->huge = false;
}
bool trivialEnabled () const {
return trivial;
}
void enableTrivial () const {
this->trivial = true;
}
void disableTrivial () {
this->trivial = false;
}
void editDistanceOnlyEnabled () {
this->editDistanceOnly = true;
}
/**
* patching with Unified Format Hunks
*/
sequence uniPatch (const sequence& seq) {
elemList seqLst(seq.begin(), seq.end());
sesElemVec shunk;
sesElemVec_iter vsesIt;
elemList_iter lstIt = seqLst.begin();
long long inc_dec_total = 0;
long long gap = 1;
for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
joinSesVec(shunk, it->common[0]);
joinSesVec(shunk, it->change);
joinSesVec(shunk, it->common[1]);
it->a += inc_dec_total;
inc_dec_total += it->inc_dec_count;
for (long long i=0;i<it->a - gap;++i) {
++lstIt;
}
gap = it->a + it->b + it->inc_dec_count;
vsesIt = shunk.begin();
while (vsesIt!=shunk.end()) {
switch (vsesIt->second.type) {
case SES_ADD :
seqLst.insert(lstIt, vsesIt->first);
break;
case SES_DELETE :
if (lstIt != seqLst.end()) {
lstIt = seqLst.erase(lstIt);
}
break;
case SES_COMMON :
if (lstIt != seqLst.end()) {
++lstIt;
}
break;
default :
// no fall-through
break;
}
++vsesIt;
}
shunk.clear();
}
sequence patchedSeq(seqLst.begin(), seqLst.end());
return patchedSeq;
}
/**
* patching with Shortest Edit Script (SES)
*/
sequence patch (const sequence& seq) const {
sesElemVec sesSeq = ses.getSequence();
elemList seqLst(seq.begin(), seq.end());
elemList_iter lstIt = seqLst.begin();
for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
switch (sesIt->second.type) {
case SES_ADD :
seqLst.insert(lstIt, sesIt->first);
break;
case SES_DELETE :
lstIt = seqLst.erase(lstIt);
break;
case SES_COMMON :
++lstIt;
break;
default :
// no through
break;
}
}
sequence patchedSeq(seqLst.begin(), seqLst.end());
return patchedSeq;
}
/**
* compose Longest Common Subsequence and Shortest Edit Script.
* The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
* described by Sun Wu, Udi Manber and Gene Myers
*/
void compose() {
if (isHuge()) {
pathCordinates.reserve(MAX_CORDINATES_SIZE);
}
ox = 0;
oy = 0;
long long p = -1;
fp = new long long[M + N + 3];
fill(&fp[0], &fp[M + N + 3], -1);
path = editPath(M + N + 3);
fill(path.begin(), path.end(), -1);
ONP:
do {
++p;
for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
}
for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
}
fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
} while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
editDistance += static_cast<long long>(delta) + 2 * p;
long long r = path[delta+offset];
P cordinate;
editPathCordinates epc(0);
// recording edit distance only
if (editDistanceOnly) {
delete[] this->fp;
return;
}
while(r != -1) {
cordinate.x = pathCordinates[(size_t)r].x;
cordinate.y = pathCordinates[(size_t)r].y;
epc.push_back(cordinate);
r = pathCordinates[(size_t)r].k;
}
// record Longest Common Subsequence & Shortest Edit Script
if (!recordSequence(epc)) {
pathCordinates.resize(0);
epc.resize(0);
p = -1;
goto ONP;
}
delete[] this->fp;
}
/**
* print difference between A and B as an SES
*/
template < typename stream >
void printSES (stream& out) const {
sesElemVec ses_v = ses.getSequence();
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
}
void printSES (ostream& out = cout) const {
printSES< ostream >(out);
}
/**
* print differences given an SES
*/
template < typename stream >
static void printSES (const Ses< elem >& s, stream& out) {
sesElemVec ses_v = s.getSequence();
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
}
static void printSES (const Ses< elem >& s, ostream& out = cout) {
printSES< ostream >(s, out);
}
/**
* print difference between A and B as an SES with custom printer
*/
template < typename stream, template < typename SEET, typename STRT > class PT >
void printSES (stream& out) const {
sesElemVec ses_v = ses.getSequence ();
for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out));
}
/**
* store difference between A and B as an SES with custom storage
*/
template < typename storedData, template < typename SEET, typename STRT > class ST >
void storeSES(storedData& sd) const {
sesElemVec ses_v = ses.getSequence();
for_each(ses_v.begin(), ses_v.end(), ST < sesElem, storedData >(sd));
}
/**
* print difference between A and B in the Unified Format
*/
template < typename stream >
void printUnifiedFormat (stream& out) const {
for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
}
void printUnifiedFormat (ostream& out = cout) const {
printUnifiedFormat< ostream >(out);
}
/**
* print unified format difference with given unified format hunks
*/
template < typename stream >
static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out));
}
static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
printUnifiedFormat< ostream >(hunks, out);
}
/**
* compose Unified Format Hunks from Shortest Edit Script
*/
void composeUnifiedHunks () {
sesElemVec common[2];
sesElemVec change;
sesElemVec ses_v = ses.getSequence();
long long l_cnt = 1;
long long length = distance(ses_v.begin(), ses_v.end());
long long middle = 0;
bool isMiddle, isAfter;
elemInfo einfo;
long long a, b, c, d; // @@ -a,b +c,d @@
long long inc_dec_count = 0;
uniHunk< sesElem > hunk;
sesElemVec adds;
sesElemVec deletes;
isMiddle = isAfter = false;
a = b = c = d = 0;
for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
einfo = it->second;
switch (einfo.type) {
case SES_ADD :
middle = 0;
++inc_dec_count;
adds.push_back(*it);
if (!isMiddle) isMiddle = true;
if (isMiddle) ++d;
if (l_cnt >= length) {
joinSesVec(change, deletes);
joinSesVec(change, adds);
isAfter = true;
}
break;
case SES_DELETE :
middle = 0;
--inc_dec_count;
deletes.push_back(*it);
if (!isMiddle) isMiddle = true;
if (isMiddle) ++b;
if (l_cnt >= length) {
joinSesVec(change, deletes);
joinSesVec(change, adds);
isAfter = true;
}
break;
case SES_COMMON :
++b;++d;
if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
if (a == 0 && c == 0) {
if (!wasSwapped()) {
a = einfo.beforeIdx;
c = einfo.afterIdx;
} else {
a = einfo.afterIdx;
c = einfo.beforeIdx;
}
}
common[0].push_back(*it);
} else {
rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
common[0].pop_back();
common[0].push_back(*it);
++a;++c;
--b;--d;
}
}
if (isMiddle && !isAfter) {
++middle;
joinSesVec(change, deletes);
joinSesVec(change, adds);
change.push_back(*it);
if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
isAfter = true;
}
adds.clear();
deletes.clear();
}
break;
default :
// no through
break;
}
// compose unified format hunk
if (isAfter && !change.empty()) {
sesElemVec_iter cit = it;
long long cnt = 0;
for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
if (cit->second.type == SES_COMMON) {
++cnt;
}
}
if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
middle = 0;
isAfter = false;
continue;
}
if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
long long c0size = static_cast<long long>(common[0].size());
rotate(common[0].begin(),
common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
common[0].end());
for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
common[0].pop_back();
}
a += c0size - DTL_SEPARATE_SIZE;
c += c0size - DTL_SEPARATE_SIZE;
}
if (a == 0) ++a;
if (c == 0) ++c;
if (wasSwapped()) swap(a, c);
hunk.a = a;
hunk.b = b;
hunk.c = c;
hunk.d = d;
hunk.common[0] = common[0];
hunk.change = change;
hunk.common[1] = common[1];
hunk.inc_dec_count = inc_dec_count;
uniHunks.push_back(hunk);
isMiddle = false;
isAfter = false;
common[0].clear();
common[1].clear();
adds.clear();
deletes.clear();
change.clear();
a = b = c = d = middle = inc_dec_count = 0;
}
}
}
/**
* compose ses from stream
*/
template <typename stream>
static Ses< elem > composeSesFromStream (stream& st)
{
elem line;
Ses< elem > ret;
long long x_idx, y_idx;
x_idx = y_idx = 1;
while (getline(st, line)) {
elem mark(line.begin(), line.begin() + 1);
elem e(line.begin() + 1, line.end());
if (mark == SES_MARK_DELETE) {
ret.addSequence(e, x_idx, 0, SES_DELETE);
++x_idx;
} else if (mark == SES_MARK_ADD) {
ret.addSequence(e, y_idx, 0, SES_ADD);
++y_idx;
} else if (mark == SES_MARK_COMMON) {
ret.addSequence(e, x_idx, y_idx, SES_COMMON);
++x_idx;
++y_idx;
}
}
return ret;
}
private :
/**
* initialize
*/
void init () {
M = distance(A.begin(), A.end());
N = distance(B.begin(), B.end());
if (M < N) {
swapped = false;
} else {
swap(A, B);
swap(M, N);
swapped = true;
}
editDistance = 0;
delta = N - M;
offset = M + 1;
huge = false;
trivial = false;
editDistanceOnly = false;
fp = NULL;
}
/**
* search shortest path and record the path
*/
long long snake(const long long& k, const long long& above, const long long& below) {
long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
long long y = max(above, below);
long long x = y - k;
while ((size_t)x < M && (size_t)y < N && (swapped ? cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) {
++x;++y;
}
path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
if (!editDistanceOnly) {
P p;
p.x = x;p.y = y;p.k = r;
pathCordinates.push_back(p);
}
return y;
}
/**
* record SES and LCS
*/
bool recordSequence (const editPathCordinates& v) {
sequence_const_iter x(A.begin());
sequence_const_iter y(B.begin());
long long x_idx, y_idx; // line number for Unified Format
long long px_idx, py_idx; // cordinates
bool complete = false;
x_idx = y_idx = 1;
px_idx = py_idx = 0;
for (size_t i=v.size()-1;!complete;--i) {
while(px_idx < v[i].x || py_idx < v[i].y) {
if (v[i].y - v[i].x > py_idx - px_idx) {
if (!wasSwapped()) {
ses.addSequence(*y, 0, y_idx + oy, SES_ADD);
} else {
ses.addSequence(*y, y_idx + oy, 0, SES_DELETE);
}
++y;
++y_idx;
++py_idx;
} else if (v[i].y - v[i].x < py_idx - px_idx) {
if (!wasSwapped()) {
ses.addSequence(*x, x_idx + ox, 0, SES_DELETE);
} else {
ses.addSequence(*x, 0, x_idx + ox, SES_ADD);
}
++x;
++x_idx;
++px_idx;
} else {
if (!wasSwapped()) {
lcs.addSequence(*x);
ses.addSequence(*x, x_idx + ox, y_idx + oy, SES_COMMON);
} else {
lcs.addSequence(*y);
ses.addSequence(*y, y_idx + oy, x_idx + ox, SES_COMMON);
}
++x;
++y;
++x_idx;
++y_idx;
++px_idx;
++py_idx;
}
}
if (i == 0) complete = true;
}
if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
// all recording succeeded
} else {
// trivial difference
if (trivialEnabled()) {
if (!wasSwapped()) {
recordOddSequence(x_idx, M, x, SES_DELETE);
recordOddSequence(y_idx, N, y, SES_ADD);
} else {
recordOddSequence(x_idx, M, x, SES_ADD);
recordOddSequence(y_idx, N, y, SES_DELETE);
}
return true;
}
// nontrivial difference
sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
A = A_;
B = B_;
M = distance(A.begin(), A.end());
N = distance(B.begin(), B.end());
delta = N - M;
offset = M + 1;
delete[] fp;
fp = new long long[M + N + 3];
fill(&fp[0], &fp[M + N + 3], -1);
fill(path.begin(), path.end(), -1);
ox = x_idx - 1;
oy = y_idx - 1;
return false;
}
return true;
}
/**
* record odd sequence in SES
*/
void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
while(idx < length){
ses.addSequence(*it, idx, 0, et);
++it;
++idx;
++editDistance;
}
ses.addSequence(*it, idx, 0, et);
++editDistance;
}
/**
* join SES vectors
*/
void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
if (!s2.empty()) {
for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
s1.push_back(*vit);
}
}
}
/**
* check if the sequences have been swapped
*/
bool inline wasSwapped () const {
return swapped;
}
};
}
#endif // DTL_DIFF_H

245
dtl/Diff3.hpp Normal file
View File

@ -0,0 +1,245 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_DIFF3_H
#define DTL_DIFF3_H
namespace dtl {
/**
* diff3 class template
* sequence must support random_access_iterator.
*/
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
class Diff3
{
private:
dtl_typedefs(elem, sequence)
sequence A;
sequence B;
sequence C;
sequence S;
Diff< elem, sequence, comparator > diff_ba;
Diff< elem, sequence, comparator > diff_bc;
bool conflict;
elem csepabegin;
elem csepa;
elem csepaend;
public :
Diff3 () {}
Diff3 (const sequence& a,
const sequence& b,
const sequence& c) : A(a), B(b), C(c),
diff_ba(b, a), diff_bc(b, c),
conflict(false) {}
~Diff3 () {}
bool isConflict () const {
return conflict;
}
sequence getMergedSequence () const {
return S;
}
/**
* merge changes B and C into A
*/
bool merge () {
if (diff_ba.getEditDistance() == 0) { // A == B
if (diff_bc.getEditDistance() == 0) { // A == B == C
S = B;
return true;
}
S = C;
return true;
} else { // A != B
if (diff_bc.getEditDistance() == 0) { // A != B == C
S = A;
return true;
} else { // A != B != C
S = merge_();
if (isConflict()) { // conflict occured
return false;
}
}
}
return true;
}
/**
* compose differences
*/
void compose () {
diff_ba.compose();
diff_bc.compose();
}
private :
/**
* merge implementation
*/
sequence merge_ () {
elemVec seq;
Ses< elem > ses_ba = diff_ba.getSes();
Ses< elem > ses_bc = diff_bc.getSes();
sesElemVec ses_ba_v = ses_ba.getSequence();
sesElemVec ses_bc_v = ses_bc.getSequence();
sesElemVec_iter ba_it = ses_ba_v.begin();
sesElemVec_iter bc_it = ses_bc_v.begin();
sesElemVec_iter ba_end = ses_ba_v.end();
sesElemVec_iter bc_end = ses_bc_v.end();
while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) {
while (true) {
if (!isEnd(ba_end, ba_it) &&
!isEnd(bc_end, bc_it) &&
ba_it->first == bc_it->first &&
ba_it->second.type == SES_COMMON &&
bc_it->second.type == SES_COMMON) {
// do nothing
} else {
break;
}
if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first);
else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first);
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
}
if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break;
if ( ba_it->second.type == SES_COMMON
&& bc_it->second.type == SES_DELETE) {
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_COMMON &&
bc_it->second.type == SES_ADD) {
seq.push_back(bc_it->first);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_COMMON) {
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_DELETE) {
if (ba_it->first == bc_it->first) {
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else {
// conflict
conflict = true;
return B;
}
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_ADD) {
// conflict
conflict = true;
return B;
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_COMMON) {
seq.push_back(ba_it->first);
forwardUntilEnd(ba_end, ba_it);
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_DELETE) {
// conflict
conflict = true;
return B;
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_ADD) {
if (ba_it->first == bc_it->first) {
seq.push_back(ba_it->first);
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else {
// conflict
conflict = true;
return B;
}
}
}
if (isEnd(ba_end, ba_it)) {
addDecentSequence(bc_end, bc_it, seq);
} else if (isEnd(bc_end, bc_it)) {
addDecentSequence(ba_end, ba_it, seq);
}
sequence mergedSeq(seq.begin(), seq.end());
return mergedSeq;
}
/**
* join elem vectors
*/
void inline joinElemVec (elemVec& s1, elemVec& s2) const {
if (!s2.empty()) {
for (elemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
s1.push_back(*vit);
}
}
}
/**
* check if sequence is at end
*/
template <typename T_iter>
bool inline isEnd (const T_iter& end, const T_iter& it) const {
return it == end ? true : false;
}
/**
* increment iterator until iterator is at end
*/
template <typename T_iter>
void inline forwardUntilEnd (const T_iter& end, T_iter& it) const {
if (!isEnd(end, it)) ++it;
}
/**
* add elements whose SES's type is ADD
*/
void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const {
while (!isEnd(end, it)) {
if (it->second.type == SES_ADD) seq.push_back(it->first);
++it;
}
}
};
}
#endif // DTL_DIFF3_H

55
dtl/Lcs.hpp Normal file
View File

@ -0,0 +1,55 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_LCS_H
#define DTL_LCS_H
namespace dtl {
/**
* Longest Common Subsequence template class
*/
template <typename elem>
class Lcs : public Sequence< elem >
{
public :
Lcs () {}
~Lcs () {}
};
}
#endif // DTL_LCS_H

65
dtl/Sequence.hpp Normal file
View File

@ -0,0 +1,65 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_SEQUENCE_H
#define DTL_SEQUENCE_H
namespace dtl {
/**
* sequence class template
*/
template <typename elem>
class Sequence
{
public :
typedef vector< elem > elemVec;
Sequence () {}
virtual ~Sequence () {}
elemVec getSequence () const {
return sequence;
}
void addSequence (elem e) {
sequence.push_back(e);
}
protected :
elemVec sequence;
};
}
#endif // DTL_SEQUENCE_H

132
dtl/Ses.hpp Normal file
View File

@ -0,0 +1,132 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_SES_H
#define DTL_SES_H
namespace dtl {
/**
* Shortest Edit Script template class
*/
template <typename elem>
class Ses : public Sequence< elem >
{
private :
typedef pair< elem, elemInfo > sesElem;
typedef vector< sesElem > sesElemVec;
public :
Ses () : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(false) {
nextDeleteIdx = 0;
}
Ses (bool moveDel) : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(moveDel) {
nextDeleteIdx = 0;
}
~Ses () {}
bool isOnlyAdd () const {
return onlyAdd;
}
bool isOnlyDelete () const {
return onlyDelete;
}
bool isOnlyCopy () const {
return onlyCopy;
}
bool isOnlyOneOperation () const {
return isOnlyAdd() || isOnlyDelete() || isOnlyCopy();
}
bool isChange () const {
return !onlyCopy;
}
using Sequence< elem >::addSequence;
void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) {
elemInfo info;
info.beforeIdx = beforeIdx;
info.afterIdx = afterIdx;
info.type = type;
sesElem pe(e, info);
if (!deletesFirst) {
sequence.push_back(pe);
}
switch (type) {
case SES_DELETE:
onlyCopy = false;
onlyAdd = false;
if (deletesFirst) {
sequence.insert(sequence.begin() + nextDeleteIdx, pe);
nextDeleteIdx++;
}
break;
case SES_COMMON:
onlyAdd = false;
onlyDelete = false;
if (deletesFirst) {
sequence.push_back(pe);
nextDeleteIdx = sequence.size();
}
break;
case SES_ADD:
onlyDelete = false;
onlyCopy = false;
if (deletesFirst) {
sequence.push_back(pe);
}
break;
}
}
sesElemVec getSequence () const {
return sequence;
}
private :
sesElemVec sequence;
bool onlyAdd;
bool onlyDelete;
bool onlyCopy;
bool deletesFirst;
size_t nextDeleteIdx;
};
}
#endif // DTL_SES_H

47
dtl/dtl.hpp Normal file
View File

@ -0,0 +1,47 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DTL_H
#define DTL_H
#include "variables.hpp"
#include "functors.hpp"
#include "Sequence.hpp"
#include "Lcs.hpp"
#include "Ses.hpp"
#include "Diff.hpp"
#include "Diff3.hpp"
#endif // DTL_H

151
dtl/functors.hpp Normal file
View File

@ -0,0 +1,151 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_FUNCTORS_H
#define DTL_FUNCTORS_H
namespace dtl {
/**
* printer class template
*/
template <typename sesElem, typename stream = ostream >
class Printer
{
public :
Printer () : out_(cout) {}
Printer (stream& out) : out_(out) {}
virtual ~Printer () {}
virtual void operator() (const sesElem& se) const = 0;
protected :
stream& out_;
};
/**
* common element printer class template
*/
template <typename sesElem, typename stream = ostream >
class CommonPrinter : public Printer < sesElem, stream >
{
public :
CommonPrinter () : Printer < sesElem, stream > () {}
CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {}
~CommonPrinter () {}
void operator() (const sesElem& se) const {
this->out_ << SES_MARK_COMMON << se.first << endl;
}
};
/**
* ses element printer class template
*/
template <typename sesElem, typename stream = ostream >
class ChangePrinter : public Printer < sesElem, stream >
{
public :
ChangePrinter () : Printer < sesElem, stream > () {}
ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {}
~ChangePrinter () {}
void operator() (const sesElem& se) const {
switch (se.second.type) {
case SES_ADD:
this->out_ << SES_MARK_ADD << se.first << endl;
break;
case SES_DELETE:
this->out_ << SES_MARK_DELETE << se.first << endl;
break;
case SES_COMMON:
this->out_ << SES_MARK_COMMON << se.first << endl;
break;
}
}
};
/**
* unified format element printer class template
*/
template <typename sesElem, typename stream = ostream >
class UniHunkPrinter
{
public :
UniHunkPrinter () : out_(cout) {}
UniHunkPrinter (stream& out) : out_(out) {}
~UniHunkPrinter () {}
void operator() (const uniHunk< sesElem >& hunk) const {
out_ << "@@"
<< " -" << hunk.a << "," << hunk.b
<< " +" << hunk.c << "," << hunk.d
<< " @@" << endl;
for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_));
for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_));
for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_));
}
private :
stream& out_;
};
/**
* storage class template
*/
template <typename sesElem, typename storedData >
class Storage
{
public:
Storage(storedData& sd) : storedData_(sd) {}
virtual ~Storage() {}
virtual void operator() (const sesElem& se) const = 0;
protected:
storedData& storedData_;
};
/**
* compare class template
*/
template <typename elem>
class Compare
{
public :
Compare () {}
virtual ~Compare () {}
virtual inline bool impl (const elem& e1, const elem& e2) const {
return e1 == e2;
}
};
}
#endif // DTL_FUNCTORS_H

142
dtl/variables.hpp Normal file
View File

@ -0,0 +1,142 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_VARIABLES_H
#define DTL_VARIABLES_H
#include <vector>
#include <list>
#include <string>
#include <algorithm>
#include <iostream>
namespace dtl {
using std::vector;
using std::string;
using std::pair;
using std::ostream;
using std::list;
using std::for_each;
using std::distance;
using std::fill;
using std::cout;
using std::endl;
using std::rotate;
using std::swap;
using std::max;
/**
* version string
*/
const string version = "1.20";
/**
* type of edit for SES
*/
typedef int edit_t;
const edit_t SES_DELETE = -1;
const edit_t SES_COMMON = 0;
const edit_t SES_ADD = 1;
/**
* mark of SES
*/
#define SES_MARK_DELETE "-"
#define SES_MARK_COMMON " "
#define SES_MARK_ADD "+"
/**
* info for Unified Format
*/
typedef struct eleminfo {
long long beforeIdx; // index of prev sequence
long long afterIdx; // index of after sequence
edit_t type; // type of edit(Add, Delete, Common)
bool operator==(const eleminfo& other) const{
return (this->beforeIdx == other.beforeIdx && this->afterIdx == other.afterIdx && this->type == other.type);
}
} elemInfo;
const long long DTL_SEPARATE_SIZE = 3;
const long long DTL_CONTEXT_SIZE = 3;
/**
* cordinate for registering route
*/
typedef struct Point {
long long x; // x cordinate
long long y; // y cordinate
long long k; // vertex
} P;
/**
* limit of cordinate size
*/
const unsigned long long MAX_CORDINATES_SIZE = 2000000;
typedef vector< long long > editPath;
typedef vector< P > editPathCordinates;
/**
* Structure of Unified Format Hunk
*/
template <typename sesElem>
struct uniHunk {
long long a, b, c, d; // @@ -a,b +c,d @@
vector< sesElem > common[2]; // anteroposterior commons on changes
vector< sesElem > change; // changes
long long inc_dec_count; // count of increace and decrease
};
#define dtl_typedefs(elem, sequence) \
typedef pair< elem, elemInfo > sesElem; \
typedef vector< sesElem > sesElemVec; \
typedef vector< uniHunk< sesElem > > uniHunkVec; \
typedef list< elem > elemList; \
typedef vector< elem > elemVec; \
typedef typename uniHunkVec::iterator uniHunkVec_iter; \
typedef typename sesElemVec::iterator sesElemVec_iter; \
typedef typename elemList::iterator elemList_iter; \
typedef typename sequence::iterator sequence_iter; \
typedef typename sequence::const_iterator sequence_const_iter; \
typedef typename elemVec::iterator elemVec_iter;
}
#endif // DTL_VARIABLES_H