001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.util.DiffResult;
018    import com.liferay.portal.kernel.util.FileUtil;
019    import com.liferay.portal.kernel.util.StringBundler;
020    import com.liferay.portal.kernel.util.StringPool;
021    
022    import java.io.Reader;
023    
024    import java.util.ArrayList;
025    import java.util.List;
026    
027    import org.incava.util.diff.Diff;
028    import org.incava.util.diff.Difference;
029    
030    /**
031     * This class can compare two different versions of a text. Source refers to the
032     * earliest version of the text and target refers to a modified version of
033     * source. Changes are considered either as a removal from the source or as an
034     * addition to the target. This class detects changes to an entire line and also
035     * detects changes within lines, such as, removal or addition of characters.
036     * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
037     *
038     * @author Bruno Farache
039     */
040    public class DiffImpl implements com.liferay.portal.kernel.util.Diff {
041    
042            /**
043             * This is a diff method with default values.
044             *
045             * @param  source the source text
046             * @param  target the modified version of the source text
047             * @return an array containing two lists of <code>DiffResults</code>, the
048             *         first element contains DiffResults related to changes in source
049             *         and the second element to changes in target
050             */
051            public List<DiffResult>[] diff(Reader source, Reader target) {
052                    int margin = 2;
053    
054                    return diff(
055                            source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
056            }
057    
058            /**
059             * The main entrance of this class. This method will compare the two texts,
060             * highlight the changes by enclosing them with markers and return a list of
061             * <code>DiffResults</code>.
062             *
063             * @param  source the source text
064             * @param  target the modified version of the source text
065             * @param  addedMarkerStart the marker to indicate the start of text added
066             *         to the source
067             * @param  addedMarkerEnd the marker to indicate the end of text added to
068             *         the source
069             * @param  deletedMarkerStart the marker to indicate the start of text
070             *         deleted from the source
071             * @param  deletedMarkerEnd the marker to indicate the end of text deleted
072             *         from the source
073             * @param  margin the vertical margin to use in displaying differences
074             *         between changed line changes
075             * @return an array containing two lists of <code>DiffResults</code>, the
076             *         first element contains DiffResults related to changes in source
077             *         and the second element to changes in target
078             */
079            public List<DiffResult>[] diff(
080                    Reader source, Reader target, String addedMarkerStart,
081                    String addedMarkerEnd, String deletedMarkerStart,
082                    String deletedMarkerEnd, int margin) {
083    
084                    List<DiffResult> sourceResults = new ArrayList<DiffResult>();
085                    List<DiffResult> targetResults = new ArrayList<DiffResult>();
086    
087                    List<DiffResult>[] results = new List[] {sourceResults, targetResults};
088    
089                    // Convert the texts to Lists where each element are lines of the texts.
090    
091                    List<String> sourceStringList = FileUtil.toList(source);
092                    List<String> targetStringList = FileUtil.toList(target);
093    
094                    // Make a a Diff of these lines and iterate over their Differences.
095    
096                    Diff diff = new Diff(sourceStringList, targetStringList);
097    
098                    List<Difference> differences = diff.diff();
099    
100                    for (Difference difference : differences) {
101                            if (difference.getAddedEnd() == Difference.NONE) {
102    
103                                    // Lines were deleted from source only.
104    
105                                    _highlightLines(
106                                            sourceStringList, deletedMarkerStart, deletedMarkerEnd,
107                                            difference.getDeletedStart(), difference.getDeletedEnd());
108    
109                                    margin = _calculateMargin(
110                                            sourceResults, targetResults, difference.getDeletedStart(),
111                                            difference.getAddedStart(), margin);
112    
113                                    List<String> changedLines = _addMargins(
114                                            sourceResults, sourceStringList,
115                                            difference.getDeletedStart(), margin);
116    
117                                    _addResults(
118                                            sourceResults, sourceStringList, changedLines,
119                                            difference.getDeletedStart(), difference.getDeletedEnd());
120    
121                                    changedLines = _addMargins(
122                                            targetResults, targetStringList, difference.getAddedStart(),
123                                            margin);
124    
125                                    int deletedLines =
126                                            difference.getDeletedEnd() + 1 -
127                                                    difference.getDeletedStart();
128    
129                                    for (int i = 0; i < deletedLines; i++) {
130                                            changedLines.add(CONTEXT_LINE);
131                                    }
132    
133                                    DiffResult diffResult = new DiffResult(
134                                            difference.getDeletedStart(), changedLines);
135    
136                                    targetResults.add(diffResult);
137                            }
138                            else if (difference.getDeletedEnd() == Difference.NONE) {
139    
140                                    // Lines were added to target only.
141    
142                                    _highlightLines(
143                                            targetStringList, addedMarkerStart, addedMarkerEnd,
144                                            difference.getAddedStart(), difference.getAddedEnd());
145    
146                                    margin = _calculateMargin(
147                                            sourceResults, targetResults, difference.getDeletedStart(),
148                                            difference.getAddedStart(), margin);
149    
150                                    List<String> changedLines = _addMargins(
151                                            sourceResults, sourceStringList,
152                                            difference.getDeletedStart(), margin);
153    
154                                    int addedLines =
155                                            difference.getAddedEnd() + 1 - difference.getAddedStart();
156    
157                                    for (int i = 0; i < addedLines; i++) {
158                                            changedLines.add(CONTEXT_LINE);
159                                    }
160    
161                                    DiffResult diffResult = new DiffResult(
162                                            difference.getAddedStart(), changedLines);
163    
164                                    sourceResults.add(diffResult);
165    
166                                    changedLines = _addMargins(
167                                            targetResults, targetStringList, difference.getAddedStart(),
168                                            margin);
169    
170                                    _addResults(
171                                            targetResults, targetStringList, changedLines,
172                                            difference.getAddedStart(), difference.getAddedEnd());
173                            }
174                            else {
175    
176                                    // Lines were deleted from source and added to target at the
177                                    // same position. It needs to check for characters differences.
178    
179                                    _checkCharDiffs(
180                                            sourceResults, targetResults, sourceStringList,
181                                            targetStringList, addedMarkerStart, addedMarkerEnd,
182                                            deletedMarkerStart, deletedMarkerEnd, difference, margin);
183                            }
184                    }
185    
186                    return results;
187            }
188    
189            private static List<String> _addMargins(
190                    List<DiffResult> results, List<String> stringList, int startPos,
191                    int margin) {
192    
193                    List<String> changedLines = new ArrayList<String>();
194    
195                    if ((margin == 0) || (startPos == 0)) {
196                            return changedLines;
197                    }
198    
199                    int i = startPos - margin;
200    
201                    for (; i < 0; i++) {
202                            changedLines.add(CONTEXT_LINE);
203                    }
204    
205                    for (; i < startPos; i++) {
206                            if (i < stringList.size()) {
207                                    changedLines.add(stringList.get(i));
208                            }
209                    }
210    
211                    return changedLines;
212            }
213    
214            private static void _addResults(
215                    List<DiffResult> results, List<String> stringList,
216                    List<String> changedLines, int start, int end) {
217    
218                    changedLines.addAll(stringList.subList(start, end + 1));
219    
220                    DiffResult diffResult = new DiffResult(start, changedLines);
221    
222                    results.add(diffResult);
223            }
224    
225            private static int _calculateMargin(
226                    List<DiffResult> sourceResults, List<DiffResult> targetResults,
227                    int sourceBeginPos, int targetBeginPos, int margin) {
228    
229                    int sourceMargin = _checkOverlapping(
230                            sourceResults, sourceBeginPos, margin);
231                    int targetMargin = _checkOverlapping(
232                            targetResults, targetBeginPos, margin);
233    
234                    if (sourceMargin < targetMargin) {
235                            return sourceMargin;
236                    }
237    
238                    return targetMargin;
239            }
240    
241            private static void _checkCharDiffs(
242                    List<DiffResult> sourceResults, List<DiffResult> targetResults,
243                    List<String> sourceStringList, List<String> targetStringList,
244                    String addedMarkerStart, String addedMarkerEnd,
245                    String deletedMarkerStart, String deletedMarkerEnd,
246                    Difference difference, int margin) {
247    
248                    boolean aligned = false;
249    
250                    int i = difference.getDeletedStart();
251                    int j = difference.getAddedStart();
252    
253                    // A line with changed characters may have its position shifted some
254                    // lines above or below. These for loops will try to align these lines.
255                    // While these lines are not aligned, highlight them as either additions
256                    // or deletions.
257    
258                    for (; i <= difference.getDeletedEnd(); i++) {
259                            for (; j <= difference.getAddedEnd(); j++) {
260                                    if (!_isMaxLineLengthExceeded(
261                                                    sourceStringList.get(i), targetStringList.get(j)) &&
262                                            _lineDiff(
263                                                    sourceResults, targetResults, sourceStringList,
264                                                    targetStringList, addedMarkerStart, addedMarkerEnd,
265                                                    deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
266    
267                                            aligned = true;
268    
269                                            break;
270                                    }
271    
272                                    _highlightLines(
273                                            targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
274    
275                                    DiffResult targetResult = new DiffResult(
276                                            j, targetStringList.subList(j, j + 1));
277    
278                                    targetResults.add(targetResult);
279    
280                                    sourceResults.add(new DiffResult(j, CONTEXT_LINE));
281                            }
282    
283                            if (aligned) {
284                                    break;
285                            }
286    
287                            _highlightLines(
288                                    sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
289    
290                            DiffResult sourceResult = new DiffResult(
291                                    i, sourceStringList.subList(i, i + 1));
292    
293                            sourceResults.add(sourceResult);
294    
295                            targetResults.add(new DiffResult(i, CONTEXT_LINE));
296                    }
297    
298                    i = i + 1;
299                    j = j + 1;
300    
301                    // Lines are aligned, check for differences of the following lines.
302    
303                    for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
304                                    i++, j++) {
305    
306                            if (!_isMaxLineLengthExceeded(
307                                            sourceStringList.get(i), targetStringList.get(j))) {
308    
309                                    _lineDiff(
310                                            sourceResults, targetResults, sourceStringList,
311                                            targetStringList, addedMarkerStart, addedMarkerEnd,
312                                            deletedMarkerStart, deletedMarkerEnd, i, j, true);
313                            }
314                            else {
315                                    _highlightLines(
316                                            sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
317                                            i);
318    
319                                    DiffResult sourceResult = new DiffResult(
320                                            i, sourceStringList.subList(i, i + 1));
321    
322                                    sourceResults.add(sourceResult);
323    
324                                    targetResults.add(new DiffResult(i, CONTEXT_LINE));
325    
326                                    _highlightLines(
327                                            targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
328    
329                                    DiffResult targetResult = new DiffResult(
330                                            j, targetStringList.subList(j, j + 1));
331    
332                                    targetResults.add(targetResult);
333    
334                                    sourceResults.add(new DiffResult(j, CONTEXT_LINE));
335                            }
336                    }
337    
338                    // After the for loop above, some lines might remained unchecked.
339                    // They are considered as deletions or additions.
340    
341                    for (; i <= difference.getDeletedEnd(); i++) {
342                            _highlightLines(
343                                    sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
344    
345                            DiffResult sourceResult = new DiffResult(
346                                    i, sourceStringList.subList(i, i + 1));
347    
348                            sourceResults.add(sourceResult);
349    
350                            targetResults.add(new DiffResult(i, CONTEXT_LINE));
351                    }
352    
353                    for (; j <= difference.getAddedEnd(); j++) {
354                            _highlightLines(
355                                    targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
356    
357                            DiffResult targetResult = new DiffResult(
358                                    j, targetStringList.subList(j, j + 1));
359    
360                            targetResults.add(targetResult);
361    
362                            sourceResults.add(new DiffResult(j, CONTEXT_LINE));
363                    }
364            }
365    
366            private static int _checkOverlapping(
367                    List<DiffResult> results, int startPos, int margin) {
368    
369                    if ((results.size() == 0) || ((startPos - margin) < 0)) {
370                            return margin;
371                    }
372    
373                    DiffResult lastDiff = results.get(results.size() - 1);
374    
375                    if (lastDiff.getChangedLines().size() == 0) {
376                            return margin;
377                    }
378    
379                    int lastChangedLine =
380                            (lastDiff.getLineNumber() - 1) + lastDiff.getChangedLines().size();
381    
382                    int currentChangedLine = startPos - margin;
383    
384                    if ((lastDiff.getChangedLines().size() == 1) &&
385                            lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE)) {
386    
387                            currentChangedLine = currentChangedLine + 1;
388                    }
389    
390                    if (currentChangedLine < lastChangedLine) {
391                            return margin + currentChangedLine - lastChangedLine;
392                    }
393    
394                    return margin;
395            }
396    
397            private static void _highlightChars(
398                    List<String> stringList, String markerStart, String markerEnd,
399                    int startPos, int endPos) {
400    
401                    String start = markerStart + stringList.get(startPos);
402    
403                    stringList.set(startPos, start);
404    
405                    String end = stringList.get(endPos) + markerEnd;
406    
407                    stringList.set(endPos, end);
408            }
409    
410            private static void _highlightLines(
411                    List<String> stringList, String markerStart, String markerEnd,
412                    int startPos, int endPos) {
413    
414                    for (int i = startPos; i <= endPos; i++) {
415                            stringList.set(i, markerStart + stringList.get(i) + markerEnd);
416                    }
417            }
418    
419            private static boolean _isMaxLineLengthExceeded(
420                    String sourceString, String targetString) {
421    
422                    if ((sourceString.length() > _DIFF_MAX_LINE_LENGTH) ||
423                            (targetString.length() > _DIFF_MAX_LINE_LENGTH)) {
424    
425                            return true;
426                    }
427    
428                    return false;
429            }
430    
431            private static boolean _lineDiff(
432                    List<DiffResult> sourceResults, List<DiffResult> targetResults,
433                    List<String> sourceStringList, List<String> targetStringList,
434                    String addedMarkerStart, String addedMarkerEnd,
435                    String deletedMarkerStart, String deletedMarkerEnd,
436                    int sourceChangedLine, int targetChangedLine, boolean aligned) {
437    
438                    String source = sourceStringList.get(sourceChangedLine);
439                    String target = targetStringList.get(targetChangedLine);
440    
441                    // Convert the lines to lists where each element are chars of the lines.
442    
443                    List<String> sourceList = _toList(source);
444                    List<String> targetList = _toList(target);
445    
446                    Diff diff = new Diff(sourceList, targetList);
447    
448                    List<Difference> differences = diff.diff();
449    
450                    int deletedChars = 0;
451                    int addedChars = 0;
452    
453                    // The following while loop will calculate how many characters of
454                    // the source line need to be changed to be equals to the target line.
455    
456                    if (!aligned) {
457                            for (Difference difference : differences) {
458                                    if (difference.getDeletedEnd() != Difference.NONE) {
459                                            deletedChars +=
460                                                    (difference.getDeletedEnd() -
461                                                            difference.getDeletedStart() + 1);
462                                    }
463    
464                                    if (difference.getAddedEnd() != Difference.NONE) {
465                                            addedChars +=
466                                                    (difference.getAddedEnd() - difference.getAddedStart() +
467                                                            1);
468                                    }
469                            }
470                    }
471    
472                    // If a lot of changes were needed (more than half of the source line
473                    // length), consider this as not aligned yet.
474    
475                    if ((deletedChars > (sourceList.size() / 2)) ||
476                            (addedChars > (sourceList.size() / 2))) {
477    
478                            return false;
479                    }
480    
481                    boolean sourceChanged = false;
482                    boolean targetChanged = false;
483    
484                    // Iterate over Differences between chars of these lines.
485    
486                    for (Difference difference : differences) {
487                            if (difference.getAddedEnd() == Difference.NONE) {
488    
489                                    // Chars were deleted from source only.
490    
491                                    _highlightChars(
492                                            sourceList, deletedMarkerStart, deletedMarkerEnd,
493                                            difference.getDeletedStart(), difference.getDeletedEnd());
494    
495                                    sourceChanged = true;
496                            }
497                            else if (difference.getDeletedEnd() == Difference.NONE) {
498    
499                                    // Chars were added to target only.
500    
501                                    _highlightChars(
502                                            targetList, addedMarkerStart, addedMarkerEnd,
503                                            difference.getAddedStart(), difference.getAddedEnd());
504    
505                                    targetChanged = true;
506                            }
507                            else {
508    
509                                    // Chars were both deleted and added.
510    
511                                    _highlightChars(
512                                            sourceList, deletedMarkerStart, deletedMarkerEnd,
513                                            difference.getDeletedStart(), difference.getDeletedEnd());
514    
515                                    sourceChanged = true;
516    
517                                    _highlightChars(
518                                            targetList, addedMarkerStart, addedMarkerEnd,
519                                            difference.getAddedStart(), difference.getAddedEnd());
520    
521                                    targetChanged = true;
522                            }
523                    }
524    
525                    if (sourceChanged) {
526                            DiffResult sourceResult = new DiffResult(
527                                    sourceChangedLine, _toString(sourceList));
528    
529                            sourceResults.add(sourceResult);
530    
531                            if (!targetChanged) {
532                                    DiffResult targetResult = new DiffResult(
533                                            targetChangedLine, target);
534    
535                                    targetResults.add(targetResult);
536                            }
537                    }
538    
539                    if (targetChanged) {
540                            if (!sourceChanged) {
541                                    DiffResult sourceResult = new DiffResult(
542                                            sourceChangedLine, source);
543    
544                                    sourceResults.add(sourceResult);
545                            }
546    
547                            DiffResult targetResult = new DiffResult(
548                                    targetChangedLine, _toString(targetList));
549    
550                            targetResults.add(targetResult);
551                    }
552    
553                    return true;
554            }
555    
556            private static List<String> _toList(String line) {
557                    String[] lineParts = line.split(StringPool.BLANK);
558    
559                    List<String> result = new ArrayList<String>();
560    
561                    for (int i = 1; i < lineParts.length; i++) {
562                            result.add(lineParts[i]);
563                    }
564    
565                    return result;
566            }
567    
568            private static String _toString(List<String> line) {
569                    if (line.isEmpty()) {
570                            return StringPool.BLANK;
571                    }
572    
573                    StringBundler sb = new StringBundler(line.size());
574    
575                    for (String linePart : line) {
576                            sb.append(linePart);
577                    }
578    
579                    return sb.toString();
580            }
581    
582            private static final int _DIFF_MAX_LINE_LENGTH = 5000;
583    
584    }