Merge branch '3.0'
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / util / Diff.class.php
CommitLineData
bef5fb06
TD
1<?php
2namespace wcf\util;
3
4/**
5 * Diff calculates the longest common subsequence of two given
6 * arrays and is able to generate the differences (added / removed items)
7 * between both arrays as well.
8 *
9 * @author Tim Duesterhus
c839bd49 10 * @copyright 2001-2018 WoltLab GmbH
bef5fb06 11 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
e71525e4 12 * @package WoltLabSuite\Core\Util
bef5fb06
TD
13 */
14class Diff {
15 /**
16 * identifier for added lines
17 * @var string
18 */
19 const ADDED = '+';
20
21 /**
22 * identifier for removed lines
23 * @var string
24 */
25 const REMOVED = '-';
26
27 /**
b2aa772d 28 * identifier for unchanged lines
bef5fb06
TD
29 * @var string
30 */
31 const SAME = ' ';
32
33 /**
34 * original array, as given by the user
35 * @var array
36 */
058cbd6a 37 protected $a = [];
bef5fb06
TD
38
39 /**
40 * modified array, as given by the user
41 * @var array
42 */
058cbd6a 43 protected $b = [];
bef5fb06
TD
44
45 /**
46 * size of a
47 * @var integer
48 */
49 protected $sizeA = 0;
50
51 /**
52 * size of b
53 * @var integer
54 */
55 protected $sizeB = 0;
56
57 /**
58 * calculated diff
59 * @var array
60 */
61 protected $d = null;
62
cbca4c6e
MS
63 /**
64 * Creates a new instance of Diff.
65 *
66 * @param string[] $a original lines of text
67 * @param string[] $b modified lines of text
68 */
bef5fb06
TD
69 public function __construct(array $a, array $b) {
70 $this->a = $a;
71 $this->b = $b;
72
7529b280
TD
73 $this->sizeA = count($this->a);
74 $this->sizeB = count($this->b);
bef5fb06
TD
75 }
76
77 /**
78 * Calculates the longest common subsequence of `$this->a`
79 * and `$this->b` and returns it as an SplFixedArray.
80 *
81 * @return \SplFixedArray Array of all the items in the longest common subsequence.
82 */
83 public function getLCS() {
84 // skip all items at the beginning and the end that are the same
85 // this reduces the size of the table and improves performance
86 $offsetStart = $offsetEnd = 0;
87 while ($offsetStart < $this->sizeA && $offsetStart < $this->sizeB && $this->a[$offsetStart] === $this->b[$offsetStart]) {
88 $offsetStart++;
89 }
5b2b95ff 90 while ($offsetEnd < ($this->sizeA - $offsetStart) && $offsetEnd < ($this->sizeB - $offsetStart) && $this->a[$this->sizeA - 1 - $offsetEnd] === $this->b[$this->sizeB - 1 - $offsetEnd]) {
bef5fb06
TD
91 $offsetEnd++;
92 }
93
7529b280
TD
94 // B starts with A
95 if ($offsetStart === $this->sizeA) {
96 return \SplFixedArray::fromArray($this->a);
97 }
98 // A starts with B
99 if ($offsetStart === $this->sizeB) {
100 return \SplFixedArray::fromArray($this->b);
101 }
102 // A ends with B
103 if ($offsetEnd === $this->sizeB) {
104 return \SplFixedArray::fromArray($this->b);
105 }
106 // B ends with A
107 if ($offsetEnd === $this->sizeA) {
bef5fb06
TD
108 return \SplFixedArray::fromArray($this->a);
109 }
110
111 // allocate table that keeps track of the subsequence lengths
112 // add 1 to fit the line of zeroes at the top and at the left
113 $tableHeight = $this->sizeA + 1 - $offsetStart - $offsetEnd;
114 $tableWidth = $this->sizeB + 1 - $offsetStart - $offsetEnd;
5b2b95ff 115
bef5fb06
TD
116 $table = new \SplFixedArray($tableHeight);
117 for ($i = 0; $i < $tableHeight; $i++) {
118 $table[$i] = new \SplFixedArray($tableWidth);
119 }
120
121 // begin calculating the length of the LCS
122 for ($y = 0; $y < $tableHeight; $y++) {
123 for ($x = 0; $x < $tableWidth; $x++) {
124 // the first row and first column are simply zero
125 if ($y === 0 || $x === 0) {
126 $table[$y][$x] = 0;
127 continue;
128 }
129
130 $valueA = $this->a[$y - 1 + $offsetStart];
131 $valueB = $this->b[$x - 1 + $offsetStart];
132
133 if ($valueA === $valueB) {
134 // both items match, the subsequence becomes longer
135 $table[$y][$x] = $table[$y - 1][$x - 1] + 1;
136 }
137 else {
138 // otherwise the length is the greater length of the entry above and the entry left
139 $table[$y][$x] = max($table[$y][$x - 1], $table[$y - 1][$x]);
140 }
141 }
142 }
143
144 $x = $this->sizeB - $offsetStart - $offsetEnd;
145 $y = $this->sizeA - $offsetStart - $offsetEnd;
146 $lcsLength = $table[$y][$x];
bef5fb06
TD
147
148 // allocate array of the length of the LCS
5b2b95ff 149 $lcs = new \SplFixedArray($lcsLength + $offsetStart + $offsetEnd);
bef5fb06
TD
150
151 // until no more items are left in the LCS
5b2b95ff 152 $i = 0;
bef5fb06
TD
153 while ($table[$y][$x] !== 0) {
154 // go to the very left of the current length
155 if ($table[$y][$x - 1] === $table[$y][$x]) {
156 $x--;
157 continue;
158 }
159
160 // go to the very top of the current length
161 if ($table[$y - 1][$x] === $table[$y][$x]) {
162 $y--;
163 continue;
164 }
165
166 // add the item that incremented the length to the LCS
167 // we save the items in reverse order as we traverse the table from the back
168 $lcs[$lcsLength + $offsetStart - (++$i)] = $this->a[$y - 1 + $offsetStart];
169
170 // and go diagonally to the upper left entry
171 $x--;
172 $y--;
173 }
174
ceabe368
TD
175 for ($i = 0; $i < $offsetStart; $i++) {
176 $lcs[$i] = $this->a[$i];
177 }
178 for ($i = 0; $i < $offsetEnd; $i++) {
179 $lcs[$lcsLength + $offsetStart + $i] = $this->a[$this->sizeA - 1 - ($offsetEnd - 1 - $i)];
180 }
bef5fb06
TD
181
182 return $lcs;
183 }
184
185 /**
186 * Builds the diff out of the longest common subsequence of `$this->a`
187 * and `$this->b` and saves it in `$this->d`.
188 */
189 protected function calculateDiff() {
190 if ($this->d !== null) return;
191 $lcs = $this->getLCS();
192
058cbd6a 193 $this->d = [];
bef5fb06
TD
194 $positionA = 0;
195 $positionB = 0;
196 foreach ($lcs as $item) {
197 // find next matching item in a, every item in between must be removed
198 while ($positionA < $this->sizeA && $this->a[$positionA] !== $item) {
058cbd6a 199 $this->d[] = [self::REMOVED, $this->a[$positionA++]];
bef5fb06
TD
200 }
201
948b2593 202 // find next matching item in b, every item in between must be added
bef5fb06 203 while ($positionB < $this->sizeB && $this->b[$positionB] !== $item) {
058cbd6a 204 $this->d[] = [self::ADDED, $this->b[$positionB++]];
bef5fb06
TD
205 }
206
207 // we are back in our longest common subsequence
058cbd6a 208 $this->d[] = [self::SAME, $item];
bef5fb06
TD
209 $positionA++;
210 $positionB++;
211 }
212
213 // append remaining items of `a` and `b`
214 while ($positionA < $this->sizeA) {
058cbd6a 215 $this->d[] = [self::REMOVED, $this->a[$positionA++]];
bef5fb06
TD
216 }
217 while ($positionB < $this->sizeB) {
058cbd6a 218 $this->d[] = [self::ADDED, $this->b[$positionB++]];
bef5fb06
TD
219 }
220 }
221
222 /**
223 * Returns the raw difference array.
224 *
225 * @return array
226 */
227 public function getRawDiff() {
228 $this->calculateDiff();
229
230 return $this->d;
231 }
232
233 /**
234 * Returns a string like the one generated by unix diff.
235 *
6f37a5f5 236 * @param integer $context
bef5fb06
TD
237 * @return string
238 */
239 public function getUnixDiff($context = 2) {
240 $d = $this->getRawDiff();
241
058cbd6a 242 $result = [];
bef5fb06
TD
243 $result[] = "--- a";
244 $result[] = "+++ b";
245
bef5fb06
TD
246 $leftStart = 1;
247 $rightStart = 1;
248 for ($i = 0, $max = count($d); $i < $max; $i++) {
13b7bb1f 249 list($type, ) = $d[$i];
bef5fb06
TD
250
251 if ($type == self::REMOVED || $type == self::ADDED) {
252 // calculate start of context
253 $start = max($i - $context, 0);
254
255 // calculate start in left array
256 $leftStart -= $i - $start;
257 // ... and in right array
258 $rightStart -= $i - $start;
259
260 // set current context size
261 $inContext = $context;
262
263 // search the end of the current window
264 $plus = $minus = 0;
265 for ($j = $start; $j < $max; $j++) {
13b7bb1f 266 list($type, ) = $d[$j];
bef5fb06
TD
267
268 switch ($type) {
269 case self::REMOVED:
270 // reset context size
271 $inContext = $context;
272 $minus++;
273 break;
274 case self::ADDED:
275 // reset context size
276 $inContext = $context;
277 $plus++;
278 break;
279 case self::SAME:
280 if ($inContext) {
281 // decrement remaining context
282 $inContext--;
283 }
284 else {
285 // context is zero, but this isn't an addition or removal
286 // check whether the next context would overlap
287 for ($k = $j; $k < $max && $k <= $j + $context; $k++) {
288 if ($d[$k][0] != self::SAME) {
289 $inContext = $k - $j;
290 continue 2;
291 }
292 }
293 break 2;
294 }
295 break;
296 }
297 }
298
299 // calculate marker
63b9817b 300 $result[] = '@@ -'. $leftStart .(($j - $plus - $start) > 1 ? ','.($j - $plus - $start) : '').' +'. $rightStart .(($j - $minus - $start) > 1 ? ','.($j - $minus - $start) : '').' @@';
bef5fb06
TD
301
302 // append lines
303 foreach (array_slice($d, $start, $j - $start) as $item) $result[] = implode('', $item);
304
305 // shift the offset by the shown lines
306 $i = $j;
307 $leftStart += $j - $start - $plus;
308 $rightStart += $j - $start - $minus;
309 }
310
311 // line is skipped
312 $leftStart++;
313 $rightStart++;
314 }
315
316 return implode("\n", $result);
317 }
318
319 /**
320 * @see Diff::getUnixDiff()
321 */
322 public function __toString() {
323 return $this->getUnixDiff();
324 }
325}