Commit | Line | Data |
---|---|---|
bef5fb06 TD |
1 | <?php |
2 | namespace wcf\util; | |
3 | ||
4 | /** | |
5 | * Diff calculates the longest common subsequence of two given | |
6 | * arrays and is able to generate the differences (added / removed items) | |
7 | * between both arrays as well. | |
8 | * | |
9 | * @author Tim Duesterhus | |
c839bd49 | 10 | * @copyright 2001-2018 WoltLab GmbH |
bef5fb06 | 11 | * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php> |
e71525e4 | 12 | * @package WoltLabSuite\Core\Util |
bef5fb06 TD |
13 | */ |
14 | class Diff { | |
15 | /** | |
16 | * identifier for added lines | |
17 | * @var string | |
18 | */ | |
19 | const ADDED = '+'; | |
20 | ||
21 | /** | |
22 | * identifier for removed lines | |
23 | * @var string | |
24 | */ | |
25 | const REMOVED = '-'; | |
26 | ||
27 | /** | |
b2aa772d | 28 | * identifier for unchanged lines |
bef5fb06 TD |
29 | * @var string |
30 | */ | |
31 | const SAME = ' '; | |
32 | ||
33 | /** | |
34 | * original array, as given by the user | |
35 | * @var array | |
36 | */ | |
058cbd6a | 37 | protected $a = []; |
bef5fb06 TD |
38 | |
39 | /** | |
40 | * modified array, as given by the user | |
41 | * @var array | |
42 | */ | |
058cbd6a | 43 | protected $b = []; |
bef5fb06 TD |
44 | |
45 | /** | |
46 | * size of a | |
47 | * @var integer | |
48 | */ | |
49 | protected $sizeA = 0; | |
50 | ||
51 | /** | |
52 | * size of b | |
53 | * @var integer | |
54 | */ | |
55 | protected $sizeB = 0; | |
56 | ||
57 | /** | |
58 | * calculated diff | |
59 | * @var array | |
60 | */ | |
61 | protected $d = null; | |
62 | ||
cbca4c6e MS |
63 | /** |
64 | * Creates a new instance of Diff. | |
65 | * | |
66 | * @param string[] $a original lines of text | |
67 | * @param string[] $b modified lines of text | |
68 | */ | |
bef5fb06 TD |
69 | public function __construct(array $a, array $b) { |
70 | $this->a = $a; | |
71 | $this->b = $b; | |
72 | ||
7529b280 TD |
73 | $this->sizeA = count($this->a); |
74 | $this->sizeB = count($this->b); | |
bef5fb06 TD |
75 | } |
76 | ||
77 | /** | |
78 | * Calculates the longest common subsequence of `$this->a` | |
79 | * and `$this->b` and returns it as an SplFixedArray. | |
80 | * | |
81 | * @return \SplFixedArray Array of all the items in the longest common subsequence. | |
82 | */ | |
83 | public function getLCS() { | |
84 | // skip all items at the beginning and the end that are the same | |
85 | // this reduces the size of the table and improves performance | |
86 | $offsetStart = $offsetEnd = 0; | |
87 | while ($offsetStart < $this->sizeA && $offsetStart < $this->sizeB && $this->a[$offsetStart] === $this->b[$offsetStart]) { | |
88 | $offsetStart++; | |
89 | } | |
5b2b95ff | 90 | while ($offsetEnd < ($this->sizeA - $offsetStart) && $offsetEnd < ($this->sizeB - $offsetStart) && $this->a[$this->sizeA - 1 - $offsetEnd] === $this->b[$this->sizeB - 1 - $offsetEnd]) { |
bef5fb06 TD |
91 | $offsetEnd++; |
92 | } | |
93 | ||
7529b280 TD |
94 | // B starts with A |
95 | if ($offsetStart === $this->sizeA) { | |
96 | return \SplFixedArray::fromArray($this->a); | |
97 | } | |
98 | // A starts with B | |
99 | if ($offsetStart === $this->sizeB) { | |
100 | return \SplFixedArray::fromArray($this->b); | |
101 | } | |
102 | // A ends with B | |
103 | if ($offsetEnd === $this->sizeB) { | |
104 | return \SplFixedArray::fromArray($this->b); | |
105 | } | |
106 | // B ends with A | |
107 | if ($offsetEnd === $this->sizeA) { | |
bef5fb06 TD |
108 | return \SplFixedArray::fromArray($this->a); |
109 | } | |
110 | ||
111 | // allocate table that keeps track of the subsequence lengths | |
112 | // add 1 to fit the line of zeroes at the top and at the left | |
113 | $tableHeight = $this->sizeA + 1 - $offsetStart - $offsetEnd; | |
114 | $tableWidth = $this->sizeB + 1 - $offsetStart - $offsetEnd; | |
5b2b95ff | 115 | |
bef5fb06 TD |
116 | $table = new \SplFixedArray($tableHeight); |
117 | for ($i = 0; $i < $tableHeight; $i++) { | |
118 | $table[$i] = new \SplFixedArray($tableWidth); | |
119 | } | |
120 | ||
121 | // begin calculating the length of the LCS | |
122 | for ($y = 0; $y < $tableHeight; $y++) { | |
123 | for ($x = 0; $x < $tableWidth; $x++) { | |
124 | // the first row and first column are simply zero | |
125 | if ($y === 0 || $x === 0) { | |
126 | $table[$y][$x] = 0; | |
127 | continue; | |
128 | } | |
129 | ||
130 | $valueA = $this->a[$y - 1 + $offsetStart]; | |
131 | $valueB = $this->b[$x - 1 + $offsetStart]; | |
132 | ||
133 | if ($valueA === $valueB) { | |
134 | // both items match, the subsequence becomes longer | |
135 | $table[$y][$x] = $table[$y - 1][$x - 1] + 1; | |
136 | } | |
137 | else { | |
138 | // otherwise the length is the greater length of the entry above and the entry left | |
139 | $table[$y][$x] = max($table[$y][$x - 1], $table[$y - 1][$x]); | |
140 | } | |
141 | } | |
142 | } | |
143 | ||
144 | $x = $this->sizeB - $offsetStart - $offsetEnd; | |
145 | $y = $this->sizeA - $offsetStart - $offsetEnd; | |
146 | $lcsLength = $table[$y][$x]; | |
bef5fb06 TD |
147 | |
148 | // allocate array of the length of the LCS | |
5b2b95ff | 149 | $lcs = new \SplFixedArray($lcsLength + $offsetStart + $offsetEnd); |
bef5fb06 TD |
150 | |
151 | // until no more items are left in the LCS | |
5b2b95ff | 152 | $i = 0; |
bef5fb06 TD |
153 | while ($table[$y][$x] !== 0) { |
154 | // go to the very left of the current length | |
155 | if ($table[$y][$x - 1] === $table[$y][$x]) { | |
156 | $x--; | |
157 | continue; | |
158 | } | |
159 | ||
160 | // go to the very top of the current length | |
161 | if ($table[$y - 1][$x] === $table[$y][$x]) { | |
162 | $y--; | |
163 | continue; | |
164 | } | |
165 | ||
166 | // add the item that incremented the length to the LCS | |
167 | // we save the items in reverse order as we traverse the table from the back | |
168 | $lcs[$lcsLength + $offsetStart - (++$i)] = $this->a[$y - 1 + $offsetStart]; | |
169 | ||
170 | // and go diagonally to the upper left entry | |
171 | $x--; | |
172 | $y--; | |
173 | } | |
174 | ||
ceabe368 TD |
175 | for ($i = 0; $i < $offsetStart; $i++) { |
176 | $lcs[$i] = $this->a[$i]; | |
177 | } | |
178 | for ($i = 0; $i < $offsetEnd; $i++) { | |
179 | $lcs[$lcsLength + $offsetStart + $i] = $this->a[$this->sizeA - 1 - ($offsetEnd - 1 - $i)]; | |
180 | } | |
bef5fb06 TD |
181 | |
182 | return $lcs; | |
183 | } | |
184 | ||
185 | /** | |
186 | * Builds the diff out of the longest common subsequence of `$this->a` | |
187 | * and `$this->b` and saves it in `$this->d`. | |
188 | */ | |
189 | protected function calculateDiff() { | |
190 | if ($this->d !== null) return; | |
191 | $lcs = $this->getLCS(); | |
192 | ||
058cbd6a | 193 | $this->d = []; |
bef5fb06 TD |
194 | $positionA = 0; |
195 | $positionB = 0; | |
196 | foreach ($lcs as $item) { | |
197 | // find next matching item in a, every item in between must be removed | |
198 | while ($positionA < $this->sizeA && $this->a[$positionA] !== $item) { | |
058cbd6a | 199 | $this->d[] = [self::REMOVED, $this->a[$positionA++]]; |
bef5fb06 TD |
200 | } |
201 | ||
948b2593 | 202 | // find next matching item in b, every item in between must be added |
bef5fb06 | 203 | while ($positionB < $this->sizeB && $this->b[$positionB] !== $item) { |
058cbd6a | 204 | $this->d[] = [self::ADDED, $this->b[$positionB++]]; |
bef5fb06 TD |
205 | } |
206 | ||
207 | // we are back in our longest common subsequence | |
058cbd6a | 208 | $this->d[] = [self::SAME, $item]; |
bef5fb06 TD |
209 | $positionA++; |
210 | $positionB++; | |
211 | } | |
212 | ||
213 | // append remaining items of `a` and `b` | |
214 | while ($positionA < $this->sizeA) { | |
058cbd6a | 215 | $this->d[] = [self::REMOVED, $this->a[$positionA++]]; |
bef5fb06 TD |
216 | } |
217 | while ($positionB < $this->sizeB) { | |
058cbd6a | 218 | $this->d[] = [self::ADDED, $this->b[$positionB++]]; |
bef5fb06 TD |
219 | } |
220 | } | |
221 | ||
222 | /** | |
223 | * Returns the raw difference array. | |
224 | * | |
225 | * @return array | |
226 | */ | |
227 | public function getRawDiff() { | |
228 | $this->calculateDiff(); | |
229 | ||
230 | return $this->d; | |
231 | } | |
232 | ||
233 | /** | |
234 | * Returns a string like the one generated by unix diff. | |
235 | * | |
6f37a5f5 | 236 | * @param integer $context |
bef5fb06 TD |
237 | * @return string |
238 | */ | |
239 | public function getUnixDiff($context = 2) { | |
240 | $d = $this->getRawDiff(); | |
241 | ||
058cbd6a | 242 | $result = []; |
bef5fb06 TD |
243 | $result[] = "--- a"; |
244 | $result[] = "+++ b"; | |
245 | ||
bef5fb06 TD |
246 | $leftStart = 1; |
247 | $rightStart = 1; | |
248 | for ($i = 0, $max = count($d); $i < $max; $i++) { | |
13b7bb1f | 249 | list($type, ) = $d[$i]; |
bef5fb06 TD |
250 | |
251 | if ($type == self::REMOVED || $type == self::ADDED) { | |
252 | // calculate start of context | |
253 | $start = max($i - $context, 0); | |
254 | ||
255 | // calculate start in left array | |
256 | $leftStart -= $i - $start; | |
257 | // ... and in right array | |
258 | $rightStart -= $i - $start; | |
259 | ||
260 | // set current context size | |
261 | $inContext = $context; | |
262 | ||
263 | // search the end of the current window | |
264 | $plus = $minus = 0; | |
265 | for ($j = $start; $j < $max; $j++) { | |
13b7bb1f | 266 | list($type, ) = $d[$j]; |
bef5fb06 TD |
267 | |
268 | switch ($type) { | |
269 | case self::REMOVED: | |
270 | // reset context size | |
271 | $inContext = $context; | |
272 | $minus++; | |
273 | break; | |
274 | case self::ADDED: | |
275 | // reset context size | |
276 | $inContext = $context; | |
277 | $plus++; | |
278 | break; | |
279 | case self::SAME: | |
280 | if ($inContext) { | |
281 | // decrement remaining context | |
282 | $inContext--; | |
283 | } | |
284 | else { | |
285 | // context is zero, but this isn't an addition or removal | |
286 | // check whether the next context would overlap | |
287 | for ($k = $j; $k < $max && $k <= $j + $context; $k++) { | |
288 | if ($d[$k][0] != self::SAME) { | |
289 | $inContext = $k - $j; | |
290 | continue 2; | |
291 | } | |
292 | } | |
293 | break 2; | |
294 | } | |
295 | break; | |
296 | } | |
297 | } | |
298 | ||
299 | // calculate marker | |
63b9817b | 300 | $result[] = '@@ -'. $leftStart .(($j - $plus - $start) > 1 ? ','.($j - $plus - $start) : '').' +'. $rightStart .(($j - $minus - $start) > 1 ? ','.($j - $minus - $start) : '').' @@'; |
bef5fb06 TD |
301 | |
302 | // append lines | |
303 | foreach (array_slice($d, $start, $j - $start) as $item) $result[] = implode('', $item); | |
304 | ||
305 | // shift the offset by the shown lines | |
306 | $i = $j; | |
307 | $leftStart += $j - $start - $plus; | |
308 | $rightStart += $j - $start - $minus; | |
309 | } | |
310 | ||
311 | // line is skipped | |
312 | $leftStart++; | |
313 | $rightStart++; | |
314 | } | |
315 | ||
316 | return implode("\n", $result); | |
317 | } | |
318 | ||
319 | /** | |
320 | * @see Diff::getUnixDiff() | |
321 | */ | |
322 | public function __toString() { | |
323 | return $this->getUnixDiff(); | |
324 | } | |
325 | } |