[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * 4 * @package diff 5 * @version $Id$ 6 * @copyright (c) 2006 phpBB Group 7 * @license http://opensource.org/licenses/gpl-license.php GNU Public License 8 * 9 */ 10 11 /** 12 * @ignore 13 */ 14 if (!defined('IN_PHPBB')) 15 { 16 exit; 17 } 18 19 /** 20 * Code from pear.php.net, Text_Diff-1.1.0 package 21 * http://pear.php.net/package/Text_Diff/ (native engine) 22 * 23 * Modified by phpBB Group to meet our coding standards 24 * and being able to integrate into phpBB 25 * 26 * Class used internally by Text_Diff to actually compute the diffs. This 27 * class is implemented using native PHP code. 28 * 29 * The algorithm used here is mostly lifted from the perl module 30 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at: 31 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip 32 * 33 * More ideas are taken from: http://www.ics.uci.edu/~eppstein/161/960229.html 34 * 35 * Some ideas (and a bit of code) are taken from analyze.c, of GNU 36 * diffutils-2.7, which can be found at: 37 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz 38 * 39 * Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from 40 * Geoffrey T. Dairiki <dairiki@dairiki.org>. The original PHP version of this 41 * code was written by him, and is used/adapted with his permission. 42 * 43 * Copyright 2004-2008 The Horde Project (http://www.horde.org/) 44 * 45 * @author Geoffrey T. Dairiki <dairiki@dairiki.org> 46 * @package diff 47 * 48 * @access private 49 */ 50 class diff_engine 51 { 52 /** 53 * If set to true we trim all lines before we compare them. This ensures that sole space/tab changes do not trigger diffs. 54 */ 55 var $skip_whitespace_changes = true; 56 57 function diff(&$from_lines, &$to_lines, $preserve_cr = true) 58 { 59 // Remove empty lines... 60 // If preserve_cr is true, we basically only change \r\n and bare \r to \n to get the same carriage returns for both files 61 // If it is false, we try to only use \n once per line and ommit all empty lines to be able to get a proper data diff 62 63 if (is_array($from_lines)) 64 { 65 $from_lines = implode("\n", $from_lines); 66 } 67 68 if (is_array($to_lines)) 69 { 70 $to_lines = implode("\n", $to_lines); 71 } 72 73 if ($preserve_cr) 74 { 75 $from_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $from_lines))); 76 $to_lines = explode("\n", str_replace("\r", "\n", str_replace("\r\n", "\n", $to_lines))); 77 } 78 else 79 { 80 $from_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $from_lines)); 81 $to_lines = explode("\n", preg_replace('#[\n\r]+#', "\n", $to_lines)); 82 } 83 84 $n_from = sizeof($from_lines); 85 $n_to = sizeof($to_lines); 86 87 $this->xchanged = $this->ychanged = $this->xv = $this->yv = $this->xind = $this->yind = array(); 88 unset($this->seq, $this->in_seq, $this->lcs); 89 90 // Skip leading common lines. 91 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) 92 { 93 if (trim($from_lines[$skip]) !== trim($to_lines[$skip])) 94 { 95 break; 96 } 97 $this->xchanged[$skip] = $this->ychanged[$skip] = false; 98 } 99 100 // Skip trailing common lines. 101 $xi = $n_from; 102 $yi = $n_to; 103 104 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) 105 { 106 if (trim($from_lines[$xi]) !== trim($to_lines[$yi])) 107 { 108 break; 109 } 110 $this->xchanged[$xi] = $this->ychanged[$yi] = false; 111 } 112 113 // Ignore lines which do not exist in both files. 114 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) 115 { 116 if ($this->skip_whitespace_changes) $xhash[trim($from_lines[$xi])] = 1; else $xhash[$from_lines[$xi]] = 1; 117 } 118 119 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) 120 { 121 $line = ($this->skip_whitespace_changes) ? trim($to_lines[$yi]) : $to_lines[$yi]; 122 123 if (($this->ychanged[$yi] = empty($xhash[$line]))) 124 { 125 continue; 126 } 127 $yhash[$line] = 1; 128 $this->yv[] = $line; 129 $this->yind[] = $yi; 130 } 131 132 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) 133 { 134 $line = ($this->skip_whitespace_changes) ? trim($from_lines[$xi]) : $from_lines[$xi]; 135 136 if (($this->xchanged[$xi] = empty($yhash[$line]))) 137 { 138 continue; 139 } 140 $this->xv[] = $line; 141 $this->xind[] = $xi; 142 } 143 144 // Find the LCS. 145 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv)); 146 147 // Merge edits when possible. 148 if ($this->skip_whitespace_changes) 149 { 150 $from_lines_clean = array_map('trim', $from_lines); 151 $to_lines_clean = array_map('trim', $to_lines); 152 153 $this->_shift_boundaries($from_lines_clean, $this->xchanged, $this->ychanged); 154 $this->_shift_boundaries($to_lines_clean, $this->ychanged, $this->xchanged); 155 156 unset($from_lines_clean, $to_lines_clean); 157 } 158 else 159 { 160 $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged); 161 $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged); 162 } 163 164 // Compute the edit operations. 165 $edits = array(); 166 $xi = $yi = 0; 167 168 while ($xi < $n_from || $yi < $n_to) 169 { 170 // Skip matching "snake". 171 $copy = array(); 172 173 while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) 174 { 175 $copy[] = $from_lines[$xi++]; 176 $yi++; 177 } 178 179 if ($copy) 180 { 181 $edits[] = new diff_op_copy($copy); 182 } 183 184 // Find deletes & adds. 185 $delete = array(); 186 while ($xi < $n_from && $this->xchanged[$xi]) 187 { 188 $delete[] = $from_lines[$xi++]; 189 } 190 191 $add = array(); 192 while ($yi < $n_to && $this->ychanged[$yi]) 193 { 194 $add[] = $to_lines[$yi++]; 195 } 196 197 if ($delete && $add) 198 { 199 $edits[] = new diff_op_change($delete, $add); 200 } 201 else if ($delete) 202 { 203 $edits[] = new diff_op_delete($delete); 204 } 205 else if ($add) 206 { 207 $edits[] = new diff_op_add($add); 208 } 209 } 210 211 return $edits; 212 } 213 214 /** 215 * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, 216 * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized segments. 217 * 218 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of 219 * NCHUNKS+1 (X, Y) indexes giving the diving points between sub 220 * sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), 221 * the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == 222 * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). 223 * 224 * This function assumes that the first lines of the specified portions of 225 * the two files do not match, and likewise that the last lines do not 226 * match. The caller must trim matching lines from the beginning and end 227 * of the portions it is going to specify. 228 */ 229 function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) 230 { 231 $flip = false; 232 233 if ($xlim - $xoff > $ylim - $yoff) 234 { 235 // Things seems faster (I'm not sure I understand why) when the shortest sequence is in X. 236 $flip = true; 237 list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim); 238 } 239 240 if ($flip) 241 { 242 for ($i = $ylim - 1; $i >= $yoff; $i--) 243 { 244 $ymatches[$this->xv[$i]][] = $i; 245 } 246 } 247 else 248 { 249 for ($i = $ylim - 1; $i >= $yoff; $i--) 250 { 251 $ymatches[$this->yv[$i]][] = $i; 252 } 253 } 254 255 $this->lcs = 0; 256 $this->seq[0]= $yoff - 1; 257 $this->in_seq = array(); 258 $ymids[0] = array(); 259 260 $numer = $xlim - $xoff + $nchunks - 1; 261 $x = $xoff; 262 263 for ($chunk = 0; $chunk < $nchunks; $chunk++) 264 { 265 if ($chunk > 0) 266 { 267 for ($i = 0; $i <= $this->lcs; $i++) 268 { 269 $ymids[$i][$chunk - 1] = $this->seq[$i]; 270 } 271 } 272 273 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks); 274 275 for (; $x < $x1; $x++) 276 { 277 $line = $flip ? $this->yv[$x] : $this->xv[$x]; 278 if (empty($ymatches[$line])) 279 { 280 continue; 281 } 282 $matches = $ymatches[$line]; 283 284 reset($matches); 285 while (list(, $y) = each($matches)) 286 { 287 if (empty($this->in_seq[$y])) 288 { 289 $k = $this->_lcs_pos($y); 290 $ymids[$k] = $ymids[$k - 1]; 291 break; 292 } 293 } 294 295 // no reset() here 296 while (list(, $y) = each($matches)) 297 { 298 if ($y > $this->seq[$k - 1]) 299 { 300 // Optimization: this is a common case: next match is just replacing previous match. 301 $this->in_seq[$this->seq[$k]] = false; 302 $this->seq[$k] = $y; 303 $this->in_seq[$y] = 1; 304 } 305 else if (empty($this->in_seq[$y])) 306 { 307 $k = $this->_lcs_pos($y); 308 $ymids[$k] = $ymids[$k - 1]; 309 } 310 } 311 } 312 } 313 314 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); 315 $ymid = $ymids[$this->lcs]; 316 317 for ($n = 0; $n < $nchunks - 1; $n++) 318 { 319 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); 320 $y1 = $ymid[$n] + 1; 321 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); 322 } 323 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); 324 325 return array($this->lcs, $seps); 326 } 327 328 function _lcs_pos($ypos) 329 { 330 $end = $this->lcs; 331 332 if ($end == 0 || $ypos > $this->seq[$end]) 333 { 334 $this->seq[++$this->lcs] = $ypos; 335 $this->in_seq[$ypos] = 1; 336 return $this->lcs; 337 } 338 339 $beg = 1; 340 while ($beg < $end) 341 { 342 $mid = (int)(($beg + $end) / 2); 343 if ($ypos > $this->seq[$mid]) 344 { 345 $beg = $mid + 1; 346 } 347 else 348 { 349 $end = $mid; 350 } 351 } 352 353 $this->in_seq[$this->seq[$end]] = false; 354 $this->seq[$end] = $ypos; 355 $this->in_seq[$ypos] = 1; 356 357 return $end; 358 } 359 360 /** 361 * Finds LCS of two sequences. 362 * 363 * The results are recorded in the vectors $this->{x,y}changed[], by 364 * storing a 1 in the element for each line that is an insertion or 365 * deletion (ie. is not in the LCS). 366 * 367 * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1. 368 * 369 * Note that XLIM, YLIM are exclusive bounds. All line numbers are 370 * origin-0 and discarded lines are not counted. 371 */ 372 function _compareseq($xoff, $xlim, $yoff, $ylim) 373 { 374 // Slide down the bottom initial diagonal. 375 while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) 376 { 377 ++$xoff; 378 ++$yoff; 379 } 380 381 // Slide up the top initial diagonal. 382 while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) 383 { 384 --$xlim; 385 --$ylim; 386 } 387 388 if ($xoff == $xlim || $yoff == $ylim) 389 { 390 $lcs = 0; 391 } 392 else 393 { 394 // This is ad hoc but seems to work well. 395 // $nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); 396 // $nchunks = max(2,min(8,(int)$nchunks)); 397 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; 398 list($lcs, $seps) = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks); 399 } 400 401 if ($lcs == 0) 402 { 403 // X and Y sequences have no common subsequence: mark all changed. 404 while ($yoff < $ylim) 405 { 406 $this->ychanged[$this->yind[$yoff++]] = 1; 407 } 408 409 while ($xoff < $xlim) 410 { 411 $this->xchanged[$this->xind[$xoff++]] = 1; 412 } 413 } 414 else 415 { 416 // Use the partitions to split this problem into subproblems. 417 reset($seps); 418 $pt1 = $seps[0]; 419 420 while ($pt2 = next($seps)) 421 { 422 $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]); 423 $pt1 = $pt2; 424 } 425 } 426 } 427 428 /** 429 * Adjusts inserts/deletes of identical lines to join changes as much as possible. 430 * 431 * We do something when a run of changed lines include a line at one end 432 * and has an excluded, identical line at the other. We are free to 433 * choose which identical line is included. 'compareseq' usually chooses 434 * the one at the beginning, but usually it is cleaner to consider the 435 * following identical line to be the "change". 436 * 437 * This is extracted verbatim from analyze.c (GNU diffutils-2.7). 438 */ 439 function _shift_boundaries($lines, &$changed, $other_changed) 440 { 441 $i = 0; 442 $j = 0; 443 444 $len = sizeof($lines); 445 $other_len = sizeof($other_changed); 446 447 while (1) 448 { 449 // Scan forward to find the beginning of another run of 450 // changes. Also keep track of the corresponding point in the other file. 451 // 452 // Throughout this code, $i and $j are adjusted together so that 453 // the first $i elements of $changed and the first $j elements of 454 // $other_changed both contain the same number of zeros (unchanged lines). 455 // 456 // Furthermore, $j is always kept so that $j == $other_len or $other_changed[$j] == false. 457 while ($j < $other_len && $other_changed[$j]) 458 { 459 $j++; 460 } 461 462 while ($i < $len && ! $changed[$i]) 463 { 464 $i++; 465 $j++; 466 467 while ($j < $other_len && $other_changed[$j]) 468 { 469 $j++; 470 } 471 } 472 473 if ($i == $len) 474 { 475 break; 476 } 477 478 $start = $i; 479 480 // Find the end of this run of changes. 481 while (++$i < $len && $changed[$i]) 482 { 483 continue; 484 } 485 486 do 487 { 488 // Record the length of this run of changes, so that we can later determine whether the run has grown. 489 $runlength = $i - $start; 490 491 // Move the changed region back, so long as the previous unchanged line matches the last changed one. 492 // This merges with previous changed regions. 493 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) 494 { 495 $changed[--$start] = 1; 496 $changed[--$i] = false; 497 498 while ($start > 0 && $changed[$start - 1]) 499 { 500 $start--; 501 } 502 503 while ($other_changed[--$j]) 504 { 505 continue; 506 } 507 } 508 509 // Set CORRESPONDING to the end of the changed run, at the last point where it corresponds to a changed run in the 510 // other file. CORRESPONDING == LEN means no such point has been found. 511 $corresponding = $j < $other_len ? $i : $len; 512 513 // Move the changed region forward, so long as the first changed line matches the following unchanged one. 514 // This merges with following changed regions. 515 // Do this second, so that if there are no merges, the changed region is moved forward as far as possible. 516 while ($i < $len && $lines[$start] == $lines[$i]) 517 { 518 $changed[$start++] = false; 519 $changed[$i++] = 1; 520 521 while ($i < $len && $changed[$i]) 522 { 523 $i++; 524 } 525 526 $j++; 527 if ($j < $other_len && $other_changed[$j]) 528 { 529 $corresponding = $i; 530 while ($j < $other_len && $other_changed[$j]) 531 { 532 $j++; 533 } 534 } 535 } 536 } 537 while ($runlength != $i - $start); 538 539 // If possible, move the fully-merged run of changes back to a corresponding run in the other file. 540 while ($corresponding < $i) 541 { 542 $changed[--$start] = 1; 543 $changed[--$i] = 0; 544 545 while ($other_changed[--$j]) 546 { 547 continue; 548 } 549 } 550 } 551 } 552 } 553 554 ?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Oct 2 15:03:47 2013 | Cross-referenced by PHPXref 0.7.1 |