Differ.php 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. <?php
  2. /*
  3. * This file is part of the Diff package.
  4. *
  5. * (c) Sebastian Bergmann <sebastian@phpunit.de>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace SebastianBergmann\Diff;
  11. use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
  12. use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
  13. use SebastianBergmann\Diff\LCS\MemoryEfficientImplementation;
  14. /**
  15. * Diff implementation.
  16. */
  17. class Differ
  18. {
  19. /**
  20. * @var string
  21. */
  22. private $header;
  23. /**
  24. * @var bool
  25. */
  26. private $showNonDiffLines;
  27. /**
  28. * @param string $header
  29. */
  30. public function __construct($header = "--- Original\n+++ New\n", $showNonDiffLines = true)
  31. {
  32. $this->header = $header;
  33. $this->showNonDiffLines = $showNonDiffLines;
  34. }
  35. /**
  36. * Returns the diff between two arrays or strings as string.
  37. *
  38. * @param array|string $from
  39. * @param array|string $to
  40. * @param LongestCommonSubsequence $lcs
  41. *
  42. * @return string
  43. */
  44. public function diff($from, $to, LongestCommonSubsequence $lcs = null)
  45. {
  46. if (!is_array($from) && !is_string($from)) {
  47. $from = (string) $from;
  48. }
  49. if (!is_array($to) && !is_string($to)) {
  50. $to = (string) $to;
  51. }
  52. $buffer = $this->header;
  53. $diff = $this->diffToArray($from, $to, $lcs);
  54. $inOld = false;
  55. $i = 0;
  56. $old = array();
  57. foreach ($diff as $line) {
  58. if ($line[1] === 0 /* OLD */) {
  59. if ($inOld === false) {
  60. $inOld = $i;
  61. }
  62. } elseif ($inOld !== false) {
  63. if (($i - $inOld) > 5) {
  64. $old[$inOld] = $i - 1;
  65. }
  66. $inOld = false;
  67. }
  68. ++$i;
  69. }
  70. $start = isset($old[0]) ? $old[0] : 0;
  71. $end = count($diff);
  72. if ($tmp = array_search($end, $old)) {
  73. $end = $tmp;
  74. }
  75. $newChunk = true;
  76. for ($i = $start; $i < $end; $i++) {
  77. if (isset($old[$i])) {
  78. $buffer .= "\n";
  79. $newChunk = true;
  80. $i = $old[$i];
  81. }
  82. if ($newChunk) {
  83. if ($this->showNonDiffLines === true) {
  84. $buffer .= "@@ @@\n";
  85. }
  86. $newChunk = false;
  87. }
  88. if ($diff[$i][1] === 1 /* ADDED */) {
  89. $buffer .= '+' . $diff[$i][0] . "\n";
  90. } elseif ($diff[$i][1] === 2 /* REMOVED */) {
  91. $buffer .= '-' . $diff[$i][0] . "\n";
  92. } elseif ($this->showNonDiffLines === true) {
  93. $buffer .= ' ' . $diff[$i][0] . "\n";
  94. }
  95. }
  96. return $buffer;
  97. }
  98. /**
  99. * Returns the diff between two arrays or strings as array.
  100. *
  101. * Each array element contains two elements:
  102. * - [0] => string $token
  103. * - [1] => 2|1|0
  104. *
  105. * - 2: REMOVED: $token was removed from $from
  106. * - 1: ADDED: $token was added to $from
  107. * - 0: OLD: $token is not changed in $to
  108. *
  109. * @param array|string $from
  110. * @param array|string $to
  111. * @param LongestCommonSubsequence $lcs
  112. *
  113. * @return array
  114. */
  115. public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
  116. {
  117. preg_match_all('(\r\n|\r|\n)', $from, $fromMatches);
  118. preg_match_all('(\r\n|\r|\n)', $to, $toMatches);
  119. if (is_string($from)) {
  120. $from = preg_split('(\r\n|\r|\n)', $from);
  121. }
  122. if (is_string($to)) {
  123. $to = preg_split('(\r\n|\r|\n)', $to);
  124. }
  125. $start = array();
  126. $end = array();
  127. $fromLength = count($from);
  128. $toLength = count($to);
  129. $length = min($fromLength, $toLength);
  130. for ($i = 0; $i < $length; ++$i) {
  131. if ($from[$i] === $to[$i]) {
  132. $start[] = $from[$i];
  133. unset($from[$i], $to[$i]);
  134. } else {
  135. break;
  136. }
  137. }
  138. $length -= $i;
  139. for ($i = 1; $i < $length; ++$i) {
  140. if ($from[$fromLength - $i] === $to[$toLength - $i]) {
  141. array_unshift($end, $from[$fromLength - $i]);
  142. unset($from[$fromLength - $i], $to[$toLength - $i]);
  143. } else {
  144. break;
  145. }
  146. }
  147. if ($lcs === null) {
  148. $lcs = $this->selectLcsImplementation($from, $to);
  149. }
  150. $common = $lcs->calculate(array_values($from), array_values($to));
  151. $diff = array();
  152. if (isset($fromMatches[0]) && $toMatches[0] &&
  153. count($fromMatches[0]) === count($toMatches[0]) &&
  154. $fromMatches[0] !== $toMatches[0]) {
  155. $diff[] = array(
  156. '#Warning: Strings contain different line endings!', 0
  157. );
  158. }
  159. foreach ($start as $token) {
  160. $diff[] = array($token, 0 /* OLD */);
  161. }
  162. reset($from);
  163. reset($to);
  164. foreach ($common as $token) {
  165. while ((($fromToken = reset($from)) !== $token)) {
  166. $diff[] = array(array_shift($from), 2 /* REMOVED */);
  167. }
  168. while ((($toToken = reset($to)) !== $token)) {
  169. $diff[] = array(array_shift($to), 1 /* ADDED */);
  170. }
  171. $diff[] = array($token, 0 /* OLD */);
  172. array_shift($from);
  173. array_shift($to);
  174. }
  175. while (($token = array_shift($from)) !== null) {
  176. $diff[] = array($token, 2 /* REMOVED */);
  177. }
  178. while (($token = array_shift($to)) !== null) {
  179. $diff[] = array($token, 1 /* ADDED */);
  180. }
  181. foreach ($end as $token) {
  182. $diff[] = array($token, 0 /* OLD */);
  183. }
  184. return $diff;
  185. }
  186. /**
  187. * @param array $from
  188. * @param array $to
  189. *
  190. * @return LongestCommonSubsequence
  191. */
  192. private function selectLcsImplementation(array $from, array $to)
  193. {
  194. // We do not want to use the time-efficient implementation if its memory
  195. // footprint will probably exceed this value. Note that the footprint
  196. // calculation is only an estimation for the matrix and the LCS method
  197. // will typically allocate a bit more memory than this.
  198. $memoryLimit = 100 * 1024 * 1024;
  199. if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
  200. return new MemoryEfficientImplementation;
  201. }
  202. return new TimeEfficientImplementation;
  203. }
  204. /**
  205. * Calculates the estimated memory footprint for the DP-based method.
  206. *
  207. * @param array $from
  208. * @param array $to
  209. *
  210. * @return int
  211. */
  212. private function calculateEstimatedFootprint(array $from, array $to)
  213. {
  214. $itemSize = PHP_INT_SIZE == 4 ? 76 : 144;
  215. return $itemSize * pow(min(count($from), count($to)), 2);
  216. }
  217. }