Differ.php 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. <?php declare(strict_types=1);
  2. /*
  3. * This file is part of sebastian/diff.
  4. *
  5. * (c) Sebastian Bergmann <sebastian@phpunit.de>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace SebastianBergmann\Diff;
  11. use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
  12. use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder;
  13. /**
  14. * Diff implementation.
  15. */
  16. final class Differ
  17. {
  18. /**
  19. * @var DiffOutputBuilderInterface
  20. */
  21. private $outputBuilder;
  22. /**
  23. * @param DiffOutputBuilderInterface $outputBuilder
  24. *
  25. * @throws InvalidArgumentException
  26. */
  27. public function __construct($outputBuilder = null)
  28. {
  29. if ($outputBuilder instanceof DiffOutputBuilderInterface) {
  30. $this->outputBuilder = $outputBuilder;
  31. } elseif (null === $outputBuilder) {
  32. $this->outputBuilder = new UnifiedDiffOutputBuilder;
  33. } elseif (\is_string($outputBuilder)) {
  34. // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support
  35. // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056
  36. // @deprecated
  37. $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder);
  38. } else {
  39. throw new InvalidArgumentException(
  40. \sprintf(
  41. 'Expected builder to be an instance of DiffOutputBuilderInterface, <null> or a string, got %s.',
  42. \is_object($outputBuilder) ? 'instance of "' . \get_class($outputBuilder) . '"' : \gettype($outputBuilder) . ' "' . $outputBuilder . '"'
  43. )
  44. );
  45. }
  46. }
  47. /**
  48. * Returns the diff between two arrays or strings as string.
  49. *
  50. * @param array|string $from
  51. * @param array|string $to
  52. * @param LongestCommonSubsequenceCalculator|null $lcs
  53. *
  54. * @return string
  55. */
  56. public function diff($from, $to, LongestCommonSubsequenceCalculator $lcs = null): string
  57. {
  58. $from = $this->validateDiffInput($from);
  59. $to = $this->validateDiffInput($to);
  60. $diff = $this->diffToArray($from, $to, $lcs);
  61. return $this->outputBuilder->getDiff($diff);
  62. }
  63. /**
  64. * Casts variable to string if it is not a string or array.
  65. *
  66. * @param mixed $input
  67. *
  68. * @return string
  69. */
  70. private function validateDiffInput($input): string
  71. {
  72. if (!\is_array($input) && !\is_string($input)) {
  73. return (string) $input;
  74. }
  75. return $input;
  76. }
  77. /**
  78. * Returns the diff between two arrays or strings as array.
  79. *
  80. * Each array element contains two elements:
  81. * - [0] => mixed $token
  82. * - [1] => 2|1|0
  83. *
  84. * - 2: REMOVED: $token was removed from $from
  85. * - 1: ADDED: $token was added to $from
  86. * - 0: OLD: $token is not changed in $to
  87. *
  88. * @param array|string $from
  89. * @param array|string $to
  90. * @param LongestCommonSubsequenceCalculator $lcs
  91. *
  92. * @return array
  93. */
  94. public function diffToArray($from, $to, LongestCommonSubsequenceCalculator $lcs = null): array
  95. {
  96. if (\is_string($from)) {
  97. $from = $this->splitStringByLines($from);
  98. } elseif (!\is_array($from)) {
  99. throw new \InvalidArgumentException('"from" must be an array or string.');
  100. }
  101. if (\is_string($to)) {
  102. $to = $this->splitStringByLines($to);
  103. } elseif (!\is_array($to)) {
  104. throw new \InvalidArgumentException('"to" must be an array or string.');
  105. }
  106. list($from, $to, $start, $end) = self::getArrayDiffParted($from, $to);
  107. if ($lcs === null) {
  108. $lcs = $this->selectLcsImplementation($from, $to);
  109. }
  110. $common = $lcs->calculate(\array_values($from), \array_values($to));
  111. $diff = [];
  112. foreach ($start as $token) {
  113. $diff[] = [$token, 0 /* OLD */];
  114. }
  115. \reset($from);
  116. \reset($to);
  117. foreach ($common as $token) {
  118. while (($fromToken = \reset($from)) !== $token) {
  119. $diff[] = [\array_shift($from), 2 /* REMOVED */];
  120. }
  121. while (($toToken = \reset($to)) !== $token) {
  122. $diff[] = [\array_shift($to), 1 /* ADDED */];
  123. }
  124. $diff[] = [$token, 0 /* OLD */];
  125. \array_shift($from);
  126. \array_shift($to);
  127. }
  128. while (($token = \array_shift($from)) !== null) {
  129. $diff[] = [$token, 2 /* REMOVED */];
  130. }
  131. while (($token = \array_shift($to)) !== null) {
  132. $diff[] = [$token, 1 /* ADDED */];
  133. }
  134. foreach ($end as $token) {
  135. $diff[] = [$token, 0 /* OLD */];
  136. }
  137. if ($this->detectUnmatchedLineEndings($diff)) {
  138. \array_unshift($diff, ["#Warning: Strings contain different line endings!\n", 3]);
  139. }
  140. return $diff;
  141. }
  142. /**
  143. * Checks if input is string, if so it will split it line-by-line.
  144. *
  145. * @param string $input
  146. *
  147. * @return array
  148. */
  149. private function splitStringByLines(string $input): array
  150. {
  151. return \preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
  152. }
  153. /**
  154. * @param array $from
  155. * @param array $to
  156. *
  157. * @return LongestCommonSubsequenceCalculator
  158. */
  159. private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
  160. {
  161. // We do not want to use the time-efficient implementation if its memory
  162. // footprint will probably exceed this value. Note that the footprint
  163. // calculation is only an estimation for the matrix and the LCS method
  164. // will typically allocate a bit more memory than this.
  165. $memoryLimit = 100 * 1024 * 1024;
  166. if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
  167. return new MemoryEfficientLongestCommonSubsequenceCalculator;
  168. }
  169. return new TimeEfficientLongestCommonSubsequenceCalculator;
  170. }
  171. /**
  172. * Calculates the estimated memory footprint for the DP-based method.
  173. *
  174. * @param array $from
  175. * @param array $to
  176. *
  177. * @return int|float
  178. */
  179. private function calculateEstimatedFootprint(array $from, array $to)
  180. {
  181. $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
  182. return $itemSize * \min(\count($from), \count($to)) ** 2;
  183. }
  184. /**
  185. * Returns true if line ends don't match in a diff.
  186. *
  187. * @param array $diff
  188. *
  189. * @return bool
  190. */
  191. private function detectUnmatchedLineEndings(array $diff): bool
  192. {
  193. $newLineBreaks = ['' => true];
  194. $oldLineBreaks = ['' => true];
  195. foreach ($diff as $entry) {
  196. if (0 === $entry[1]) { /* OLD */
  197. $ln = $this->getLinebreak($entry[0]);
  198. $oldLineBreaks[$ln] = true;
  199. $newLineBreaks[$ln] = true;
  200. } elseif (1 === $entry[1]) { /* ADDED */
  201. $newLineBreaks[$this->getLinebreak($entry[0])] = true;
  202. } elseif (2 === $entry[1]) { /* REMOVED */
  203. $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
  204. }
  205. }
  206. // if either input or output is a single line without breaks than no warning should be raised
  207. if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
  208. return false;
  209. }
  210. // two way compare
  211. foreach ($newLineBreaks as $break => $set) {
  212. if (!isset($oldLineBreaks[$break])) {
  213. return true;
  214. }
  215. }
  216. foreach ($oldLineBreaks as $break => $set) {
  217. if (!isset($newLineBreaks[$break])) {
  218. return true;
  219. }
  220. }
  221. return false;
  222. }
  223. private function getLinebreak($line): string
  224. {
  225. if (!\is_string($line)) {
  226. return '';
  227. }
  228. $lc = \substr($line, -1);
  229. if ("\r" === $lc) {
  230. return "\r";
  231. }
  232. if ("\n" !== $lc) {
  233. return '';
  234. }
  235. if ("\r\n" === \substr($line, -2)) {
  236. return "\r\n";
  237. }
  238. return "\n";
  239. }
  240. private static function getArrayDiffParted(array &$from, array &$to): array
  241. {
  242. $start = [];
  243. $end = [];
  244. \reset($to);
  245. foreach ($from as $k => $v) {
  246. $toK = \key($to);
  247. if ($toK === $k && $v === $to[$k]) {
  248. $start[$k] = $v;
  249. unset($from[$k], $to[$k]);
  250. } else {
  251. break;
  252. }
  253. }
  254. \end($from);
  255. \end($to);
  256. do {
  257. $fromK = \key($from);
  258. $toK = \key($to);
  259. if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
  260. break;
  261. }
  262. \prev($from);
  263. \prev($to);
  264. $end = [$fromK => $from[$fromK]] + $end;
  265. unset($from[$fromK], $to[$toK]);
  266. } while (true);
  267. return [$from, $to, $start, $end];
  268. }
  269. }