ByteString.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String;
  11. use Symfony\Component\String\Exception\ExceptionInterface;
  12. use Symfony\Component\String\Exception\InvalidArgumentException;
  13. use Symfony\Component\String\Exception\RuntimeException;
  14. /**
  15. * Represents a binary-safe string of bytes.
  16. *
  17. * @author Nicolas Grekas <p@tchwork.com>
  18. * @author Hugo Hamon <hugohamon@neuf.fr>
  19. *
  20. * @throws ExceptionInterface
  21. */
  22. class ByteString extends AbstractString
  23. {
  24. private const ALPHABET_ALPHANUMERIC = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
  /**
   * Wraps the given bytes without any validation or conversion.
   *
   * @param string $string The raw bytes to store; defaults to the empty string
   */
  public function __construct(string $string = '')
  {
      $this->string = $string;
  }
  29. /*
  30. * The following method was derived from code of the Hack Standard Library (v4.40 - 2020-05-03)
  31. *
  32. * https://github.com/hhvm/hsl/blob/80a42c02f036f72a42f0415e80d6b847f4bf62d5/src/random/private.php#L16
  33. *
  34. * Code subject to the MIT license (https://github.com/hhvm/hsl/blob/master/LICENSE).
  35. *
  36. * Copyright (c) 2004-2020, Facebook, Inc. (https://www.facebook.com/)
  37. */
  /**
   * Builds a random string of $length bytes picked from $alphabet.
   *
   * Random data comes from random_bytes(). It is consumed in groups of
   * ceil(log2(alphabet size)) bits; a group whose value is not a valid
   * index into the alphabet is dropped rather than reduced, and more
   * random bytes are requested until enough characters were produced.
   *
   * @param int         $length   Number of bytes to generate; must be strictly positive
   * @param string|null $alphabet Bytes to pick from; null selects self::ALPHABET_ALPHANUMERIC
   *
   * @throws InvalidArgumentException When $length is not positive, or when the bit width
   *                                  derived from the alphabet size is not within 1..56
   */
  public static function fromRandom(int $length = 16, ?string $alphabet = null): self
  {
      if ($length <= 0) {
          throw new InvalidArgumentException(sprintf('A strictly positive length is expected, "%d" given.', $length));
      }

      $alphabet = $alphabet ?? self::ALPHABET_ALPHANUMERIC;
      $alphabetSize = \strlen($alphabet);
      $bits = (int) ceil(log($alphabetSize, 2.0));
      if ($bits <= 0 || $bits > 56) {
          throw new InvalidArgumentException('The length of the alphabet must in the [2^1, 2^56] range.');
      }

      // Selects the low $bits bits of the bit buffer; constant for the whole run.
      $mask = (1 << $bits) - 1;

      $ret = '';
      while ($length > 0) {
          // Ask for twice the strictly needed amount, since some groups get rejected below.
          $urandomLength = (int) ceil(2 * $length * $bits / 8.0);
          $data = random_bytes($urandomLength);
          $unpackedData = 0;
          $unpackedBits = 0;

          for ($i = 0; $i < $urandomLength && $length > 0; ++$i) {
              // Unpack 8 bits
              $unpackedData = ($unpackedData << 8) | \ord($data[$i]);
              $unpackedBits += 8;

              // While we have enough bits to select a character from the alphabet, keep
              // consuming the random data
              for (; $unpackedBits >= $bits && $length > 0; $unpackedBits -= $bits) {
                  $index = $unpackedData & $mask;
                  $unpackedData >>= $bits;

                  // Unfortunately, the alphabet size is not necessarily a power of two.
                  // Worst case, it is 2^k + 1, which means we need (k+1) bits and we
                  // have around a 50% chance of missing as k gets larger
                  if ($index < $alphabetSize) {
                      $ret .= $alphabet[$index];
                      --$length;
                  }
              }
          }
      }

      return new static($ret);
  }
  /**
   * Returns the ordinal value of the byte stored at $offset.
   *
   * @return int[] A one-element array, or an empty array when no byte exists at $offset
   */
  public function bytesAt(int $offset): array
  {
      $byte = $this->string[$offset] ?? '';

      if ('' === $byte) {
          return [];
      }

      return [\ord($byte)];
  }
  /**
   * Returns a clone with the given suffixes concatenated, in order, after the current bytes.
   */
  public function append(string ...$suffix): parent
  {
      $copy = clone $this;

      if (\count($suffix) > 1) {
          $copy->string .= implode('', $suffix);
      } else {
          // Zero or one argument: skip implode() entirely.
          $copy->string .= $suffix[0] ?? '';
      }

      return $copy;
  }
  /**
   * Returns a clone converted to camelCase.
   *
   * Runs of bytes outside [a-zA-Z0-9\x7f-\xff] act as word separators. Every
   * word gets an upper-cased first letter, except the first word, which is
   * lower-cased on its first letter unless it is an all-uppercase word of
   * more than one byte.
   */
  public function camel(): parent
  {
      $copy = clone $this;

      $spaced = preg_replace('/[^a-zA-Z0-9\x7f-\xff]++/', ' ', $this->string);
      $words = explode(' ', trim(ucwords($spaced)));

      $first = $words[0];
      if (1 === \strlen($first) || !ctype_upper($first)) {
          $first = lcfirst($first);
      }
      $words[0] = $first;

      $copy->string = implode('', $words);

      return $copy;
  }
  /**
   * Splits the bytes into consecutive pieces of at most $length bytes.
   *
   * @return static[] One clone per piece; empty when the string is empty
   *
   * @throws InvalidArgumentException When $length is lower than 1
   */
  public function chunk(int $length = 1): array
  {
      if ($length < 1) {
          throw new InvalidArgumentException('The chunk length must be greater than zero.');
      }

      if ('' === $this->string) {
          return [];
      }

      $template = clone $this;
      $pieces = [];

      foreach (str_split($this->string, $length) as $piece) {
          $template->string = $piece;
          $pieces[] = clone $template;
      }

      return $pieces;
  }
  /**
   * Tells whether the bytes end with $suffix, honoring the ignore-case flag.
   *
   * Arrays and Traversable values are delegated to the parent implementation;
   * any other value that is not an instance of the parent class is cast to string.
   * An empty suffix never matches.
   *
   * @param mixed $suffix
   */
  public function endsWith($suffix): bool
  {
      if ($suffix instanceof parent) {
          $needle = $suffix->string;
      } elseif (\is_array($suffix) || $suffix instanceof \Traversable) {
          return parent::endsWith($suffix);
      } else {
          $needle = (string) $suffix;
      }

      $needleLength = \strlen($needle);

      if (0 === $needleLength || \strlen($this->string) < $needleLength) {
          return false;
      }

      return 0 === substr_compare($this->string, $needle, -$needleLength, null, $this->ignoreCase);
  }
  /**
   * Tells whether the bytes equal $string, honoring the ignore-case flag.
   *
   * Arrays and Traversable values are delegated to the parent implementation;
   * any other value that is not an instance of the parent class is cast to string.
   *
   * @param mixed $string
   */
  public function equalsTo($string): bool
  {
      if ($string instanceof parent) {
          $other = $string->string;
      } elseif (\is_array($string) || $string instanceof \Traversable) {
          return parent::equalsTo($string);
      } else {
          $other = (string) $string;
      }

      if ($this->ignoreCase && '' !== $other) {
          return 0 === strcasecmp($other, $this->string);
      }

      return $other === $this->string;
  }
  /**
   * Returns a clone whose bytes went through strtolower().
   */
  public function folded(): parent
  {
      $copy = clone $this;
      $copy->string = strtolower($this->string);

      return $copy;
  }
  /**
   * Returns the byte position of the first occurrence of $needle, or null when not found.
   *
   * Arrays and Traversable values are delegated to the parent implementation;
   * any other value that is not an instance of the parent class is cast to string.
   * An empty needle is never found. $offset is handed over to strpos()/stripos().
   *
   * @param mixed $needle
   */
  public function indexOf($needle, int $offset = 0): ?int
  {
      if ($needle instanceof parent) {
          $search = $needle->string;
      } elseif (\is_array($needle) || $needle instanceof \Traversable) {
          return parent::indexOf($needle, $offset);
      } else {
          $search = (string) $needle;
      }

      if ('' === $search) {
          return null;
      }

      if ($this->ignoreCase) {
          $position = stripos($this->string, $search, $offset);
      } else {
          $position = strpos($this->string, $search, $offset);
      }

      return false === $position ? null : $position;
  }
  /**
   * Returns the byte position of the last occurrence of $needle, or null when not found.
   *
   * Arrays and Traversable values are delegated to the parent implementation;
   * any other value that is not an instance of the parent class is cast to string.
   * An empty needle is never found. $offset is handed over to strrpos()/strripos().
   *
   * @param mixed $needle
   */
  public function indexOfLast($needle, int $offset = 0): ?int
  {
      if ($needle instanceof parent) {
          $search = $needle->string;
      } elseif (\is_array($needle) || $needle instanceof \Traversable) {
          return parent::indexOfLast($needle, $offset);
      } else {
          $search = (string) $needle;
      }

      if ('' === $search) {
          return null;
      }

      if ($this->ignoreCase) {
          $position = strripos($this->string, $search, $offset);
      } else {
          $position = strrpos($this->string, $search, $offset);
      }

      return false === $position ? null : $position;
  }
  /**
   * Tells whether the bytes form a valid UTF-8 sequence.
   *
   * The empty string is reported as valid without invoking PCRE.
   */
  public function isUtf8(): bool
  {
      // preg_match() yields int|false; compare explicitly so a real bool is
      // returned instead of relying on return-type coercion.
      return '' === $this->string || 1 === preg_match('//u', $this->string);
  }
  /**
   * Returns a clone holding $strings glued together with the current bytes.
   *
   * @param array       $strings  The pieces to join
   * @param string|null $lastGlue When not null and more than one piece is given, the glue
   *                              placed before the last piece instead of the current bytes
   */
  public function join(array $strings, ?string $lastGlue = null): parent
  {
      $str = clone $this;

      // Detach the last piece first so that implode() only sees the pieces
      // separated by the regular glue.
      $tail = null !== $lastGlue && 1 < \count($strings) ? $lastGlue.array_pop($strings) : '';
      $str->string = implode($this->string, $strings).$tail;

      return $str;
  }
  /**
   * Returns the number of bytes held by this string.
   */
  public function length(): int
  {
      $bytes = $this->string;

      return \strlen($bytes);
  }
  /**
   * Returns a clone whose bytes went through strtolower().
   */
  public function lower(): parent
  {
      $copy = clone $this;
      $copy->string = strtolower($this->string);

      return $copy;
  }
  /**
   * Runs $regexp against the bytes and returns the captured matches.
   *
   * preg_match_all() is used when $flags contains PREG_PATTERN_ORDER or
   * PREG_SET_ORDER, preg_match() otherwise. PREG_UNMATCHED_AS_NULL is always
   * added to $flags, and the "i" modifier is appended when the ignore-case
   * flag is set. PHP errors raised while matching are turned into
   * InvalidArgumentException by a temporary error handler.
   *
   * @throws InvalidArgumentException When PHP reports an error during the call
   * @throws RuntimeException         When the PCRE function returns false
   */
  public function match(string $regexp, int $flags = 0, int $offset = 0): array
  {
      $matcher = 'preg_match';
      if ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) {
          $matcher = 'preg_match_all';
      }

      if ($this->ignoreCase) {
          $regexp .= 'i';
      }

      set_error_handler(static function ($type, $message) {
          throw new InvalidArgumentException($message);
      });

      try {
          $result = $matcher($regexp, $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset);

          if (false === $result) {
              $errorCode = preg_last_error();

              // Translate the numeric code back to its PREG_*_ERROR constant name.
              foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                  if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                      throw new RuntimeException('Matching failed with '.$name.'.');
                  }
              }

              throw new RuntimeException('Matching failed with unknown error code.');
          }
      } finally {
          restore_error_handler();
      }

      return $matches;
  }
  /**
   * Returns a clone padded on both sides with $padStr up to $length bytes (str_pad() semantics).
   */
  public function padBoth(int $length, string $padStr = ' '): parent
  {
      $copy = clone $this;
      $padded = str_pad($this->string, $length, $padStr, \STR_PAD_BOTH);
      $copy->string = $padded;

      return $copy;
  }
  /**
   * Returns a clone padded on the right with $padStr up to $length bytes (str_pad() semantics).
   */
  public function padEnd(int $length, string $padStr = ' '): parent
  {
      $copy = clone $this;
      $padded = str_pad($this->string, $length, $padStr, \STR_PAD_RIGHT);
      $copy->string = $padded;

      return $copy;
  }
  /**
   * Returns a clone padded on the left with $padStr up to $length bytes (str_pad() semantics).
   */
  public function padStart(int $length, string $padStr = ' '): parent
  {
      $copy = clone $this;
      $padded = str_pad($this->string, $length, $padStr, \STR_PAD_LEFT);
      $copy->string = $padded;

      return $copy;
  }
  /**
   * Returns a clone with the given prefixes concatenated, in order, before the current bytes.
   */
  public function prepend(string ...$prefix): parent
  {
      $copy = clone $this;

      if (\count($prefix) > 1) {
          $head = implode('', $prefix);
      } else {
          // Zero or one argument: skip implode() entirely.
          $head = $prefix[0] ?? '';
      }

      $copy->string = $head.$copy->string;

      return $copy;
  }
  /**
   * Returns a clone where every occurrence of $from is replaced by $to.
   *
   * The search honors the ignore-case flag. An empty $from leaves the bytes untouched.
   */
  public function replace(string $from, string $to): parent
  {
      $copy = clone $this;

      if ('' === $from) {
          return $copy;
      }

      if ($this->ignoreCase) {
          $copy->string = str_ireplace($from, $to, $this->string);
      } else {
          $copy->string = str_replace($from, $to, $this->string);
      }

      return $copy;
  }
  /**
   * Returns a clone where the matches of $fromRegexp are replaced using $to.
   *
   * A Closure or a callable array is run through preg_replace_callback();
   * any other value is handed to preg_replace(). The "i" modifier is appended
   * when the ignore-case flag is set. PHP errors raised while replacing are
   * turned into InvalidArgumentException by a temporary error handler.
   *
   * @param string|callable $to
   *
   * @throws \TypeError               When $to is an array that is not callable
   * @throws InvalidArgumentException When PHP reports an error during the call
   * @throws RuntimeException         When the PCRE function returns null
   */
  public function replaceMatches(string $fromRegexp, $to): parent
  {
      if ($this->ignoreCase) {
          $fromRegexp .= 'i';
      }

      if (\is_array($to) && !\is_callable($to)) {
          throw new \TypeError(sprintf('Argument 2 passed to "%s::replaceMatches()" must be callable, array given.', static::class));
      }

      $replacer = \is_array($to) || $to instanceof \Closure ? 'preg_replace_callback' : 'preg_replace';

      set_error_handler(static function ($type, $message) {
          throw new InvalidArgumentException($message);
      });

      try {
          $replaced = $replacer($fromRegexp, $to, $this->string);

          if (null === $replaced) {
              $errorCode = preg_last_error();

              // Translate the numeric code back to its PREG_*_ERROR constant name.
              foreach (get_defined_constants(true)['pcre'] as $name => $value) {
                  if ($errorCode === $value && '_ERROR' === substr($name, -6)) {
                      throw new RuntimeException('Matching failed with '.$name.'.');
                  }
              }

              throw new RuntimeException('Matching failed with unknown error code.');
          }
      } finally {
          restore_error_handler();
      }

      $copy = clone $this;
      $copy->string = $replaced;

      return $copy;
  }
  /**
   * Returns a clone with the bytes in reverse order.
   */
  public function reverse(): parent
  {
      $copy = clone $this;
      $copy->string = strrev($this->string);

      return $copy;
  }
  /**
   * Returns a clone holding the bytes selected by substr($start, $length).
   *
   * @param int      $start  Byte offset, interpreted as substr() does
   * @param int|null $length Number of bytes to keep; null keeps everything from $start on
   */
  public function slice(int $start = 0, ?int $length = null): parent
  {
      $str = clone $this;
      // The cast keeps the property a string even if substr() reports failure with false.
      $str->string = (string) substr($this->string, $start, $length ?? \PHP_INT_MAX);

      return $str;
  }
  /**
   * Returns a snake_case version of the string.
   *
   * The result of camel() is taken, an underscore is inserted at each
   * lower/upper (or acronym/word) boundary, and everything is lower-cased.
   */
  public function snake(): parent
  {
      $copy = $this->camel();

      $boundaries = ['/([A-Z]+)([A-Z][a-z])/', '/([a-z\d])([A-Z])/'];
      $underscored = preg_replace($boundaries, '\1_\2', $copy->string);

      $copy->string = strtolower($underscored);

      return $copy;
  }
  /**
   * Returns a clone where the bytes selected by $start/$length are replaced by $replacement.
   *
   * @param string   $replacement The bytes to insert
   * @param int      $start       Byte offset, interpreted as substr_replace() does
   * @param int|null $length      Number of bytes to replace; null replaces everything from $start on
   */
  public function splice(string $replacement, int $start = 0, ?int $length = null): parent
  {
      $str = clone $this;
      $str->string = substr_replace($this->string, $replacement, $start, $length ?? \PHP_INT_MAX);

      return $str;
  }
  /**
   * Splits the bytes around $delimiter, honoring the ignore-case flag.
   *
   * When $flags is given, the call is delegated to the parent implementation.
   *
   * @param string   $delimiter The separator; must not be empty
   * @param int|null $limit     Maximum number of pieces; null means no limit
   * @param int|null $flags     When not null, forwarded with the other arguments to the parent
   *
   * @return static[] One clone per piece (or whatever the parent returns when $flags is set)
   *
   * @throws InvalidArgumentException When $limit is lower than 1 or $delimiter is empty
   */
  public function split(string $delimiter, ?int $limit = null, ?int $flags = null): array
  {
      if (1 > $limit = $limit ?? \PHP_INT_MAX) {
          throw new InvalidArgumentException('Split limit must be a positive integer.');
      }

      if ('' === $delimiter) {
          throw new InvalidArgumentException('Split delimiter is empty.');
      }

      if (null !== $flags) {
          return parent::split($delimiter, $limit, $flags);
      }

      $str = clone $this;

      // explode() has no case-insensitive variant, hence the quoted regexp.
      $chunks = $this->ignoreCase
          ? preg_split('{'.preg_quote($delimiter).'}iD', $this->string, $limit)
          : explode($delimiter, $this->string, $limit);

      // Assign by key rather than by reference so that no reference is left
      // bound to the last element of the returned array.
      foreach ($chunks as $i => $chunk) {
          $str->string = $chunk;
          $chunks[$i] = clone $str;
      }

      return $chunks;
  }
  /**
   * Tells whether the bytes start with $prefix, honoring the ignore-case flag.
   *
   * Arguments are handled the same way as in endsWith(), equalsTo(), indexOf()
   * and indexOfLast(): arrays and Traversable values are delegated to the
   * parent implementation, and any other value that is not an instance of the
   * parent class is cast to string. An empty prefix never matches.
   *
   * @param mixed $prefix
   */
  public function startsWith($prefix): bool
  {
      if ($prefix instanceof parent) {
          $prefix = $prefix->string;
      } elseif (\is_array($prefix) || $prefix instanceof \Traversable) {
          return parent::startsWith($prefix);
      } else {
          $prefix = (string) $prefix;
      }

      if ('' === $prefix) {
          return false;
      }

      $prefixLength = \strlen($prefix);

      return 0 === ($this->ignoreCase
          ? strncasecmp($this->string, $prefix, $prefixLength)
          : strncmp($this->string, $prefix, $prefixLength));
  }
  /**
   * Returns a clone with an upper-cased first letter.
   *
   * @param bool $allWords When true, ucwords() is applied instead of ucfirst()
   */
  public function title(bool $allWords = false): parent
  {
      $copy = clone $this;

      if ($allWords) {
          $copy->string = ucwords($copy->string);
      } else {
          $copy->string = ucfirst($copy->string);
      }

      return $copy;
  }
  /**
   * Converts the bytes to a UnicodeString.
   *
   * The conversion itself is done by toCodePointString(); its result is
   * wrapped in a new UnicodeString.
   *
   * @param string|null $fromEncoding Forwarded unchanged to toCodePointString()
   */
  public function toUnicodeString(?string $fromEncoding = null): UnicodeString
  {
      return new UnicodeString($this->toCodePointString($fromEncoding)->string);
  }
  /**
   * Converts the bytes to a CodePointString holding UTF-8.
   *
   * When $fromEncoding is null or one of the UTF-8 spellings listed below and
   * the bytes are valid UTF-8, they are used unchanged. Otherwise the bytes
   * are interpreted as $fromEncoding (Windows-1252 when null), checked with
   * mb_detect_encoding() in strict mode and converted with
   * mb_convert_encoding(). If mb_detect_encoding() raises a PHP error and the
   * iconv extension is available, iconv() performs the conversion instead.
   *
   * @param string|null $fromEncoding The encoding of the bytes, or null as described above
   *
   * @throws InvalidArgumentException When the bytes are not valid in the source encoding,
   *                                  or when PHP reports an error and no fallback applies
   */
  public function toCodePointString(?string $fromEncoding = null): CodePointString
  {
      $u = new CodePointString();

      if (\in_array($fromEncoding, [null, 'utf8', 'utf-8', 'UTF8', 'UTF-8'], true) && preg_match('//u', $this->string)) {
          $u->string = $this->string;

          return $u;
      }

      $encoding = $fromEncoding ?? 'Windows-1252';

      set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });

      try {
          try {
              $validEncoding = false !== mb_detect_encoding($this->string, $encoding, true);
          } catch (InvalidArgumentException $e) {
              if (!\function_exists('iconv')) {
                  throw $e;
              }

              // Still inside the outer try: errors raised by iconv() are
              // converted by the same temporary handler.
              $u->string = iconv($encoding, 'UTF-8', $this->string);

              return $u;
          }
      } finally {
          restore_error_handler();
      }

      if (!$validEncoding) {
          throw new InvalidArgumentException(sprintf('Invalid "%s" string.', $encoding));
      }

      $u->string = mb_convert_encoding($this->string, 'UTF-8', $encoding);

      return $u;
  }
  /**
   * Returns a clone with the bytes listed in $chars stripped from both ends.
   */
  public function trim(string $chars = " \t\n\r\0\x0B\x0C"): parent
  {
      $copy = clone $this;
      $copy->string = trim($this->string, $chars);

      return $copy;
  }
  /**
   * Returns a clone with the bytes listed in $chars stripped from the end.
   */
  public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C"): parent
  {
      $copy = clone $this;
      $copy->string = rtrim($this->string, $chars);

      return $copy;
  }
  /**
   * Returns a clone with the bytes listed in $chars stripped from the start.
   */
  public function trimStart(string $chars = " \t\n\r\0\x0B\x0C"): parent
  {
      $copy = clone $this;
      $copy->string = ltrim($this->string, $chars);

      return $copy;
  }
  /**
   * Returns a clone whose bytes went through strtoupper().
   */
  public function upper(): parent
  {
      $copy = clone $this;
      $copy->string = strtoupper($this->string);

      return $copy;
  }
  /**
   * Returns the display width as computed by CodePointString::width().
   *
   * When the bytes are not valid UTF-8, every byte in the \x80-\xFF range is
   * replaced by "?" before the width is computed.
   *
   * @param bool $ignoreAnsiDecoration Forwarded unchanged to CodePointString::width()
   */
  public function width(bool $ignoreAnsiDecoration = true): int
  {
      $string = $this->string;

      if (!preg_match('//u', $string)) {
          $string = preg_replace('/[\x80-\xFF]/', '?', $string);
      }

      $codePoints = new CodePointString($string);

      return $codePoints->width($ignoreAnsiDecoration);
  }
  398. }