String_.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. <?php declare(strict_types=1);
  2. namespace PhpParser\Node\Scalar;
  3. use PhpParser\Error;
  4. use PhpParser\Node\Scalar;
  /**
   * Scalar node holding the decoded value of a string literal.
   *
   * Besides the node itself, this class offers the static helpers that turn a raw string
   * token (quotes, optional b/B prefix and escape sequences included) into the value it denotes.
   */
  class String_ extends Scalar {
      /* For use in "kind" attribute */
      public const KIND_SINGLE_QUOTED = 1;
      public const KIND_DOUBLE_QUOTED = 2;
      public const KIND_HEREDOC = 3;
      public const KIND_NOWDOC = 4;

      /**
       * Highest code point accepted in a \u{...} escape. UTF-8 (RFC 3629) ends at U+10FFFF
       * and PHP rejects anything above it.
       */
      private const MAX_CODE_POINT = 0x10FFFF;

      /** @var string String value */
      public string $value;

      /** @var array<string, string> Escaped character to its decoded value */
      protected static array $replacements = [
          '\\' => '\\',
          '$' => '$',
          'n' => "\n",
          'r' => "\r",
          't' => "\t",
          'f' => "\f",
          'v' => "\v",
          'e' => "\x1B",
      ];

      /**
       * Constructs a string scalar node.
       *
       * @param string $value Value of the string
       * @param array<string, mixed> $attributes Additional attributes
       */
      public function __construct(string $value, array $attributes = []) {
          $this->attributes = $attributes;
          $this->value = $value;
      }

      public function getSubNodeNames(): array {
          return ['value'];
      }

      /**
       * Creates a string node from a raw string token.
       *
       * The "kind" attribute is set to KIND_SINGLE_QUOTED when the token opens with a single
       * quote (optionally preceded by a b/B prefix) and to KIND_DOUBLE_QUOTED otherwise. The
       * unmodified token is stored in the "rawValue" attribute. Both overwrite entries of the
       * same name in $attributes.
       *
       * @param string $str String token, including quotes and optional b/B prefix
       * @param array<string, mixed> $attributes
       * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
       */
      public static function fromString(string $str, array $attributes = [], bool $parseUnicodeEscape = true): self {
          $attributes['kind'] = ($str[0] === "'" || ($str[1] === "'" && ($str[0] === 'b' || $str[0] === 'B')))
              ? self::KIND_SINGLE_QUOTED
              : self::KIND_DOUBLE_QUOTED;
          $attributes['rawValue'] = $str;

          return new self(self::parse($str, $parseUnicodeEscape), $attributes);
      }

      /**
       * @internal
       *
       * Parses a string token.
       *
       * @param string $str String token content
       * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
       *
       * @return string The parsed string
       */
      public static function parse(string $str, bool $parseUnicodeEscape = true): string {
          // Skip the optional binary-string prefix so that the quote lookup hits the quote itself.
          $bLength = 0;
          if ('b' === $str[0] || 'B' === $str[0]) {
              $bLength = 1;
          }

          // Contents between the opening quote (after the prefix) and the closing quote.
          $contents = substr($str, $bLength + 1, -1);

          if ('\'' === $str[$bLength]) {
              // Single-quoted strings only know the \\ and \' escapes.
              return str_replace(
                  ['\\\\', '\\\''],
                  ['\\', '\''],
                  $contents
              );
          }

          return self::parseEscapeSequences($contents, '"', $parseUnicodeEscape);
      }

      /**
       * @internal
       *
       * Parses escape sequences in strings (all string types apart from single quoted).
       *
       * @param string $str String without quotes
       * @param null|string $quote Quote type
       * @param bool $parseUnicodeEscape Whether to parse PHP 7 \u escapes
       *
       * @return string String with escape sequences parsed
       *
       * @throws Error If a \u{...} escape names a code point above U+10FFFF
       */
      public static function parseEscapeSequences(string $str, ?string $quote, bool $parseUnicodeEscape = true): string {
          if (null !== $quote) {
              $str = str_replace('\\' . $quote, $quote, $str);
          }

          $extra = '';
          if ($parseUnicodeEscape) {
              $extra = '|u\{([0-9a-fA-F]+)\}';
          }

          return preg_replace_callback(
              '~\\\\([\\\\$nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}' . $extra . ')~',
              static function (array $matches): string {
                  $escape = $matches[1];

                  // Single-character escapes are looked up directly.
                  if (isset(self::$replacements[$escape])) {
                      return self::$replacements[$escape];
                  }

                  // \xH or \xHH: at most two hex digits, so the value always fits into one byte.
                  if ('x' === $escape[0] || 'X' === $escape[0]) {
                      return chr(hexdec(substr($escape, 1)));
                  }

                  // \u{H...}: the hex digits are captured in group 2.
                  if ('u' === $escape[0]) {
                      $dec = hexdec($matches[2]);
                      // If it overflowed to float, treat as INT_MAX, it will throw an error anyway.
                      return self::codePointToUtf8(\is_int($dec) ? $dec : \PHP_INT_MAX);
                  }

                  // Octal escape of up to three digits. Values above 0377 overflow a byte; PHP
                  // silently wraps them ("\400" === "\000"), so mask explicitly instead of
                  // handing chr() an out-of-range value.
                  return chr(octdec($escape) & 255);
              },
              $str
          );
      }

      /**
       * Converts a Unicode code point to its UTF-8 encoded representation.
       *
       * @param int $num Code point
       *
       * @return string UTF-8 representation of code point
       *
       * @throws Error If the code point is larger than U+10FFFF
       */
      private static function codePointToUtf8(int $num): string {
          if ($num <= 0x7F) {
              return chr($num);
          }
          if ($num <= 0x7FF) {
              return chr(($num >> 6) + 0xC0) . chr(($num & 0x3F) + 0x80);
          }
          if ($num <= 0xFFFF) {
              return chr(($num >> 12) + 0xE0) . chr((($num >> 6) & 0x3F) + 0x80) . chr(($num & 0x3F) + 0x80);
          }
          if ($num <= self::MAX_CODE_POINT) {
              return chr(($num >> 18) + 0xF0) . chr((($num >> 12) & 0x3F) + 0x80)
                  . chr((($num >> 6) & 0x3F) + 0x80) . chr(($num & 0x3F) + 0x80);
          }
          throw new Error('Invalid UTF-8 codepoint escape sequence: Codepoint too large');
      }

      public function getType(): string {
          return 'Scalar_String';
      }
  }