TokenPolyfill.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. <?php declare(strict_types=1);
  2. namespace PhpParser\Internal;
  3. if (\PHP_VERSION_ID >= 80000) {
  4. class TokenPolyfill extends \PhpToken {
  5. }
  6. return;
  7. }
  8. /**
  9. * This is a polyfill for the PhpToken class introduced in PHP 8.0. We do not actually polyfill
  10. * PhpToken, because composer might end up picking a different polyfill implementation, which does
  11. * not meet our requirements.
  12. *
  13. * @internal
  14. */
  15. class TokenPolyfill {
  16. /** @var int The ID of the token. Either a T_* constant of a character code < 256. */
  17. public int $id;
  18. /** @var string The textual content of the token. */
  19. public string $text;
  20. /** @var int The 1-based starting line of the token (or -1 if unknown). */
  21. public int $line;
  22. /** @var int The 0-based starting position of the token (or -1 if unknown). */
  23. public int $pos;
  24. /** @var array<int, bool> Tokens ignored by the PHP parser. */
  25. private const IGNORABLE_TOKENS = [
  26. \T_WHITESPACE => true,
  27. \T_COMMENT => true,
  28. \T_DOC_COMMENT => true,
  29. \T_OPEN_TAG => true,
  30. ];
  31. /** @var array<int, bool> Tokens that may be part of a T_NAME_* identifier. */
  32. private static array $identifierTokens;
  33. /**
  34. * Create a Token with the given ID and text, as well optional line and position information.
  35. */
  36. final public function __construct(int $id, string $text, int $line = -1, int $pos = -1) {
  37. $this->id = $id;
  38. $this->text = $text;
  39. $this->line = $line;
  40. $this->pos = $pos;
  41. }
  42. /**
  43. * Get the name of the token. For single-char tokens this will be the token character.
  44. * Otherwise it will be a T_* style name, or null if the token ID is unknown.
  45. */
  46. public function getTokenName(): ?string {
  47. if ($this->id < 256) {
  48. return \chr($this->id);
  49. }
  50. $name = token_name($this->id);
  51. return $name === 'UNKNOWN' ? null : $name;
  52. }
  53. /**
  54. * Check whether the token is of the given kind. The kind may be either an integer that matches
  55. * the token ID, a string that matches the token text, or an array of integers/strings. In the
  56. * latter case, the function returns true if any of the kinds in the array match.
  57. *
  58. * @param int|string|(int|string)[] $kind
  59. */
  60. public function is($kind): bool {
  61. if (\is_int($kind)) {
  62. return $this->id === $kind;
  63. }
  64. if (\is_string($kind)) {
  65. return $this->text === $kind;
  66. }
  67. if (\is_array($kind)) {
  68. foreach ($kind as $entry) {
  69. if (\is_int($entry)) {
  70. if ($this->id === $entry) {
  71. return true;
  72. }
  73. } elseif (\is_string($entry)) {
  74. if ($this->text === $entry) {
  75. return true;
  76. }
  77. } else {
  78. throw new \TypeError(
  79. 'Argument #1 ($kind) must only have elements of type string|int, ' .
  80. gettype($entry) . ' given');
  81. }
  82. }
  83. return false;
  84. }
  85. throw new \TypeError(
  86. 'Argument #1 ($kind) must be of type string|int|array, ' .gettype($kind) . ' given');
  87. }
  88. /**
  89. * Check whether this token would be ignored by the PHP parser. Returns true for T_WHITESPACE,
  90. * T_COMMENT, T_DOC_COMMENT and T_OPEN_TAG, and false for everything else.
  91. */
  92. public function isIgnorable(): bool {
  93. return isset(self::IGNORABLE_TOKENS[$this->id]);
  94. }
  95. /**
  96. * Return the textual content of the token.
  97. */
  98. public function __toString(): string {
  99. return $this->text;
  100. }
  101. /**
  102. * Tokenize the given source code and return an array of tokens.
  103. *
  104. * This performs certain canonicalizations to match the PHP 8.0 token format:
  105. * * Bad characters are represented using T_BAD_CHARACTER rather than omitted.
  106. * * T_COMMENT does not include trailing newlines, instead the newline is part of a following
  107. * T_WHITESPACE token.
  108. * * Namespaced names are represented using T_NAME_* tokens.
  109. *
  110. * @return static[]
  111. */
  112. public static function tokenize(string $code, int $flags = 0): array {
  113. self::init();
  114. $tokens = [];
  115. $line = 1;
  116. $pos = 0;
  117. $origTokens = \token_get_all($code, $flags);
  118. $numTokens = \count($origTokens);
  119. for ($i = 0; $i < $numTokens; $i++) {
  120. $token = $origTokens[$i];
  121. if (\is_string($token)) {
  122. if (\strlen($token) === 2) {
  123. // b" and B" are tokenized as single-char tokens, even though they aren't.
  124. $tokens[] = new static(\ord('"'), $token, $line, $pos);
  125. $pos += 2;
  126. } else {
  127. $tokens[] = new static(\ord($token), $token, $line, $pos);
  128. $pos++;
  129. }
  130. } else {
  131. $id = $token[0];
  132. $text = $token[1];
  133. // Emulate PHP 8.0 comment format, which does not include trailing whitespace anymore.
  134. if ($id === \T_COMMENT && \substr($text, 0, 2) !== '/*' &&
  135. \preg_match('/(\r\n|\n|\r)$/D', $text, $matches)
  136. ) {
  137. $trailingNewline = $matches[0];
  138. $text = \substr($text, 0, -\strlen($trailingNewline));
  139. $tokens[] = new static($id, $text, $line, $pos);
  140. $pos += \strlen($text);
  141. if ($i + 1 < $numTokens && $origTokens[$i + 1][0] === \T_WHITESPACE) {
  142. // Move trailing newline into following T_WHITESPACE token, if it already exists.
  143. $origTokens[$i + 1][1] = $trailingNewline . $origTokens[$i + 1][1];
  144. $origTokens[$i + 1][2]--;
  145. } else {
  146. // Otherwise, we need to create a new T_WHITESPACE token.
  147. $tokens[] = new static(\T_WHITESPACE, $trailingNewline, $line, $pos);
  148. $line++;
  149. $pos += \strlen($trailingNewline);
  150. }
  151. continue;
  152. }
  153. // Emulate PHP 8.0 T_NAME_* tokens, by combining sequences of T_NS_SEPARATOR and
  154. // T_STRING into a single token.
  155. if (($id === \T_NS_SEPARATOR || isset(self::$identifierTokens[$id]))) {
  156. $newText = $text;
  157. $lastWasSeparator = $id === \T_NS_SEPARATOR;
  158. for ($j = $i + 1; $j < $numTokens; $j++) {
  159. if ($lastWasSeparator) {
  160. if (!isset(self::$identifierTokens[$origTokens[$j][0]])) {
  161. break;
  162. }
  163. $lastWasSeparator = false;
  164. } else {
  165. if ($origTokens[$j][0] !== \T_NS_SEPARATOR) {
  166. break;
  167. }
  168. $lastWasSeparator = true;
  169. }
  170. $newText .= $origTokens[$j][1];
  171. }
  172. if ($lastWasSeparator) {
  173. // Trailing separator is not part of the name.
  174. $j--;
  175. $newText = \substr($newText, 0, -1);
  176. }
  177. if ($j > $i + 1) {
  178. if ($id === \T_NS_SEPARATOR) {
  179. $id = \T_NAME_FULLY_QUALIFIED;
  180. } elseif ($id === \T_NAMESPACE) {
  181. $id = \T_NAME_RELATIVE;
  182. } else {
  183. $id = \T_NAME_QUALIFIED;
  184. }
  185. $tokens[] = new static($id, $newText, $line, $pos);
  186. $pos += \strlen($newText);
  187. $i = $j - 1;
  188. continue;
  189. }
  190. }
  191. $tokens[] = new static($id, $text, $line, $pos);
  192. $line += \substr_count($text, "\n");
  193. $pos += \strlen($text);
  194. }
  195. }
  196. return $tokens;
  197. }
  198. /** Initialize private static state needed by tokenize(). */
  199. private static function init(): void {
  200. if (isset(self::$identifierTokens)) {
  201. return;
  202. }
  203. // Based on semi_reserved production.
  204. self::$identifierTokens = \array_fill_keys([
  205. \T_STRING,
  206. \T_STATIC, \T_ABSTRACT, \T_FINAL, \T_PRIVATE, \T_PROTECTED, \T_PUBLIC, \T_READONLY,
  207. \T_INCLUDE, \T_INCLUDE_ONCE, \T_EVAL, \T_REQUIRE, \T_REQUIRE_ONCE, \T_LOGICAL_OR, \T_LOGICAL_XOR, \T_LOGICAL_AND,
  208. \T_INSTANCEOF, \T_NEW, \T_CLONE, \T_EXIT, \T_IF, \T_ELSEIF, \T_ELSE, \T_ENDIF, \T_ECHO, \T_DO, \T_WHILE,
  209. \T_ENDWHILE, \T_FOR, \T_ENDFOR, \T_FOREACH, \T_ENDFOREACH, \T_DECLARE, \T_ENDDECLARE, \T_AS, \T_TRY, \T_CATCH,
  210. \T_FINALLY, \T_THROW, \T_USE, \T_INSTEADOF, \T_GLOBAL, \T_VAR, \T_UNSET, \T_ISSET, \T_EMPTY, \T_CONTINUE, \T_GOTO,
  211. \T_FUNCTION, \T_CONST, \T_RETURN, \T_PRINT, \T_YIELD, \T_LIST, \T_SWITCH, \T_ENDSWITCH, \T_CASE, \T_DEFAULT,
  212. \T_BREAK, \T_ARRAY, \T_CALLABLE, \T_EXTENDS, \T_IMPLEMENTS, \T_NAMESPACE, \T_TRAIT, \T_INTERFACE, \T_CLASS,
  213. \T_CLASS_C, \T_TRAIT_C, \T_FUNC_C, \T_METHOD_C, \T_LINE, \T_FILE, \T_DIR, \T_NS_C, \T_HALT_COMPILER, \T_FN,
  214. \T_MATCH,
  215. ], true);
  216. }
  217. }