Parser.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\ExpressionLanguage;
  11. /**
  12. * Parses a token stream.
  13. *
  14. * This parser implements a "Precedence climbing" algorithm.
  15. *
  16. * @see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
  17. * @see http://en.wikipedia.org/wiki/Operator-precedence_parser
  18. *
  19. * @author Fabien Potencier <fabien@symfony.com>
  20. */
  21. class Parser
  22. {
  /**
   * Associativity marker for binary operators that group from the left.
   */
  const OPERATOR_LEFT = 1;

  /**
   * Associativity marker for binary operators that group from the right.
   */
  const OPERATOR_RIGHT = 2;

  /**
   * Token stream currently being parsed; assigned by parse().
   *
   * @var TokenStream
   */
  private $stream;

  /**
   * Prefix operators keyed by symbol; each entry holds a 'precedence'.
   *
   * @var array
   */
  private $unaryOperators;

  /**
   * Infix operators keyed by symbol; each entry holds a 'precedence' and an
   * 'associativity' (one of the OPERATOR_* constants).
   *
   * @var array
   */
  private $binaryOperators;

  /**
   * Functions that expressions may call, keyed by function name.
   *
   * @var array
   */
  private $functions;

  /**
   * Names that the expression being parsed may reference; assigned by parse().
   *
   * @var array
   */
  private $names;

  /**
   * Stores the function map and builds the unary and binary operator tables.
   *
   * @param array $functions Functions that expressions may call, keyed by name
   */
  public function __construct(array $functions)
  {
      $this->functions = $functions;

      // Prefix operators and how tightly each one binds.
      $unaryPrecedences = array(
          'not' => 50,
          '!' => 50,
          '-' => 500,
          '+' => 500,
      );

      $this->unaryOperators = array();
      foreach ($unaryPrecedences as $symbol => $precedence) {
          $this->unaryOperators[$symbol] = array('precedence' => $precedence);
      }

      // Left-associative infix operators, listed from loosest to tightest binding.
      $leftAssociativePrecedences = array(
          'or' => 10,
          '||' => 10,
          'and' => 15,
          '&&' => 15,
          '|' => 16,
          '^' => 17,
          '&' => 18,
          '==' => 20,
          '===' => 20,
          '!=' => 20,
          '!==' => 20,
          '<' => 20,
          '>' => 20,
          '>=' => 20,
          '<=' => 20,
          'not in' => 20,
          'in' => 20,
          'matches' => 20,
          '..' => 25,
          '+' => 30,
          '-' => 30,
          '~' => 40,
          '*' => 60,
          '/' => 60,
          '%' => 60,
      );

      $this->binaryOperators = array();
      foreach ($leftAssociativePrecedences as $symbol => $precedence) {
          $this->binaryOperators[$symbol] = array(
              'precedence' => $precedence,
              'associativity' => self::OPERATOR_LEFT,
          );
      }

      // Exponentiation is the only right-associative operator.
      $this->binaryOperators['**'] = array(
          'precedence' => 200,
          'associativity' => self::OPERATOR_RIGHT,
      );
  }
  /**
   * Builds a node tree from a token stream.
   *
   * $names lists the identifiers an expression is allowed to reference; the
   * array values are the names as they are written in the expression.
   *
   * When the compiled PHP code must use a different variable name, give that
   * name as the array key. For example, array('this' => 'container') lets
   * the expression say 'container' while the compiled code uses 'this'.
   *
   * @param TokenStream $stream A token stream instance
   * @param array       $names  An array of valid names
   *
   * @return Node\Node A node tree
   *
   * @throws SyntaxError When tokens remain after the expression has been parsed
   */
  public function parse(TokenStream $stream, $names = array())
  {
      $this->stream = $stream;
      $this->names = $names;

      $tree = $this->parseExpression();

      // A complete expression must consume the whole stream.
      if ($stream->isEOF()) {
          return $tree;
      }

      $leftover = $stream->current;
      $message = sprintf('Unexpected token "%s" of value "%s"', $leftover->type, $leftover->value);

      throw new SyntaxError($message, $leftover->cursor, $stream->getExpression());
  }
  /**
   * Parses an expression using precedence climbing.
   *
   * A primary is read first; then, for as long as the current token is a
   * known binary operator whose precedence is at least $precedence, the
   * operator is consumed and its right-hand side is parsed recursively.
   * At the top level (precedence 0) the result is additionally passed to
   * parseConditionalExpression().
   *
   * @param int $precedence Minimum precedence an operator needs to be consumed here
   *
   * @return Node\Node
   */
  public function parseExpression($precedence = 0)
  {
      $left = $this->getPrimary();

      while (true) {
          $operatorToken = $this->stream->current;

          if (!$operatorToken->test(Token::OPERATOR_TYPE) || !isset($this->binaryOperators[$operatorToken->value])) {
              break;
          }

          $operator = $this->binaryOperators[$operatorToken->value];
          if ($operator['precedence'] < $precedence) {
              break;
          }

          $this->stream->next();

          // A left-associative operator requires a strictly tighter operator on
          // its right; a right-associative one accepts the same precedence again.
          $minimum = $operator['precedence'];
          if (self::OPERATOR_LEFT === $operator['associativity']) {
              ++$minimum;
          }

          $right = $this->parseExpression($minimum);
          $left = new Node\BinaryNode($operatorToken->value, $left, $right);
      }

      if (0 !== $precedence) {
          return $left;
      }

      return $this->parseConditionalExpression($left);
  }
  /**
   * Parses the operand that starts at the current token.
   *
   * Three forms are recognised, in this order: a unary operator followed by
   * its operand, a parenthesised sub-expression, and anything else, which is
   * delegated to parsePrimaryExpression().
   *
   * @return Node\Node
   */
  protected function getPrimary()
  {
      $first = $this->stream->current;

      $isUnaryOperator = $first->test(Token::OPERATOR_TYPE) && isset($this->unaryOperators[$first->value]);
      if ($isUnaryOperator) {
          $this->stream->next();
          $operand = $this->parseExpression($this->unaryOperators[$first->value]['precedence']);
          $unary = new Node\UnaryNode($first->value, $operand);

          return $this->parsePostfixExpression($unary);
      }

      if (!$first->test(Token::PUNCTUATION_TYPE, '(')) {
          return $this->parsePrimaryExpression();
      }

      // Parenthesised sub-expression: skip "(", parse the inside, require ")".
      $this->stream->next();
      $inner = $this->parseExpression();
      $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'An opened parenthesis is not properly closed');

      return $this->parsePostfixExpression($inner);
  }
  /**
   * Wraps $expr in conditional nodes for every "?" that follows it.
   *
   * Handles the three forms "a ? b : c", "a ?: c" (the condition doubles as
   * the true branch) and "a ? b" (the false branch is a null constant).
   *
   * @param Node\Node $expr The already parsed condition
   *
   * @return Node\Node $expr itself when no "?" follows, a ConditionalNode otherwise
   */
  protected function parseConditionalExpression($expr)
  {
      while ($this->stream->current->test(Token::PUNCTUATION_TYPE, '?')) {
          $this->stream->next();

          if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
              // "a ?: c"
              $this->stream->next();
              $whenTrue = $expr;
              $whenFalse = $this->parseExpression();
          } else {
              $whenTrue = $this->parseExpression();

              if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ':')) {
                  // "a ? b : c"
                  $this->stream->next();
                  $whenFalse = $this->parseExpression();
              } else {
                  // "a ? b"
                  $whenFalse = new Node\ConstantNode(null);
              }
          }

          $expr = new Node\ConditionalNode($expr, $whenTrue, $whenFalse);
      }

      return $expr;
  }
  /**
   * Parses a primary expression: a constant, a name, a function call, an
   * array literal or a hash literal.
   *
   * Numbers, strings and the true/false/null keywords are returned directly
   * as constant nodes; every other form is passed through
   * parsePostfixExpression() so that trailing ".name", ".name(...)" and
   * "[...]" accessors are attached.
   *
   * @return Node\Node
   *
   * @throws SyntaxError When a function or variable is unknown, or the token cannot start an expression
   */
  public function parsePrimaryExpression()
  {
      $token = $this->stream->current;
      switch ($token->type) {
          case Token::NAME_TYPE:
              $this->stream->next();
              switch ($token->value) {
                  case 'true':
                  case 'TRUE':
                      return new Node\ConstantNode(true);

                  case 'false':
                  case 'FALSE':
                      return new Node\ConstantNode(false);

                  case 'null':
                  case 'NULL':
                      return new Node\ConstantNode(null);

                  default:
                      // Only an opening-parenthesis punctuation token starts a call;
                      // checking the type keeps a string literal "(" from being
                      // mistaken for one.
                      if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
                          if (false === isset($this->functions[$token->value])) {
                              throw new SyntaxError(sprintf('The function "%s" does not exist', $token->value), $token->cursor, $this->stream->getExpression());
                          }

                          $node = new Node\FunctionNode($token->value, $this->parseArguments());
                      } else {
                          if (!\in_array($token->value, $this->names, true)) {
                              throw new SyntaxError(sprintf('Variable "%s" is not valid', $token->value), $token->cursor, $this->stream->getExpression());
                          }

                          // is the name used in the compiled code different
                          // from the name used in the expression?
                          // The search is strict, like the in_array() check above, so
                          // the key found belongs to the identical entry rather than
                          // to an earlier one that merely compares loosely equal.
                          $name = array_search($token->value, $this->names, true);
                          if (\is_int($name)) {
                              $name = $token->value;
                          }

                          $node = new Node\NameNode($name);
                      }
              }
              break;

          case Token::NUMBER_TYPE:
          case Token::STRING_TYPE:
              $this->stream->next();

              return new Node\ConstantNode($token->value);

          default:
              if ($token->test(Token::PUNCTUATION_TYPE, '[')) {
                  $node = $this->parseArrayExpression();
              } elseif ($token->test(Token::PUNCTUATION_TYPE, '{')) {
                  $node = $this->parseHashExpression();
              } else {
                  throw new SyntaxError(sprintf('Unexpected token "%s" of value "%s"', $token->type, $token->value), $token->cursor, $this->stream->getExpression());
              }
      }

      return $this->parsePostfixExpression($node);
  }
  /**
   * Parses an array literal such as "[a, b, c]".
   *
   * Elements are separated by commas and a trailing comma before the
   * closing bracket is accepted.
   *
   * @return Node\ArrayNode
   */
  public function parseArrayExpression()
  {
      $this->stream->expect(Token::PUNCTUATION_TYPE, '[', 'An array element was expected');

      $array = new Node\ArrayNode();
      $elementCount = 0;

      while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
          if ($elementCount > 0) {
              $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'An array element must be followed by a comma');

              // trailing ,?
              if ($this->stream->current->test(Token::PUNCTUATION_TYPE, ']')) {
                  break;
              }
          }

          $array->addElement($this->parseExpression());
          ++$elementCount;
      }

      $this->stream->expect(Token::PUNCTUATION_TYPE, ']', 'An opened array is not properly closed');

      return $array;
  }
  /**
   * Parses a hash literal such as "{a: 1, 'b': 2, (c ~ d): 3}".
   *
   * Pairs are separated by commas and a trailing comma before the closing
   * brace is accepted. Each pair is added to the resulting array node with
   * its key.
   *
   * @return Node\ArrayNode
   *
   * @throws SyntaxError When a key is not a string, a number, a name or a parenthesised expression
   */
  public function parseHashExpression()
  {
      $this->stream->expect(Token::PUNCTUATION_TYPE, '{', 'A hash element was expected');

      $node = new Node\ArrayNode();
      $first = true;
      while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
          if (!$first) {
              $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'A hash value must be followed by a comma');

              // trailing ,?
              if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '}')) {
                  break;
              }
          }
          $first = false;

          // a hash key can be:
          //
          //  * a number -- 12
          //  * a string -- 'a'
          //  * a name, which is equivalent to a string -- a
          //  * an expression, which must be enclosed in parentheses -- (1 + 2)
          if ($this->stream->current->test(Token::STRING_TYPE) || $this->stream->current->test(Token::NAME_TYPE) || $this->stream->current->test(Token::NUMBER_TYPE)) {
              $key = new Node\ConstantNode($this->stream->current->value);
              $this->stream->next();
          } elseif ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
              $key = $this->parseExpression();
          } else {
              $current = $this->stream->current;

              // The parenthetical detail is closed properly so the message reads as one sentence.
              throw new SyntaxError(sprintf('A hash key must be a quoted string, a number, a name, or an expression enclosed in parentheses (unexpected token "%s" of value "%s")', $current->type, $current->value), $current->cursor, $this->stream->getExpression());
          }

          $this->stream->expect(Token::PUNCTUATION_TYPE, ':', 'A hash key must be followed by a colon (:)');
          $value = $this->parseExpression();

          $node->addElement($value, $key);
      }
      $this->stream->expect(Token::PUNCTUATION_TYPE, '}', 'An opened hash is not properly closed');

      return $node;
  }
  /**
   * Attaches trailing accessors to an already parsed node.
   *
   * As long as the current token is "." or "[", the node is wrapped in a
   * GetAttrNode: ".name" becomes a property call, ".name(...)" a method call
   * and "[expr]" an array call. Any other punctuation ends the chain.
   *
   * @param Node\Node $node The node the accessors apply to
   *
   * @return Node\Node $node itself when no accessor follows, the outermost GetAttrNode otherwise
   *
   * @throws SyntaxError When "." is not followed by a valid property or method name
   */
  public function parsePostfixExpression($node)
  {
      $token = $this->stream->current;
      while (Token::PUNCTUATION_TYPE === $token->type) {
          if ('.' === $token->value) {
              $this->stream->next();
              $token = $this->stream->current;
              $this->stream->next();

              if (
                  Token::NAME_TYPE !== $token->type
                  &&
                  // Operators like "not" and "matches" are valid method or property names,
                  //
                  // In other words, besides NAME_TYPE, OPERATOR_TYPE could also be parsed as a property or method.
                  // This is because operators are processed by the lexer prior to names. So "not" in "foo.not()" or "matches" in "foo.matches" will be recognized as an operator first.
                  // But in fact, "not" and "matches" in such expressions shall be parsed as method or property names.
                  //
                  // And this ONLY works if the operator consists of valid characters for a property or method name.
                  // The pattern is therefore anchored at both ends: an operator such as "not in"
                  // merely starts with a valid name and must be rejected.
                  //
                  // Other types, such as STRING_TYPE and NUMBER_TYPE, can't be parsed as property nor method names.
                  //
                  // As a result, if $token is NOT an operator OR $token->value is NOT a valid property or method name, an exception shall be thrown.
                  (Token::OPERATOR_TYPE !== $token->type || !preg_match('/^[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\z/', $token->value))
              ) {
                  throw new SyntaxError('Expected name', $token->cursor, $this->stream->getExpression());
              }

              $arg = new Node\ConstantNode($token->value);

              $arguments = new Node\ArgumentsNode();
              if ($this->stream->current->test(Token::PUNCTUATION_TYPE, '(')) {
                  $type = Node\GetAttrNode::METHOD_CALL;
                  foreach ($this->parseArguments()->nodes as $n) {
                      $arguments->addElement($n);
                  }
              } else {
                  $type = Node\GetAttrNode::PROPERTY_CALL;
              }

              $node = new Node\GetAttrNode($node, $arg, $arguments, $type);
          } elseif ('[' === $token->value) {
              if ($node instanceof Node\GetAttrNode && Node\GetAttrNode::METHOD_CALL === $node->attributes['type'] && \PHP_VERSION_ID < 50400) {
                  throw new SyntaxError('Array calls on a method call is only supported on PHP 5.4+', $token->cursor, $this->stream->getExpression());
              }

              $this->stream->next();
              $arg = $this->parseExpression();
              $this->stream->expect(Token::PUNCTUATION_TYPE, ']');

              $node = new Node\GetAttrNode($node, $arg, new Node\ArgumentsNode(), Node\GetAttrNode::ARRAY_CALL);
          } else {
              break;
          }

          $token = $this->stream->current;
      }

      return $node;
  }
  /**
   * Parses a parenthesised, comma-separated list of argument expressions.
   *
   * @return Node\Node A node whose children are the parsed arguments, in order
   */
  public function parseArguments()
  {
      $this->stream->expect(Token::PUNCTUATION_TYPE, '(', 'A list of arguments must begin with an opening parenthesis');

      $arguments = array();
      while (!$this->stream->current->test(Token::PUNCTUATION_TYPE, ')')) {
          // Every argument after the first must be preceded by a comma.
          if (\count($arguments) > 0) {
              $this->stream->expect(Token::PUNCTUATION_TYPE, ',', 'Arguments must be separated by a comma');
          }

          $arguments[] = $this->parseExpression();
      }

      $this->stream->expect(Token::PUNCTUATION_TYPE, ')', 'A list of arguments must be closed by a parenthesis');

      return new Node\Node($arguments);
  }
  325. }