SlideShare a Scribd company logo
@asgrim
Climbing the
Abstract Syntax Tree
James Titcumb
CodeiD PHP Odessa 2017
$ whoami
James Titcumb
www.jamestitcumb.com
www.roave.com
@asgrim
@asgrim
How PHP works
PHP code
OpCache
Execute (VM)
Lexer + Parser
Compiler
@asgrim
The PHP Lexer
zend_language_scanner.l
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
The PHP Lexer
zend_language_scanner.l
@asgrim
The PHP Lexer
zend_language_scanner.l
re2c
@asgrim
The PHP Lexer
zend_language_scanner.l
re2c
zend_language_scanner.c
@asgrim
The PHP Parser
zend_language_parser.y
@asgrim
zend_language_parser.y
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
if_stmt_without_else (B)
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
if_stmt_without_else (B)
if_stmt
@asgrim
zend_language_parser.y (PHP 7.0.10)
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
@asgrim
zend_language_parser.y (PHP 5.6.26)
T_IF parenthesis_expr { zend_do_if_cond(&$2, &$1 TSRMLS_CC); }
statement { zend_do_if_after_statement(&$1, 1 TSRMLS_CC); }
void zend_do_if_cond(const znode *cond, znode *closing_bracket_token TSRMLS_DC)
{
int if_cond_op_number = get_next_op_number(CG(active_op_array));
zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
opline->opcode = ZEND_JMPZ;
SET_NODE(opline->op1, cond);
closing_bracket_token->u.op.opline_num = if_cond_op_number;
SET_UNUSED(opline->op2);
INC_BPC(CG(active_op_array));
}
@asgrim
AST is new in PHP 7+
@asgrim
How PHP works
PHP code
OpCache
Execute (VM)
Lexer + Parser
Compiler
@asgrim
Let’s simplify!
@asgrim
First… WTF is AST?
@asgrim
AST is just a data structure
@asgrim
PHP code
<?php
echo "Hello world";
@asgrim
An AST representation
Echo statement
`-- String, value "Hello world"
@asgrim
PHP code
<?php
echo "Hello " . "world";
@asgrim
An AST representation
Echo statement
`-- Concat
|-- Left
| `-- String, value "Hello "
`-- Right
`-- String, value "world"
@asgrim
PHP code
<?php
$a = 5;
$b = 3;
echo $a + ($b * 2);
@asgrim
An AST representation
Assign statement
|-- Variable $a
`-- Integer, value 5
Assign statement
|-- Variable $b
`-- Integer, value 3
Echo statement
`-- Add operation
|-- Left
| `-- Variable $a
`-- Right
`-- Multiply operation
|-- Left
| `-- Variable $b
`-- Right
`-- Integer, value 2
@asgrim
Why?
@asgrim
Faster!
@asgrim
AST compilation
Statements
EchoAssign
Scalar
value: (int)5
Variable
name: $a
Assign
Scalar
value: (int)3
Variable
name: $b
Add op
Right operandLeft operand
Variable
name: $a
Multiply op
Right operandLeft operand
Variable
name: $b
Scalar
value: (int)2
@asgrim
AST compilation: pre-order traversal
Statements
EchoAssign
Scalar
value: (int)5
Variable
name: $a
Assign
Scalar
value: (int)3
Variable
name: $b
Add op
Right operandLeft operand
Variable
name: $a
Multiply op
Right operandLeft operand
Variable
name: $b
Scalar
value: (int)2
@asgrim
Pre-order traversal: Polish notation
Assign(Variable $a, Scalar 5)
Assign(Variable $b, Scalar 3)
Echo (
Add(
Variable $a,
Multiply( $b, 2 )
)
)
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
Operator Left operand
Right operand
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
Operator Left operand Right operand
Operator Left operand
Right operand
@asgrim
Reverse Polish Notation
1 2 3 * +
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
6
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
6
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
7
@asgrim
Let’s write a compiler (!!!)
In three easy steps…
@asgrim
Warning: do not use in production
@asgrim
View > Source
https://github.com/asgrim/basic-maths-compiler
@asgrim
Define the language
Tokens
● T_ADD (+)
● T_SUBTRACT (-)
● T_MULTIPLY (*)
● T_DIVIDE (/)
● T_INTEGER (\d)
● T_WHITESPACE (\s+)
@asgrim
Step 1: Writing a simple lexer
@asgrim
Using regular expressions
private static $matches = [
'/^(\+)/' => Token::T_ADD,
'/^(-)/' => Token::T_SUBTRACT,
'/^(\*)/' => Token::T_MULTIPLY,
'/^(\/)/' => Token::T_DIVIDE,
'/^(\d+)/' => Token::T_INTEGER,
'/^(\s+)/' => Token::T_WHITESPACE,
];
@asgrim
Step through the input string
public function __invoke(string $input) : array
{
$tokens = [];
$offset = 0;
while ($offset < strlen($input)) {
$focus = substr($input, $offset);
$result = $this->match($focus);
$tokens[] = $result;
$offset += strlen($result->getLexeme());
}
return $tokens;
}
@asgrim
The matching method
private function match(string $input) : Token
{
foreach (self::$matches as $pattern => $token) {
if (preg_match($pattern, $input, $matches)) {
return new Token($token, $matches[1]);
}
}
throw new \RuntimeException(sprintf(
'Unmatched token, next 15 chars were: %s', substr($input, 0, 15)
));
}
@asgrim
Step 2: Parsing the tokens
@asgrim
Order tokens by operator precedence
/**
* Higher number is higher precedence.
* @var int[]
*/
private static $operatorPrecedence = [
Token::T_SUBTRACT => 0,
Token::T_ADD => 1,
Token::T_DIVIDE => 2,
Token::T_MULTIPLY => 3,
];
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
// Clean up by moving any remaining operators onto the token stack
while (count($operators)) {
$stack[] = array_pop($operators);
}
return $stack;
@asgrim
Order tokens by operator precedence
1 + 2 * 3
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1
+
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2
+
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2
+ *
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2 3
+ *
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2 3 *
+ *
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2 3 * +
+
Output stack
Operator stack
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
Node\BinaryOp\Add (
Node\Scalar\IntegerValue(1),
Node\BinaryOp\Multiply (
Node\Scalar\IntegerValue(2),
Node\Scalar\IntegerValue(3)
)
)
@asgrim
Step 3: Executing the AST
@asgrim
Compile & execute AST
private function compileNode(NodeInterface $node)
{
if ($node instanceof Node\BinaryOp\AbstractBinaryOp) {
return $this->compileBinaryOp($node);
}
if ($node instanceof Node\Scalar\IntegerValue) {
return $node->getValue();
}
}
@asgrim
Compile & execute AST
private function compileBinaryOp(Node\BinaryOp\AbstractBinaryOp $node)
{
$left = $this->compileNode($node->getLeft());
$right = $this->compileNode($node->getRight());
switch (get_class($node)) {
case Node\BinaryOp\Add::class:
return $left + $right;
case Node\BinaryOp\Subtract::class:
return $left - $right;
case Node\BinaryOp\Multiply::class:
return $left * $right;
case Node\BinaryOp\Divide::class:
return $left / $right;
}
}
@asgrim
What does this mean for me?
@asgrim
AST in userland
@asgrim
php-ast extension
https://github.com/nikic/php-ast
@asgrim
php-ast example usage
<?php
require 'path/to/util.php';
$code = <<<'EOC'
<?php
$var = 42;
EOC;
echo ast_dump(ast\parse_code($code, $version=35)), "\n";
// Output:
AST_STMT_LIST
0: AST_ASSIGN
var: AST_VAR
name: "var"
expr: 42
@asgrim
astkit
https://github.com/sgolemon/astkit
@asgrim
astkit example usage
$if = AstKit::parseString(<<<EOD
if (true) {
echo "This is a triumph.\n";
} else {
echo "The cake is a lie.\n";
}
EOD
);
$if->execute(); // First run, program is as-seen above
$const = $if->getChild(0)->getChild(0);
// Replace the "true" constant in the condition with false
$const->graft(0, false);
// Can also graft other AstKit nodes, instead of constants
$if->execute(); // Second run now takes the else path
@asgrim
PhpParser
https://github.com/nikic/PHP-Parser
@asgrim
PHP Parser
<?php
use PhpParser\ParserFactory;
$parser = (new ParserFactory)
->create(ParserFactory::PREFER_PHP7);
print_r($parser->parse(
file_get_contents('ast-demo-src.php')
));
@asgrim
Better Reflection
https://github.com/Roave/BetterReflection
@asgrim
Better Reflection workflow
Reflector
Source
Locator
PhpParser
Reflection
@asgrim
PHP Reflection
$reflection = new ReflectionClass(
\My\ExampleClass::class
);
$this->assertSame(
'ExampleClass',
$reflection->getShortName()
);
@asgrim
Better Reflection
$reflection = ReflectionClass::createFromName(
\My\ExampleClass::class
);
$this->assertSame(
'ExampleClass',
$reflection->getShortName()
);
@asgrim
ReflectionClass::createFromName()
// In ReflectionClass :
public static function createFromName($className)
{
return ClassReflector::buildDefaultReflector()->reflect($className);
}
@asgrim
ClassReflector::buildDefaultReflector()
// In ClassReflector :
public static function buildDefaultReflector()
{
return new self(new AggregateSourceLocator([
new PhpInternalSourceLocator(),
new EvaledCodeSourceLocator(),
new AutoloadSourceLocator(),
]));
}
@asgrim
Given a class structure...
<?php
class Foo
{
private $bar;
public function thing()
{
}
}
@asgrim
… we get the AST!
Class, name Foo
|-- Statements
| |-- Property, name bar
| | |-- Type [private]
| | `-- Attributes [start line: 7, end line: 9]
| `-- Method, name thing
| |-- Type [public]
| |-- Parameters [...]
| |-- Statements [...]
| `-- Attributes [start line: 7, end line: 9]
`-- Attributes [start line: 3, end line: 10]
@asgrim
What can I use Better Reflection for?
@asgrim
Monkey patching example
class MyClass
{
public function foo()
{
return 5;
}
}
@asgrim
Monkey patching example
use Roave\BetterReflection\Reflector\ClassReflector;
use Roave\BetterReflection\SourceLocator\Type\SingleFileSourceLocator;
use Roave\BetterReflection\Util\Autoload\ClassLoader;
use Roave\BetterReflection\Util\Autoload\ClassLoaderMethod\FileCacheLoader;
$loader = new ClassLoader(FileCacheLoader::defaultFileCacheLoader(__DIR__));
// Create the reflection first (without loading)
$classInfo = (new ClassReflector(
new SingleFileSourceLocator(__DIR__ . '/MyClass.php')
))->reflect('MyClass');
$loader->addClass($classInfo);
@asgrim
Monkey patching example
use Roave\BetterReflection\Reflector\ClassReflector;
use Roave\BetterReflection\SourceLocator\Type\SingleFileSourceLocator;
use Roave\BetterReflection\Util\Autoload\ClassLoader;
use Roave\BetterReflection\Util\Autoload\ClassLoaderMethod\FileCacheLoader;
$loader = new ClassLoader(FileCacheLoader::defaultFileCacheLoader(__DIR__));
// Create the reflection first (without loading)
$classInfo = (new ClassReflector(
new SingleFileSourceLocator(__DIR__ . '/MyClass.php')
))->reflect('MyClass');
$loader->addClass($classInfo);
@asgrim
Monkey patching example
// Override the body...!
$classInfo->getMethod('foo')->setBodyFromClosure(
function () {
return 4;
}
);
$c = new MyClass();
echo $c->foo() . "\n"; // should be 4...!?!??
@asgrim
Monkey patching example
// Override the body...!
$classInfo->getMethod('foo')->setBodyFromClosure(
function () {
return 4;
}
);
$c = new MyClass();
echo $c->foo() . "\n"; // returns 4
@asgrim
To summarise
● For PHP engine:
○ AST is an efficient data structure to represent code
○ AST means faster compilation (ignoring opcache)
○ Separation in PHP engine for parser and compiler
○ https://wiki.php.net/rfc/abstract_syntax_tree
● Concepts can be used in userland
○ PHP Parser library - https://github.com/nikic/php-parser
○ Better Reflection - https://github.com/Roave/BetterReflection
■ Reflect on not-yet-loaded files
■ Monkey patching in userland code (!)
○ Static analysis opportunities
■ Better Reflection
■ Exakat static analysis (uses own AST)
■ Phan (uses php-ast)
Any questions?
James Titcumb
@asgrim

More Related Content

What's hot (20)

PDF
Climbing the Abstract Syntax Tree (Southeast PHP 2018)
James Titcumb
 
PDF
Climbing the Abstract Syntax Tree (Midwest PHP 2020)
James Titcumb
 
PDF
Climbing the Abstract Syntax Tree (PHP UK 2018)
James Titcumb
 
PDF
Climbing the Abstract Syntax Tree (php[world] 2019)
James Titcumb
 
PDF
Interpret this... (PHPem 2016)
James Titcumb
 
PPT
Class 4 - PHP Arrays
Ahmed Swilam
 
PDF
Sorting arrays in PHP
Vineet Kumar Saini
 
PDF
Barcelona.pm Curs1211 sess01
Javier Arturo Rodríguez
 
PDF
Dades i operadors
Alex Muntada Duran
 
PDF
What's New in Perl? v5.10 - v5.16
Ricardo Signes
 
PDF
Arrays in PHP
Vineet Kumar Saini
 
PPTX
Php 2
vivlinux
 
PPT
Php Using Arrays
mussawir20
 
PDF
Php array
Nikul Shah
 
PDF
OSDC.TW - Gutscript for PHP haters
Lin Yo-An
 
PPT
Arrays in PHP
Compare Infobase Limited
 
PDF
Perl6 one-liners
Andrew Shitov
 
PPTX
07 php
CBRIARCSC
 
TXT
Bouncingballs sh
Ben Pope
 
PDF
Perl Bag of Tricks - Baltimore Perl mongers
brian d foy
 
Climbing the Abstract Syntax Tree (Southeast PHP 2018)
James Titcumb
 
Climbing the Abstract Syntax Tree (Midwest PHP 2020)
James Titcumb
 
Climbing the Abstract Syntax Tree (PHP UK 2018)
James Titcumb
 
Climbing the Abstract Syntax Tree (php[world] 2019)
James Titcumb
 
Interpret this... (PHPem 2016)
James Titcumb
 
Class 4 - PHP Arrays
Ahmed Swilam
 
Sorting arrays in PHP
Vineet Kumar Saini
 
Barcelona.pm Curs1211 sess01
Javier Arturo Rodríguez
 
Dades i operadors
Alex Muntada Duran
 
What's New in Perl? v5.10 - v5.16
Ricardo Signes
 
Arrays in PHP
Vineet Kumar Saini
 
Php 2
vivlinux
 
Php Using Arrays
mussawir20
 
Php array
Nikul Shah
 
OSDC.TW - Gutscript for PHP haters
Lin Yo-An
 
Perl6 one-liners
Andrew Shitov
 
07 php
CBRIARCSC
 
Bouncingballs sh
Ben Pope
 
Perl Bag of Tricks - Baltimore Perl mongers
brian d foy
 

Similar to Climbing the Abstract Syntax Tree (CodeiD PHP Odessa 2017) (20)

TXT
Pop3ck sh
Ben Pope
 
PPTX
Php functions
JIGAR MAKHIJA
 
ZIP
Round PEG, Round Hole - Parsing Functionally
Sean Cribbs
 
PPTX
Perl6 a whistle stop tour
Simon Proctor
 
PDF
Perl6 a whistle stop tour
Simon Proctor
 
PDF
... now write an interpreter (PHPem 2016)
James Titcumb
 
TXT
Gta v savegame
hozayfa999
 
PDF
How to write code you won't hate tomorrow
Pete McFarlane
 
PDF
Top 10 php classic traps
Damien Seguy
 
KEY
Achieving Parsing Sanity In Erlang
Sean Cribbs
 
KEY
Hidden treasures of Ruby
Tom Crinson
 
PDF
The Perl6 Type System
abrummett
 
PDF
Hacking Parse.y with ujihisa
ujihisa
 
PDF
Perl6 grammars
Andrew Shitov
 
TXT
Tgh.pl
iskabom
 
PDF
Good Evils In Perl
Kang-min Liu
 
PDF
Symfony2 - extending the console component
Hugo Hamon
 
PDF
Hacking parse.y (RubyKansai38)
ujihisa
 
PDF
PHP for Adults: Clean Code and Object Calisthenics
Guilherme Blanco
 
PDF
Create Custom Post Type Plugin
Jan Wilson
 
Pop3ck sh
Ben Pope
 
Php functions
JIGAR MAKHIJA
 
Round PEG, Round Hole - Parsing Functionally
Sean Cribbs
 
Perl6 a whistle stop tour
Simon Proctor
 
Perl6 a whistle stop tour
Simon Proctor
 
... now write an interpreter (PHPem 2016)
James Titcumb
 
Gta v savegame
hozayfa999
 
How to write code you won't hate tomorrow
Pete McFarlane
 
Top 10 php classic traps
Damien Seguy
 
Achieving Parsing Sanity In Erlang
Sean Cribbs
 
Hidden treasures of Ruby
Tom Crinson
 
The Perl6 Type System
abrummett
 
Hacking Parse.y with ujihisa
ujihisa
 
Perl6 grammars
Andrew Shitov
 
Tgh.pl
iskabom
 
Good Evils In Perl
Kang-min Liu
 
Symfony2 - extending the console component
Hugo Hamon
 
Hacking parse.y (RubyKansai38)
ujihisa
 
PHP for Adults: Clean Code and Object Calisthenics
Guilherme Blanco
 
Create Custom Post Type Plugin
Jan Wilson
 
Ad

More from James Titcumb (20)

PDF
Living the Best Life on a Legacy Project (phpday 2022).pdf
James Titcumb
 
PDF
Tips for Tackling a Legacy Codebase (ScotlandPHP 2021)
James Titcumb
 
PDF
Best practices for crafting high quality PHP apps (Bulgaria 2019)
James Titcumb
 
PDF
Best practices for crafting high quality PHP apps (php[world] 2019)
James Titcumb
 
PDF
Crafting Quality PHP Applications (PHP Joburg Oct 2019)
James Titcumb
 
PDF
Best practices for crafting high quality PHP apps - PHP UK 2019
James Titcumb
 
PDF
Best practices for crafting high quality PHP apps (ScotlandPHP 2018)
James Titcumb
 
PDF
Kicking off with Zend Expressive and Doctrine ORM (PHP South Africa 2018)
James Titcumb
 
PDF
Best practices for crafting high quality PHP apps (PHP South Africa 2018)
James Titcumb
 
PDF
Crafting Quality PHP Applications (PHPkonf 2018)
James Titcumb
 
PDF
Best practices for crafting high quality PHP apps (PHP Yorkshire 2018)
James Titcumb
 
PDF
Crafting Quality PHP Applications: an overview (PHPSW March 2018)
James Titcumb
 
PDF
Kicking off with Zend Expressive and Doctrine ORM (PHP MiNDS March 2018)
James Titcumb
 
PDF
Crafting Quality PHP Applications (PHP Benelux 2018)
James Titcumb
 
PDF
Crafting Quality PHP Applications (ConFoo YVR 2017)
James Titcumb
 
PDF
Dip Your Toes in the Sea of Security (ConFoo YVR 2017)
James Titcumb
 
PDF
Kicking off with Zend Expressive and Doctrine ORM (ConFoo YVR 2017)
James Titcumb
 
PDF
Dip Your Toes in the Sea of Security (IPC Fall 2017)
James Titcumb
 
PDF
Dip Your Toes in the Sea of Security (PHP South Africa 2017)
James Titcumb
 
PDF
Climbing the Abstract Syntax Tree (PHP South Africa 2017)
James Titcumb
 
Living the Best Life on a Legacy Project (phpday 2022).pdf
James Titcumb
 
Tips for Tackling a Legacy Codebase (ScotlandPHP 2021)
James Titcumb
 
Best practices for crafting high quality PHP apps (Bulgaria 2019)
James Titcumb
 
Best practices for crafting high quality PHP apps (php[world] 2019)
James Titcumb
 
Crafting Quality PHP Applications (PHP Joburg Oct 2019)
James Titcumb
 
Best practices for crafting high quality PHP apps - PHP UK 2019
James Titcumb
 
Best practices for crafting high quality PHP apps (ScotlandPHP 2018)
James Titcumb
 
Kicking off with Zend Expressive and Doctrine ORM (PHP South Africa 2018)
James Titcumb
 
Best practices for crafting high quality PHP apps (PHP South Africa 2018)
James Titcumb
 
Crafting Quality PHP Applications (PHPkonf 2018)
James Titcumb
 
Best practices for crafting high quality PHP apps (PHP Yorkshire 2018)
James Titcumb
 
Crafting Quality PHP Applications: an overview (PHPSW March 2018)
James Titcumb
 
Kicking off with Zend Expressive and Doctrine ORM (PHP MiNDS March 2018)
James Titcumb
 
Crafting Quality PHP Applications (PHP Benelux 2018)
James Titcumb
 
Crafting Quality PHP Applications (ConFoo YVR 2017)
James Titcumb
 
Dip Your Toes in the Sea of Security (ConFoo YVR 2017)
James Titcumb
 
Kicking off with Zend Expressive and Doctrine ORM (ConFoo YVR 2017)
James Titcumb
 
Dip Your Toes in the Sea of Security (IPC Fall 2017)
James Titcumb
 
Dip Your Toes in the Sea of Security (PHP South Africa 2017)
James Titcumb
 
Climbing the Abstract Syntax Tree (PHP South Africa 2017)
James Titcumb
 
Ad

Recently uploaded (20)

PDF
Transcript: New from BookNet Canada for 2025: BNC BiblioShare - Tech Forum 2025
BookNet Canada
 
PPTX
Q2 FY26 Tableau User Group Leader Quarterly Call
lward7
 
PDF
Bitcoin for Millennials podcast with Bram, Power Laws of Bitcoin
Stephen Perrenod
 
DOCX
Cryptography Quiz: test your knowledge of this important security concept.
Rajni Bhardwaj Grover
 
PDF
Using FME to Develop Self-Service CAD Applications for a Major UK Police Force
Safe Software
 
PDF
Biography of Daniel Podor.pdf
Daniel Podor
 
PDF
Smart Trailers 2025 Update with History and Overview
Paul Menig
 
PDF
Jak MŚP w Europie Środkowo-Wschodniej odnajdują się w świecie AI
dominikamizerska1
 
PDF
July Patch Tuesday
Ivanti
 
PDF
Newgen 2022-Forrester Newgen TEI_13 05 2022-The-Total-Economic-Impact-Newgen-...
darshakparmar
 
PDF
Advancing WebDriver BiDi support in WebKit
Igalia
 
PDF
POV_ Why Enterprises Need to Find Value in ZERO.pdf
darshakparmar
 
PDF
Reverse Engineering of Security Products: Developing an Advanced Microsoft De...
nwbxhhcyjv
 
PPTX
The Project Compass - GDG on Campus MSIT
dscmsitkol
 
PDF
[Newgen] NewgenONE Marvin Brochure 1.pdf
darshakparmar
 
PPTX
From Sci-Fi to Reality: Exploring AI Evolution
Svetlana Meissner
 
PPTX
OpenID AuthZEN - Analyst Briefing July 2025
David Brossard
 
PDF
Newgen Beyond Frankenstein_Build vs Buy_Digital_version.pdf
darshakparmar
 
PPTX
COMPARISON OF RASTER ANALYSIS TOOLS OF QGIS AND ARCGIS
Sharanya Sarkar
 
DOCX
Python coding for beginners !! Start now!#
Rajni Bhardwaj Grover
 
Transcript: New from BookNet Canada for 2025: BNC BiblioShare - Tech Forum 2025
BookNet Canada
 
Q2 FY26 Tableau User Group Leader Quarterly Call
lward7
 
Bitcoin for Millennials podcast with Bram, Power Laws of Bitcoin
Stephen Perrenod
 
Cryptography Quiz: test your knowledge of this important security concept.
Rajni Bhardwaj Grover
 
Using FME to Develop Self-Service CAD Applications for a Major UK Police Force
Safe Software
 
Biography of Daniel Podor.pdf
Daniel Podor
 
Smart Trailers 2025 Update with History and Overview
Paul Menig
 
Jak MŚP w Europie Środkowo-Wschodniej odnajdują się w świecie AI
dominikamizerska1
 
July Patch Tuesday
Ivanti
 
Newgen 2022-Forrester Newgen TEI_13 05 2022-The-Total-Economic-Impact-Newgen-...
darshakparmar
 
Advancing WebDriver BiDi support in WebKit
Igalia
 
POV_ Why Enterprises Need to Find Value in ZERO.pdf
darshakparmar
 
Reverse Engineering of Security Products: Developing an Advanced Microsoft De...
nwbxhhcyjv
 
The Project Compass - GDG on Campus MSIT
dscmsitkol
 
[Newgen] NewgenONE Marvin Brochure 1.pdf
darshakparmar
 
From Sci-Fi to Reality: Exploring AI Evolution
Svetlana Meissner
 
OpenID AuthZEN - Analyst Briefing July 2025
David Brossard
 
Newgen Beyond Frankenstein_Build vs Buy_Digital_version.pdf
darshakparmar
 
COMPARISON OF RASTER ANALYSIS TOOLS OF QGIS AND ARCGIS
Sharanya Sarkar
 
Python coding for beginners !! Start now!#
Rajni Bhardwaj Grover
 

Climbing the Abstract Syntax Tree (CodeiD PHP Odessa 2017)