Parsing, Compiling and Static Metaprogramming — Patrick Dubroy, Google Munich (@dubroy)
Metaprogramming: Writing programs that manipulate programs as data
Static Metaprogramming: Writing programs that manipulate code
COMPILER
COMPILER Code Code
COMPILER C++ 1011
C++ 1011 PARSER CODEGEN Parse Tree
Program FunctionDeclaration Identifier Body ReturnStatement Literal “42”
{ "type": "Program", "body": [ { "type": "FunctionDeclaration", "id": {
"type": "Identifier", "name": "getAnswer" }, "params": [], "defaults": [], "body": { "type": "BlockStatement", "body": [ { "type": "ReturnStatement", "argument": { "type": "Literal", "value": 42, "raw": "42" } } ] } } ] }
function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var
errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }
function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var
errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } var ast = esprima.parse(code, parseOptions);
function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var
errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } });
function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var
errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }
function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var
errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }
function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:
function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }
function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:
function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } var ast = esprima.parse(code);
function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:
function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } });
function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:
function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }
FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body
FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body Statement
FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body Statement
function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:
function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } return escodegen.generate(ast);
function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:
function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }
addLogging(" \ function foo(a, b) { \ var x =
'blah'; \ var y = (function () { \ return 3; \ })(); \ } \ foo(1, 'wut', 3); \ "); function foo(a, b) { console.log('Entering foo()'); var x = 'blah'; var y = function () { console.log('Entering <anonymous function>()'); return 3; }(); } foo(1, 'wut', 3);
addLogging(" \ function foo(a, b) { \ var x =
'blah'; \ var y = (function () { \ return 3; \ })(); \ } \ foo(1, 'wut', 3); \ "); function foo(a, b) { console.log('Entering foo()'); var x = 'blah'; var y = function () { console.log('Entering <anonymous function>()'); return 3; }(); } foo(1, 'wut', 3);
Parser Generators
PARSER GENERATOR Language Grammar Parser
Formal Grammars
var PEG = require('pegjs'); var parser = PEG.buildParser(" \ expr
= expr [-+] term / term \ term = term [*/] factor / factor \ factor = '(' expr ')' / number \ number = [0-9]+ \ "); parser.parse('1+10'); ~/node_modules/pegjs/lib/peg.js:3316 throw new PEG.GrammarError( ^ PEG.GrammarError: Left recursion detected for rule "expr".
An AltJS Language in 5 minutes
expr = term ([-+] term)*
expr = term ([-+] term)* / decl
expr = term ([-+] term)* / decl decl = ident
' := ' expr ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] program = expr? ('.' [ \\n]* expr)* > parser.parse('x := 2+5. y := 3') [[["x"]," := ",[["2",[]],[["+",["5",[]]]]]],[[".",[], [["y"]," := ",[["3",[]],[]]]]]]
program = expr? ('.' [ \\n]* expr)* expr = term
([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; }
program = expr? ('.' [ \\n]* expr)* expr = term
([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')
program = expr? ('.' [ \\n]* expr)* expr = term
([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') ["var x = 2,,+,5,;",[[".",[],"var y = 3,,;"]]]
program = expr? ('.' [ \\n]* expr)* expr = t:term
rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
program = expr? ('.' [ \\n]* expr)* expr = t:term
rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')
program = expr? ('.' [ \\n]* expr)* expr = t:term
rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') ["var x = 2+5;",[[".",[],"var y = 3;"]]]
program = expr? ('.' [ \\n]* expr)* expr = t:term
rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*
{ return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]
program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*
{ return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')
program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*
{ return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') var x = 2+5; var y = 3;
Resources: github.com/pdubroy/jsconfeu-talk · Slides: goo.gl/qs4Gna · Esprima: esprima.org · PEG.js: pegjs.majda.cz