Parsing, Compiling, and Static Metaprogramming

9 min read Original article ↗
  • Parsing, Compiling, and Static Metaprogramming — Patrick Dubroy, Google Munich (@dubroy)

  • Metaprogramming: writing programs that manipulate programs as data

  • Static Metaprogramming: writing programs that manipulate code

  • COMPILER

  • Code → COMPILER → Code

  • C++ → COMPILER → 1011

  • C++ → PARSER → Parse Tree → CODEGEN → 1011

  • Program FunctionDeclaration Identifier Body ReturnStatement Literal “42”

  • { "type": "Program", "body": [ { "type": "FunctionDeclaration", "id": {

    "type": "Identifier", "name": "getAnswer" }, "params": [], "defaults": [], "body": { "type": "BlockStatement", "body": [ { "type": "ReturnStatement", "argument": { "type": "Literal", "value": 42, "raw": "42" } } ] }, } ] }

  • function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }

  • function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } var ast = esprima.parse(code, parseOptions);

  • function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } });

  • function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }

  • function checkStyle(code, filename) { var ast = esprima.parse(code, parseOptions); var

    errors = []; estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'VariableDeclaration') checkVariableNames(node, errors); } }); return formatErrors(code, errors, filename); } function checkVariableNames(node, errors) { _.each(node.declarations, function(decl) { if (decl.id.name.indexOf('_') >= 0) { return errors.push({ location: decl.loc, message: 'Use camelCase, not hacker_style!' }); } }); }

  • function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }

  • function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } var ast = esprima.parse(code);

  • function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } estraverse.traverse(ast, { enter: function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } });

  • function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }

  • FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body

  • FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body Statement

  • FunctionDeclaration Identifier BlockStatement Statement Statement ... Array .body .body Statement

  • function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); } return escodegen.generate(ast);

  • function addLogging(code) { var ast = esprima.parse(code); estraverse.traverse(ast, { enter:

    function(node, parent) { if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') { addBeforeCode(node); } } }); return escodegen.generate(ast); } function addBeforeCode(node) { var name = node.id ? node.id.name : '<anonymous function>'; var beforeCode = "console.log('Entering " + name + "()');"; var beforeNodes = esprima.parse(beforeCode).body; node.body.body = beforeNodes.concat(node.body.body); }

  • addLogging(" \ function foo(a, b) { \ var x =

    'blah'; \ var y = (function () { \ return 3; \ })(); \ } \ foo(1, 'wut', 3); \ "); function foo(a, b) { console.log('Entering foo()'); var x = 'blah'; var y = function () { console.log('Entering <anonymous function>()'); return 3; }(); } foo(1, 'wut', 3);

  • addLogging(" \ function foo(a, b) { \ var x =

    'blah'; \ var y = (function () { \ return 3; \ })(); \ } \ foo(1, 'wut', 3); \ "); function foo(a, b) { console.log('Entering foo()'); var x = 'blah'; var y = function () { console.log('Entering <anonymous function>()'); return 3; }(); } foo(1, 'wut', 3);

  • Parser Generators

  • Language Grammar → PARSER GENERATOR → Parser

  • Formal Grammars

  • var PEG = require('pegjs'); var parser = PEG.buildParser(" \ expr

    = expr [-+] term / term \ term = term [*/] factor / factor \ factor = '(' expr ')' / number \ number = [0-9]+ \ "); parser.parse('1+10'); ~/node_modules/pegjs/lib/peg.js:3316 throw new PEG.GrammarError( ^ PEG.GrammarError: Left recursion detected for rule "expr".

  • An AltJS Language in 5 minutes

  • expr = term ([-+] term)*

  • expr = term ([-+] term)* / decl

  • expr = term ([-+] term)* / decl decl = ident

    ' := ' expr ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] program = expr? ('.' [ \\n]* expr)* > parser.parse('x := 2+5. y := 3') [[["x"]," := ",[["2",[]],[["+",["5",[]]]]]],[[".",[], [["y"]," := ",[["3",[]],[]]]]]]

  • program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; }

  • program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')

  • program = expr? ('.' [ \\n]* expr)* expr = term

    ([-+] term)* / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') ["var x = 2,,+,5,;",[[".",[],"var y = 3,,;"]]]

  • program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]

  • program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')

  • program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') ["var x = 2+5;",[[".",[],"var y = 3;"]]]

  • program = expr? ('.' [ \\n]* expr)* expr = t:term

    rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]

  • program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*

    { return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z]

  • program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*

    { return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3')

  • program = e:expr? rest:(('.' [\\n ]* e:expr){ return e; })*

    { return [e].concat(rest).join('\n'); } expr = t:term rest:([-+] term)* { return flatten(t.concat(rest)); } / decl decl = id:ident ' := ' e:expr { return 'var ' + id + ' = ' + e.join('') + ';'; } ident = (digit / letter / '_')+ digit = [0-9] letter = [a-zA-Z] > parser.parse('x := 2+5. y := 3') var x = 2+5; var y = 3;

  • Resources: github.com/pdubroy/jsconfeu-talk · Slides: goo.gl/qs4Gna · Esprima: esprima.org · PEG.js: pegjs.majda.cz

  • Danke!