A question about string matching #12

CMCDragonkai · 2016-01-15T04:05:01Z

I am very confused by this behaviour. Here's a simple lexer:

module.exports = Lexer;

// General-purpose lexer: takes a source string and returns an array of token objects.
// Alternatives are ordered so that the longest match is tried before a shorter one
// (e.g. `<=` is listed before `<` in relOp); the author intends this to hold for
// every production rule here.
// String literals are written as ``...'' rather than "...". Per the discussion of this
// file, "..." is translated into a call to token('...'), while ``...'' is translated into
// seq('...'). This grammar redefines `token` as a rule of its own, so using "..." most
// likely re-enters that rule and recurses until the call stack overflows.
// NOTE(review): that explanation is the maintainer's best guess — confirm before relying on it.
// A leading `^` (as in ^spaces, ^digit, ^end) applies the rule inherited from the parent
// grammar instead of a rule defined here.
// abbreviations:
// t => token
// c => character
// l => letter
// u => uppercase
// *l => * list
ometa Lexer {

    // entry rule: zero or more tokens followed by end of input; yields the token list
    lexer = token*:ts ^end -> ts,

    // a single token object, with surrounding whitespace skipped on both sides;
    // alternatives are tried in the order listed, so reserved words win over ids
    token = ^spaces 
            ( reservedWords 
            | reservedSymbols 
            | primitiveTypes 
            | constant 
            | id 
            ):t 
            ^spaces
        -> t,

    // words that have been reserved
    reservedWords = ( let 
                    | return 
                    | ifControl 
                    | thenControl 
                    | elseControl 
                    ):t -> t,

    // symbols usually referring to operators, or subexpression or substatements;
    // relOp is tried before assignOp, so `==` is not consumed as `=`
    reservedSymbols = ( relOp 
                      | assignOp 
                      | boolOp 
                      | para  
                      | scope 
                      | statementEnd  
                      ):t -> t, 

    // primitive types in the language
    primitiveTypes = ( booleans 
                     | number 
                     | string
                     ):t -> t,

    // opening / closing parenthesis
    para = '(' -> (new ParaOpenT())
         | ')' -> (new ParaCloseT()),

    // opening / closing curly brace
    scope = '{' -> (new ScopeOpenT()) 
          | '}' -> (new ScopeCloseT()),

    let = ``let'' -> (new LetT()),
    assignOp = ``='' -> (new AssignT()),

    return = ``return'' -> (new ReturnT()),

    statementEnd = ';' -> (new StatementEndT()),

    // relational operators; two-character forms come before their one-character prefixes
    relOp = ``<='' -> (new RelOpT('LTEQ')) 
          | ``<'' -> (new RelOpT('LT'))
          | ``>='' -> (new RelOpT('GTEQ'))
          | ``>'' -> (new RelOpT('GT'))
          | ``=='' -> (new RelOpT('EQ')),

    // boolean operators
    boolOp = ``&&'' -> (new BoolOpT('AND')) 
           | ``||'' -> (new BoolOpT('OR'))
           | ``!'' -> (new BoolOpT('NOT')),

    // boolean literals
    booleans = ``true'' -> (new TrueT())
         | ``false'' -> (new FalseT()),

    // overrides the inherited digit rule so that it yields an integer instead of a character
    // NOTE(review): parseInt is called without a radix; dv is a single digit character here
    digit  = ^digit:dv -> parseInt(dv), 
    // left-recursive: each further digit multiplies the accumulated value by 10 and adds dv
    number = number:nt digit:dv -> (new NumberT(nt.value * 10 + dv)) 
           | digit:dv           -> (new NumberT(dv)), 

    // constants start with an uppercase letter, followed by uppercase letters or digits;
    // ^digit is the inherited rule, and its result is coerced to a string for joining
    constantBody = ^upper:u -> u 
                 | ^digit:dv -> ("" + dv),
    constant = ^upper:u constantBody*:ucl -> (new ConstantT (u + ucl.join(''))),

    // identifiers start with a letter, followed by letters or digits
    idBody = ^letter:l -> l  
           | ^digit:dv -> ("" + dv), 
    id = ^letter:l idBody*:cl 
        -> (new IdT (l + cl.join(''))), 

    // double-quoted string: every character up to the next '"' is taken verbatim,
    // so there is no escape sequence for embedding a double quote
    string = '"' (~'"' ^anything)*:cl '"' 
        -> (new StringT (cl.length == 0 ? "" : cl.join(''))), 

    ifControl = ``if'' -> (new IfT()),

    thenControl = ``then'' -> (new ThenT()),

    elseControl = ``else'' -> (new ElseT()),

    // NOTE(review): bare `END` after the trailing comma above; its purpose is not
    // evident from this file — confirm whether the grammar compiler requires it
    END

}

/**
 * Token emitted by the lexer for the `let` keyword. Carries no data.
 * @constructor
 */
function LetT() {}

/**
 * Token emitted by the lexer for the `=` assignment operator. Carries no data.
 * @constructor
 */
function AssignT() {}

/**
 * Token emitted by the lexer for the `return` keyword. Carries no data.
 * @constructor
 */
function ReturnT() {}

/**
 * Token emitted by the lexer for the `true` literal. Carries no data.
 * @constructor
 */
function TrueT() {}

/**
 * Token emitted by the lexer for the `false` literal. Carries no data.
 * @constructor
 */
function FalseT() {}

/**
 * Relational-operator token.
 * @constructor
 * @param {string} op - Operator tag; the lexer passes 'LTEQ', 'LT', 'GTEQ', 'GT' or 'EQ'.
 */
function RelOpT(op) {
    this.op = op;
}

/**
 * Boolean-operator token.
 * @constructor
 * @param {string} op - Operator tag; the lexer passes 'AND', 'OR' or 'NOT'.
 */
function BoolOpT(op) {
    this.op = op;
}

/**
 * Numeric-literal token.
 * @constructor
 * @param {number} value - Value of the literal, stored as `this.value`.
 */
function NumberT(value) {
    this.value = value;
}

/**
 * Constant-name token (the lexer builds these from an uppercase-led name).
 * @constructor
 * @param {string} lexeme - Matched source text, stored as `this.lexeme`.
 */
function ConstantT(lexeme) {
    this.lexeme = lexeme;
}

/**
 * Identifier token.
 * @constructor
 * @param {string} lexeme - Matched source text, stored as `this.lexeme`.
 */
function IdT(lexeme) {
    this.lexeme = lexeme;
}

/**
 * String-literal token.
 * @constructor
 * @param {string} string - Literal contents without the surrounding quotes,
 *     stored as `this.string`.
 */
function StringT(string) {
    this.string = string;
}

/**
 * Token emitted by the lexer for the `if` keyword. Carries no data.
 * @constructor
 */
function IfT() {}

/**
 * Token emitted by the lexer for the `then` keyword. Carries no data.
 * @constructor
 */
function ThenT() {}

/**
 * Token emitted by the lexer for the `else` keyword. Carries no data.
 * @constructor
 */
function ElseT() {}

/**
 * Token emitted by the lexer for an opening parenthesis `(`. Carries no data.
 * @constructor
 */
function ParaOpenT() {}

/**
 * Token emitted by the lexer for a closing parenthesis `)`. Carries no data.
 * @constructor
 */
function ParaCloseT() {}

/**
 * Token emitted by the lexer for an opening curly brace `{`. Carries no data.
 * @constructor
 */
function ScopeOpenT() {}

/**
 * Token emitted by the lexer for a closing curly brace `}`. Carries no data.
 * @constructor
 */
function ScopeCloseT() {}

/**
 * Token emitted by the lexer for the statement terminator `;`. Carries no data.
 * @constructor
 */
function StatementEndT() {}

/**
 * Interactive driver: reads lines from stdin, lexes each one with the
 * `lexer` rule of `Lexer`, and prints the resulting tokens.
 *
 * The process exits with status 1 when the failure handler is invoked and
 * with status 0 when the input stream closes.
 */
var main = function () {

    var repl = require('readline').createInterface({
        input  : process.stdin,
        output : process.stdout
    });

    // Passed to Lexer.matchAll as its fourth argument; reports and aborts.
    function reportFailure (meta, error) {
        console.log('FAILURE!');
        console.log(error.OMeta);
        process.exit(1);
    }

    // Lex one line of input, print the tokens, then ask for the next line.
    function lexLine (line) {
        var tokens = Lexer.matchAll(line, 'lexer', [], reportFailure);
        console.log('LEXED:');
        console.log(tokens);
        repl.prompt();
    }

    // End of input: leave cleanly.
    function quit () {
        process.exit(0);
    }

    repl.setPrompt('CODE> ');
    repl.prompt();

    repl.on('line', lexLine);
    repl.on('close', quit);

};

// Start the interactive prompt only when this file is run directly;
// when it is loaded via require() only the export is made available.
if (require.main === module) {
    main();
}

Notice how I have to match strings using:

``...''

If instead I try using "...", I end up with:

C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:145
                                throw e;
                                ^

RangeError: Maximum call stack size exceeded
    at Object.OMeta._apply (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:372:13)
    at Object.<anonymous> (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:812:17)
    at lookupFunc (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:640:17)
    at lookup (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:142:12)
    at Object.OMeta._many (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:646:18)
    at Object.OMeta.spaces (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:811:16)
    at Object.OMeta._superApplyWithArgs (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:439:12)
    at Object.OMeta._extend.token (C:\Users\CMCDragonkai\Projects\Matrix-Architect\ometa_examples\control_flow\lexer.run.js:18:19)
    at Object.OMeta._applyWithArgs (C:\Users\CMCDragonkai\.node\node_modules\ometa-js\lib\ometajs\core.js:421:12)
    at Object.OMeta._extend.ifControl (C:\Users\CMCDragonkai\Projects\Matrix-Architect\ometa_examples\control_flow\lexer.run.js:275:18)

It happens at runtime, not compile time. The conversion works, just the execution doesn't.

The text was updated successfully, but these errors were encountered:

Page- · 2016-01-15T12:49:44Z

Ok, so the issue is that `"..."` is translated into `token('...')`, which by default is `spaces seq('...')`, whereas ``` ``...'' ``` is translated into `seq('...')`, which by default is the same as `'.' '.' '.'`.

My guess is that you are getting the "maximum call stack size exceeded" error with "..." because your grammar redefines token, which ends up causing infinite recursion - you can see the default definition for token here: https://github.com/Page-/ometa-js/blob/34e81b4f9b3de05bdfcb4e6e09d55b69a8914d35/lib/ometajs/core.js#L874 in ometajs it would be:

token :cs =
    spaces
    seq(cs)

CMCDragonkai · 2016-01-26T06:15:53Z

I think that makes sense. I'll try changing it later, but maybe there should be some feedback saying that some production rule got redefined. But also, shouldn't the "" translation call the parent object's token like ^token, instead of recursing into my custom token? This seems like a scoping leak, or a failure of modularity (action at a distance).

Also, why does token need whitespace ahead of the cs? I would have thought that "..." should be exactly equivalent to '.' '.' '.'.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

A question about string matching #12

A question about string matching #12

CMCDragonkai commented Jan 15, 2016

Page- commented Jan 15, 2016

CMCDragonkai commented Jan 26, 2016

A question about string matching #12

A question about string matching #12

Comments

CMCDragonkai commented Jan 15, 2016

Page- commented Jan 15, 2016

CMCDragonkai commented Jan 26, 2016