"use strict"; // https://www.ics.uci.edu/~pattis/ICS-33/lectures/ebnf.pdf Object.defineProperty(exports, "__esModule", { value: true }); exports.Parser = exports.findRuleByName = exports.parseRuleName = exports.escapeRegExp = exports.readToken = void 0; const UPPER_SNAKE_RE = /^[A-Z0-9_]+$/; const decorationRE = /(\?|\+|\*)$/; const preDecorationRE = /^(@|&|!)/; const WS_RULE = 'WS'; const TokenError_1 = require("./TokenError"); function readToken(txt, expr) { let result = expr.exec(txt); if (result && result.index == 0) { if (result[0].length == 0 && expr.source.length > 0) return null; return { type: null, text: result[0], rest: txt.substr(result[0].length), start: 0, end: result[0].length - 1, fullText: result[0], errors: [], children: [], parent: null }; } return null; } exports.readToken = readToken; function escapeRegExp(str) { return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, '\\$&'); } exports.escapeRegExp = escapeRegExp; function fixRest(token) { token.rest = ''; token.children && token.children.forEach(c => fixRest(c)); } function fixPositions(token, start) { token.start += start; token.end += start; token.children && token.children.forEach(c => fixPositions(c, token.start)); } function agregateErrors(errors, token) { if (token.errors && token.errors.length) token.errors.forEach(err => errors.push(err)); token.children && token.children.forEach(tok => agregateErrors(errors, tok)); } function parseRuleName(name) { let postDecoration = decorationRE.exec(name); let preDecoration = preDecorationRE.exec(name); let postDecorationText = (postDecoration && postDecoration[0]) || ''; let preDecorationText = (preDecoration && preDecoration[0]) || ''; let out = { raw: name, name: name.replace(decorationRE, '').replace(preDecorationRE, ''), isOptional: postDecorationText == '?' || postDecorationText == '*', allowRepetition: postDecorationText == '+' || postDecorationText == '*', atLeastOne: postDecorationText == '+', lookupPositive: preDecorationText == '&', lookupNegative: preDecorationText == '!', pinned: preDecorationText == '@', lookup: false, isLiteral: false }; out.isLiteral = out.name[0] == "'" || out.name[0] == '"'; out.lookup = out.lookupNegative || out.lookupPositive; return out; } exports.parseRuleName = parseRuleName; function findRuleByName(name, parser) { let parsed = parseRuleName(name); return parser.cachedRules[parsed.name] || null; } exports.findRuleByName = findRuleByName; /// Removes all the nodes starting with 'RULE_' function stripRules(token, re) { if (token.children) { let localRules = token.children.filter(x => x.type && re.test(x.type)); for (let i = 0; i < localRules.length; i++) { let indexOnChildren = token.children.indexOf(localRules[i]); if (indexOnChildren != -1) { token.children.splice(indexOnChildren, 1); } } token.children.forEach(c => stripRules(c, re)); } } const ignoreMissingRules = ['EOF']; class Parser { constructor(grammarRules, options) { this.grammarRules = grammarRules; this.options = options; this.cachedRules = {}; this.debug = options ? options.debug === true : false; let errors = []; let neededRules = []; grammarRules.forEach(rule => { let parsedName = parseRuleName(rule.name); if (parsedName.name in this.cachedRules) { errors.push('Duplicated rule ' + parsedName.name); return; } else { this.cachedRules[parsedName.name] = rule; } if (!rule.bnf || !rule.bnf.length) { let error = 'Missing rule content, rule: ' + rule.name; if (errors.indexOf(error) == -1) errors.push(error); } else { rule.bnf.forEach(options => { if (typeof options[0] === 'string') { let parsed = parseRuleName(options[0]); if (parsed.name == rule.name) { let error = 'Left recursion is not allowed, rule: ' + rule.name; if (errors.indexOf(error) == -1) errors.push(error); } } options.forEach(option => { if (typeof option == 'string') { let name = parseRuleName(option); if (!name.isLiteral && neededRules.indexOf(name.name) == -1 && ignoreMissingRules.indexOf(name.name) == -1) neededRules.push(name.name); } }); }); } if (WS_RULE == rule.name) rule.implicitWs = false; if (rule.implicitWs) { if (neededRules.indexOf(WS_RULE) == -1) neededRules.push(WS_RULE); } if (rule.recover) { if (neededRules.indexOf(rule.recover) == -1) neededRules.push(rule.recover); } }); neededRules.forEach(ruleName => { if (!(ruleName in this.cachedRules)) { errors.push('Missing rule ' + ruleName); } }); if (errors.length) throw new Error(errors.join('\n')); } getAST(txt, target) { if (!target) { target = this.grammarRules.filter(x => !x.fragment && x.name.indexOf('%') != 0)[0].name; } let result = this.parse(txt, target); if (result) { agregateErrors(result.errors, result); fixPositions(result, 0); // REMOVE ALL THE TAGS MATCHING /^%/ stripRules(result, /^%/); if (!this.options || !this.options.keepUpperRules) stripRules(result, UPPER_SNAKE_RE); let rest = result.rest; if (rest) { new TokenError_1.TokenError('Unexpected end of input: \n' + rest, result); } fixRest(result); result.rest = rest; } return result; } emitSource() { return 'CANNOT EMIT SOURCE FROM BASE Parser'; } parse(txt, target, recursion = 0) { let out = null; let type = parseRuleName(target); let expr; let printable = this.debug && /*!isLiteral &*/ !UPPER_SNAKE_RE.test(type.name); printable && console.log(new Array(recursion).join('│ ') + 'Trying to get ' + target + ' from ' + JSON.stringify(txt.split('\n')[0])); let realType = type.name; let targetLex = findRuleByName(type.name, this); if (type.name == 'EOF') { if (txt.length) { return null; } else if (txt.length == 0) { return { type: 'EOF', text: '', rest: '', start: 0, end: 0, fullText: '', errors: [], children: [], parent: null }; } } try { if (!targetLex && type.isLiteral) { let src = type.name.trim(); if (src.startsWith('"')) { src = JSON.parse(src); } else if (src.startsWith("'")) { src = src.replace(/^'(.+)'$/, '$1').replace(/\\'/g, "'"); } if (src === '') { return { type: '%%EMPTY%%', text: '', rest: txt, start: 0, end: 0, fullText: '', errors: [], children: [], parent: null }; } expr = new RegExp(escapeRegExp(src)); realType = null; } } catch (e) { if (e instanceof ReferenceError) { console.error(e); } return null; } if (expr) { let result = readToken(txt, expr); if (result) { result.type = realType; return result; } } else { let options = targetLex.bnf; if (options instanceof Array) { options.forEach(phases => { if (out) return; let pinned = null; let tmp = { type: type.name, text: '', children: [], end: 0, errors: [], fullText: '', parent: null, start: 0, rest: txt }; if (targetLex.fragment) tmp.fragment = true; let tmpTxt = txt; let position = 0; let allOptional = phases.length > 0; let foundSomething = false; for (let i = 0; i < phases.length; i++) { if (typeof phases[i] == 'string') { let localTarget = parseRuleName(phases[i]); allOptional = allOptional && localTarget.isOptional; let got; let foundAtLeastOne = false; do { got = null; if (targetLex.implicitWs) { got = this.parse(tmpTxt, localTarget.name, recursion + 1); if (!got) { let WS; do { WS = this.parse(tmpTxt, WS_RULE, recursion + 1); if (WS) { tmp.text = tmp.text + WS.text; tmp.end = tmp.text.length; WS.parent = tmp; tmp.children.push(WS); tmpTxt = tmpTxt.substr(WS.text.length); position += WS.text.length; } else { break; } } while (WS && WS.text.length); } } got = got || this.parse(tmpTxt, localTarget.name, recursion + 1); // rule ::= "true" ![a-zA-Z] // negative lookup, if it does not match, we should continue if (localTarget.lookupNegative) { if (got) return /* cancel this path */; break; } if (localTarget.lookupPositive) { if (!got) return; } if (!got) { if (localTarget.isOptional) break; if (localTarget.atLeastOne && foundAtLeastOne) break; } if (got && targetLex.pinned == i + 1) { pinned = got; printable && console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' PINNED'); } if (!got) got = this.parseRecovery(targetLex, tmpTxt, recursion + 1); if (!got) { if (pinned) { out = tmp; got = { type: 'SyntaxError', text: tmpTxt, children: [], end: tmpTxt.length, errors: [], fullText: '', parent: null, start: 0, rest: '' }; if (tmpTxt.length) { new TokenError_1.TokenError(`Unexpected end of input. Expecting ${localTarget.name} Got: ${tmpTxt}`, got); } else { new TokenError_1.TokenError(`Unexpected end of input. Missing ${localTarget.name}`, got); } printable && console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' ' + JSON.stringify(got.text)); } else { return; } } foundAtLeastOne = true; foundSomething = true; if (got.type == '%%EMPTY%%') { break; } got.start += position; got.end += position; if (!localTarget.lookupPositive && got.type) { if (got.fragment) { got.children && got.children.forEach(x => { x.start += position; x.end += position; x.parent = tmp; tmp.children.push(x); }); } else { got.parent = tmp; tmp.children.push(got); } } if (localTarget.lookup) got.lookup = true; printable && console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' ' + JSON.stringify(got.text)); // Eat it from the input stream, only if it is not a lookup if (!localTarget.lookup && !got.lookup) { tmp.text = tmp.text + got.text; tmp.end = tmp.text.length; tmpTxt = tmpTxt.substr(got.text.length); position += got.text.length; } tmp.rest = tmpTxt; } while (got && localTarget.allowRepetition && tmpTxt.length && !got.lookup); } /* IS A REGEXP */ else { let got = readToken(tmpTxt, phases[i]); if (!got) { return; } printable && console.log(new Array(recursion + 1).join('│ ') + '└> ' + JSON.stringify(got.text) + phases[i].source); foundSomething = true; got.start += position; got.end += position; tmp.text = tmp.text + got.text; tmp.end = tmp.text.length; tmpTxt = tmpTxt.substr(got.text.length); position += got.text.length; tmp.rest = tmpTxt; } } if (foundSomething) { out = tmp; printable && console.log(new Array(recursion).join('│ ') + '├<─┴< PUSHING ' + out.type + ' ' + JSON.stringify(out.text)); } }); } if (out && targetLex.simplifyWhenOneChildren && out.children.length == 1) { out = out.children[0]; } } if (!out) { printable && console.log(target + ' NOT RESOLVED FROM ' + txt); } return out; } parseRecovery(recoverableToken, tmpTxt, recursion) { if (recoverableToken.recover && tmpTxt.length) { let printable = this.debug; printable && console.log(new Array(recursion + 1).join('│ ') + 'Trying to recover until token ' + recoverableToken.recover + ' from ' + JSON.stringify(tmpTxt.split('\n')[0] + tmpTxt.split('\n')[1])); let tmp = { type: 'SyntaxError', text: '', children: [], end: 0, errors: [], fullText: '', parent: null, start: 0, rest: '' }; let got; do { got = this.parse(tmpTxt, recoverableToken.recover, recursion + 1); if (got) { new TokenError_1.TokenError('Unexpected input: "' + tmp.text + `" Expecting: ${recoverableToken.name}`, tmp); break; } else { tmp.text = tmp.text + tmpTxt[0]; tmp.end = tmp.text.length; tmpTxt = tmpTxt.substr(1); } } while (!got && tmpTxt.length > 0); if (tmp.text.length > 0 && got) { printable && console.log(new Array(recursion + 1).join('│ ') + 'Recovered text: ' + JSON.stringify(tmp.text)); return tmp; } } return null; } } exports.Parser = Parser; exports.default = Parser; //# sourceMappingURL=Parser.js.map