botbook/node_modules/ebnf/dist/Parser.js

456 lines
19 KiB
JavaScript
Raw Normal View History

2024-09-04 13:13:15 -03:00
"use strict";
// EBNF notation reference: https://www.ics.uci.edu/~pattis/ICS-33/lectures/ebnf.pdf
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every export as undefined; the real values are assigned next to each definition below.
exports.Parser = exports.findRuleByName = exports.parseRuleName = exports.escapeRegExp = exports.readToken = void 0;
// Matches names made only of uppercase letters, digits and underscores.
// Used by Parser to strip such nodes from the AST (unless `keepUpperRules` is set)
// and to silence debug output for those rules.
const UPPER_SNAKE_RE = /^[A-Z0-9_]+$/;
// Trailing decoration of a rule reference: `?`, `+` or `*`.
const decorationRE = /(\?|\+|\*)$/;
// Leading decoration of a rule reference: `@`, `&` or `!`.
const preDecorationRE = /^(@|&|!)/;
// Name of the whitespace rule that Parser consumes between items of rules flagged `implicitWs`.
const WS_RULE = 'WS';
const TokenError_1 = require("./TokenError");
/**
 * Tries to match `expr` at the very beginning of `txt`.
 *
 * A match only counts when it starts at index 0 and, unless the pattern
 * source itself is empty, consumes at least one character.
 *
 * @param {string} txt - Input text to read from.
 * @param {RegExp} expr - Pattern to match at the start of `txt`.
 * @returns {object|null} A new token (`type` is null, `text`/`fullText` hold
 *   the matched text, `rest` holds the unconsumed input, `start` is 0 and
 *   `end` is the index of the last matched character), or null when the
 *   pattern does not match at index 0.
 */
function readToken(txt, expr) {
    // A global or sticky RegExp resumes from its own `lastIndex`. When the same
    // pattern object is reused across calls, that would start the search in the
    // middle of `txt` and miss a perfectly valid match at index 0.
    if (expr.global || expr.sticky) {
        expr.lastIndex = 0;
    }
    const match = expr.exec(txt);
    if (!match || match.index !== 0) {
        return null;
    }
    const matched = match[0];
    // Reject zero-length matches so callers always make progress.
    if (matched.length === 0 && expr.source.length > 0) {
        return null;
    }
    return {
        type: null,
        text: matched,
        rest: txt.slice(matched.length),
        start: 0,
        end: matched.length - 1,
        fullText: matched,
        errors: [],
        children: [],
        parent: null
    };
}
exports.readToken = readToken;
/**
 * Escapes every regular-expression metacharacter in `str` with a backslash so
 * the result can be embedded in a RegExp and match the text literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(str) {
    const metaChars = /[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g;
    return str.replace(metaChars, (ch) => '\\' + ch);
}
exports.escapeRegExp = escapeRegExp;
/**
 * Clears the `rest` field of `token` and of every descendant reachable
 * through `children`.
 *
 * @param {object} token - Root of the token tree to update in place.
 */
function fixRest(token) {
    const pending = [token];
    while (pending.length > 0) {
        const node = pending.pop();
        node.rest = '';
        if (node.children) {
            node.children.forEach((child) => {
                pending.push(child);
            });
        }
    }
}
/**
 * Shifts `start` and `end` of `token` by `start`, then shifts every child by
 * its parent's already-shifted `start`, recursively. This turns
 * parent-relative offsets into offsets relative to the root.
 *
 * @param {object} token - Root of the token tree to update in place.
 * @param {number} start - Offset added to the root token.
 */
function fixPositions(token, start) {
    const work = [{ node: token, offset: start }];
    while (work.length > 0) {
        const { node, offset } = work.pop();
        node.start += offset;
        node.end += offset;
        const kids = node.children;
        if (kids) {
            // Queue children right-to-left so they are visited left-to-right.
            for (let i = kids.length - 1; i >= 0; i--) {
                work.push({ node: kids[i], offset: node.start });
            }
        }
    }
}
/**
 * Appends the errors of `token` and of all its descendants to `errors`,
 * visiting the tree parent-first, children left-to-right.
 *
 * @param {Array} errors - Accumulator that receives every error found.
 * @param {object} token - Root of the token tree to scan.
 */
function agregateErrors(errors, token) {
    const own = token.errors;
    // The accumulator may be the token's own `errors` array (the root is
    // aggregated into itself). Copying it onto itself would only duplicate
    // entries it already holds, so skip that case.
    if (own && own.length && own !== errors) {
        own.forEach(err => errors.push(err));
    }
    token.children && token.children.forEach(child => agregateErrors(errors, child));
}
/**
 * Splits a rule reference into its bare name and the flags encoded by its
 * decorations.
 *
 * A trailing `?`, `+` or `*` controls optionality/repetition; a leading `@`,
 * `&` or `!` marks the reference as pinned, positive lookup or negative
 * lookup respectively.
 *
 * @param {string} name - Rule reference as written in the grammar.
 * @returns {object} Descriptor with `raw`, `name`, `isOptional`,
 *   `allowRepetition`, `atLeastOne`, `lookupPositive`, `lookupNegative`,
 *   `pinned`, `lookup` and `isLiteral`.
 */
function parseRuleName(name) {
    const suffixMatch = decorationRE.exec(name);
    const prefixMatch = preDecorationRE.exec(name);
    const suffix = suffixMatch ? suffixMatch[0] : '';
    const prefix = prefixMatch ? prefixMatch[0] : '';
    // Strip the suffix first, then the prefix, to obtain the undecorated name.
    const bareName = name.replace(decorationRE, '').replace(preDecorationRE, '');
    const firstChar = bareName[0];
    const lookupPositive = prefix === '&';
    const lookupNegative = prefix === '!';
    return {
        raw: name,
        name: bareName,
        isOptional: suffix === '?' || suffix === '*',
        allowRepetition: suffix === '+' || suffix === '*',
        atLeastOne: suffix === '+',
        lookupPositive,
        lookupNegative,
        pinned: prefix === '@',
        lookup: lookupNegative || lookupPositive,
        // A name that starts with a quote character denotes a literal string.
        isLiteral: firstChar === "'" || firstChar === '"'
    };
}
exports.parseRuleName = parseRuleName;
/**
 * Looks up the rule registered under the undecorated form of `name`.
 *
 * @param {string} name - Rule reference, possibly carrying decorations.
 * @param {object} parser - Object exposing a `cachedRules` name-to-rule map.
 * @returns {object|null} The registered rule, or null when there is none.
 */
function findRuleByName(name, parser) {
    const parsed = parseRuleName(name);
    const rules = parser.cachedRules;
    // Only own entries count: on a plain object, names such as `constructor`
    // or `toString` would otherwise resolve to inherited Object.prototype
    // members and be mistaken for rules.
    if (!Object.prototype.hasOwnProperty.call(rules, parsed.name)) {
        return null;
    }
    return rules[parsed.name] || null;
}
exports.findRuleByName = findRuleByName;
/**
 * Removes, in place and at every depth, the child tokens whose `type` is
 * truthy and matches `re`. Children of a removed token are dropped with it.
 *
 * @param {object} token - Root of the token tree to prune.
 * @param {RegExp} re - Pattern tested against each child's `type`.
 */
function stripRules(token, re) {
    const kids = token.children;
    if (!kids) {
        return;
    }
    let idx = 0;
    while (idx < kids.length) {
        const kid = kids[idx];
        if (kid.type && re.test(kid.type)) {
            // Removing shifts the next child into `idx`, so do not advance.
            kids.splice(idx, 1);
        }
        else {
            idx += 1;
        }
    }
    kids.forEach((kid) => stripRules(kid, re));
}
// Rule names a grammar may reference without declaring them: the Parser
// constructor never reports these as missing.
const ignoreMissingRules = ['EOF'];
/**
 * Backtracking parser driven by a list of grammar rules.
 *
 * Each rule is read for `name` and `bnf` (an array of alternatives; every
 * alternative is an array whose items are either rule-reference strings or
 * RegExp objects), plus the optional fields `fragment`, `implicitWs`,
 * `recover`, `pinned` and `simplifyWhenOneChildren`.
 */
class Parser {
    /**
     * Indexes the rules by undecorated name and validates the grammar.
     *
     * @param {Array} grammarRules - Rule descriptors.
     * @param {object} [options] - `debug: true` enables console tracing;
     *   `keepUpperRules` keeps UPPER_SNAKE nodes in the AST built by getAST.
     * @throws {Error} When the grammar has duplicated rules, rules without
     *   content, direct left recursion or references to undeclared rules.
     *   All problems are reported together, one per line.
     */
    constructor(grammarRules, options) {
        this.grammarRules = grammarRules;
        this.options = options;
        this.cachedRules = {};
        this.debug = options ? options.debug === true : false;
        const errors = [];
        const neededRules = [];
        // Own-property test: with the `in` operator, inherited names such as
        // `toString` would look like already-registered rules.
        const hasRule = (ruleName) => Object.prototype.hasOwnProperty.call(this.cachedRules, ruleName);
        grammarRules.forEach(rule => {
            const parsedName = parseRuleName(rule.name);
            if (hasRule(parsedName.name)) {
                errors.push('Duplicated rule ' + parsedName.name);
                return;
            }
            this.cachedRules[parsedName.name] = rule;
            if (!rule.bnf || !rule.bnf.length) {
                const error = 'Missing rule content, rule: ' + rule.name;
                if (errors.indexOf(error) == -1)
                    errors.push(error);
            }
            else {
                rule.bnf.forEach(alternative => {
                    // An alternative whose first item is the rule itself can never terminate.
                    if (typeof alternative[0] === 'string') {
                        const parsed = parseRuleName(alternative[0]);
                        if (parsed.name == rule.name) {
                            const error = 'Left recursion is not allowed, rule: ' + rule.name;
                            if (errors.indexOf(error) == -1)
                                errors.push(error);
                        }
                    }
                    // Record every referenced, non-literal rule so it can be checked below.
                    alternative.forEach(item => {
                        if (typeof item == 'string') {
                            const name = parseRuleName(item);
                            if (!name.isLiteral &&
                                neededRules.indexOf(name.name) == -1 &&
                                ignoreMissingRules.indexOf(name.name) == -1)
                                neededRules.push(name.name);
                        }
                    });
                });
            }
            // The whitespace rule itself must not skip whitespace implicitly.
            if (WS_RULE == rule.name)
                rule.implicitWs = false;
            if (rule.implicitWs) {
                if (neededRules.indexOf(WS_RULE) == -1)
                    neededRules.push(WS_RULE);
            }
            if (rule.recover) {
                if (neededRules.indexOf(rule.recover) == -1)
                    neededRules.push(rule.recover);
            }
        });
        neededRules.forEach(ruleName => {
            if (!hasRule(ruleName)) {
                errors.push('Missing rule ' + ruleName);
            }
        });
        if (errors.length)
            throw new Error(errors.join('\n'));
    }
    /**
     * Parses `txt` and post-processes the resulting token tree.
     *
     * @param {string} txt - Input text.
     * @param {string} [target] - Rule to start from. Defaults to the first
     *   rule that is not a fragment and whose name does not start with `%`;
     *   a TypeError is thrown if no such rule exists.
     * @returns {object|null} Root token, or null when parsing failed. On the
     *   root, `rest` holds the unparsed remainder of the input; on every
     *   descendant it is cleared.
     */
    getAST(txt, target) {
        if (!target) {
            target = this.grammarRules.filter(x => !x.fragment && x.name.indexOf('%') != 0)[0].name;
        }
        const result = this.parse(txt, target);
        if (result) {
            agregateErrors(result.errors, result);
            fixPositions(result, 0);
            // REMOVE ALL THE TAGS MATCHING /^%/
            stripRules(result, /^%/);
            if (!this.options || !this.options.keepUpperRules)
                stripRules(result, UPPER_SNAKE_RE);
            const rest = result.rest;
            if (rest) {
                // The instance is discarded here; presumably the TokenError
                // constructor registers itself on the token - confirm in ./TokenError.
                new TokenError_1.TokenError('Unexpected end of input: \n' + rest, result);
            }
            fixRest(result);
            result.rest = rest;
        }
        return result;
    }
    /**
     * Placeholder: the base parser has no grammar source to emit.
     *
     * @returns {string} A fixed explanatory message.
     */
    emitSource() {
        return 'CANNOT EMIT SOURCE FROM BASE Parser';
    }
    /**
     * Tries to read `target` from the beginning of `txt`.
     *
     * `target` may be the built-in `EOF`, a quoted literal, or the name of a
     * registered rule. For a rule, the alternatives in `bnf` are tried in
     * order and the first one that produces something wins.
     *
     * @param {string} txt - Input text.
     * @param {string} target - Rule reference or quoted literal.
     * @param {number} [recursion=0] - Nesting depth, used only to indent debug output.
     * @returns {object|null} The token read, or null when `target` does not
     *   match or names an unknown rule.
     */
    parse(txt, target, recursion = 0) {
        let out = null;
        const type = parseRuleName(target);
        let expr;
        const printable = this.debug && /*!isLiteral &*/ !UPPER_SNAKE_RE.test(type.name);
        printable &&
            console.log(new Array(recursion).join('│ ') + 'Trying to get ' + target + ' from ' + JSON.stringify(txt.split('\n')[0]));
        let realType = type.name;
        const targetLex = findRuleByName(type.name, this);
        if (type.name == 'EOF') {
            // EOF only matches when no input is left.
            if (txt.length) {
                return null;
            }
            return {
                type: 'EOF',
                text: '',
                rest: '',
                start: 0,
                end: 0,
                fullText: '',
                errors: [],
                children: [],
                parent: null
            };
        }
        try {
            if (!targetLex && type.isLiteral) {
                let src = type.name.trim();
                if (src.startsWith('"')) {
                    // Double-quoted literals use JSON string syntax.
                    src = JSON.parse(src);
                }
                else if (src.startsWith("'")) {
                    // NOTE(review): `.+` leaves an empty single-quoted literal ('') untouched,
                    // so it is matched as two quote characters instead of as the empty string - confirm intent.
                    src = src.replace(/^'(.+)'$/, '$1').replace(/\\'/g, "'");
                }
                if (src === '') {
                    // The empty literal matches without consuming input.
                    return {
                        type: '%%EMPTY%%',
                        text: '',
                        rest: txt,
                        start: 0,
                        end: 0,
                        fullText: '',
                        errors: [],
                        children: [],
                        parent: null
                    };
                }
                expr = new RegExp(escapeRegExp(src));
                realType = null;
            }
        }
        catch (e) {
            // A malformed literal simply fails to match.
            if (e instanceof ReferenceError) {
                console.error(e);
            }
            return null;
        }
        if (expr) {
            const result = readToken(txt, expr);
            if (result) {
                result.type = realType;
                return result;
            }
        }
        else if (targetLex) {
            // An unknown, non-literal target has no rule to expand: it skips this
            // branch and resolves to null instead of dereferencing a missing rule.
            const options = targetLex.bnf;
            if (options instanceof Array) {
                options.forEach(phases => {
                    // A previous alternative already succeeded.
                    if (out)
                        return;
                    let pinned = null;
                    const tmp = {
                        type: type.name,
                        text: '',
                        children: [],
                        end: 0,
                        errors: [],
                        fullText: '',
                        parent: null,
                        start: 0,
                        rest: txt
                    };
                    if (targetLex.fragment)
                        tmp.fragment = true;
                    let tmpTxt = txt;
                    let position = 0;
                    let foundSomething = false;
                    for (let i = 0; i < phases.length; i++) {
                        if (typeof phases[i] == 'string') {
                            const localTarget = parseRuleName(phases[i]);
                            let got;
                            let foundAtLeastOne = false;
                            do {
                                got = null;
                                if (targetLex.implicitWs) {
                                    got = this.parse(tmpTxt, localTarget.name, recursion + 1);
                                    if (!got) {
                                        // The item did not match as-is: consume leading whitespace, then retry below.
                                        let WS;
                                        do {
                                            WS = this.parse(tmpTxt, WS_RULE, recursion + 1);
                                            if (WS) {
                                                tmp.text = tmp.text + WS.text;
                                                tmp.end = tmp.text.length;
                                                WS.parent = tmp;
                                                tmp.children.push(WS);
                                                tmpTxt = tmpTxt.slice(WS.text.length);
                                                position += WS.text.length;
                                            }
                                            else {
                                                break;
                                            }
                                        } while (WS && WS.text.length);
                                    }
                                }
                                got = got || this.parse(tmpTxt, localTarget.name, recursion + 1);
                                // rule ::= "true" ![a-zA-Z]
                                // negative lookup, if it does not match, we should continue
                                if (localTarget.lookupNegative) {
                                    if (got)
                                        return /* cancel this path */;
                                    break;
                                }
                                if (localTarget.lookupPositive) {
                                    if (!got)
                                        return;
                                }
                                if (!got) {
                                    if (localTarget.isOptional)
                                        break;
                                    if (localTarget.atLeastOne && foundAtLeastOne)
                                        break;
                                }
                                if (got && targetLex.pinned == i + 1) {
                                    pinned = got;
                                    printable && console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' PINNED');
                                }
                                if (!got)
                                    got = this.parseRecovery(targetLex, tmpTxt, recursion + 1);
                                if (!got) {
                                    if (pinned) {
                                        // Past the pinned item the alternative is committed: keep what
                                        // was read and turn the remaining input into a SyntaxError token.
                                        out = tmp;
                                        got = {
                                            type: 'SyntaxError',
                                            text: tmpTxt,
                                            children: [],
                                            end: tmpTxt.length,
                                            errors: [],
                                            fullText: '',
                                            parent: null,
                                            start: 0,
                                            rest: ''
                                        };
                                        if (tmpTxt.length) {
                                            new TokenError_1.TokenError(`Unexpected end of input. Expecting ${localTarget.name} Got: ${tmpTxt}`, got);
                                        }
                                        else {
                                            new TokenError_1.TokenError(`Unexpected end of input. Missing ${localTarget.name}`, got);
                                        }
                                        printable &&
                                            console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' ' + JSON.stringify(got.text));
                                    }
                                    else {
                                        return;
                                    }
                                }
                                foundAtLeastOne = true;
                                foundSomething = true;
                                if (got.type == '%%EMPTY%%') {
                                    break;
                                }
                                got.start += position;
                                got.end += position;
                                if (!localTarget.lookupPositive && got.type) {
                                    if (got.fragment) {
                                        // Fragments are transparent: adopt their children directly.
                                        got.children &&
                                            got.children.forEach(x => {
                                                x.start += position;
                                                x.end += position;
                                                x.parent = tmp;
                                                tmp.children.push(x);
                                            });
                                    }
                                    else {
                                        got.parent = tmp;
                                        tmp.children.push(got);
                                    }
                                }
                                if (localTarget.lookup)
                                    got.lookup = true;
                                printable &&
                                    console.log(new Array(recursion + 1).join('│ ') + '└─ ' + got.type + ' ' + JSON.stringify(got.text));
                                // Eat it from the input stream, only if it is not a lookup
                                if (!localTarget.lookup && !got.lookup) {
                                    tmp.text = tmp.text + got.text;
                                    tmp.end = tmp.text.length;
                                    tmpTxt = tmpTxt.slice(got.text.length);
                                    position += got.text.length;
                                }
                                tmp.rest = tmpTxt;
                            } while (got && localTarget.allowRepetition && tmpTxt.length && !got.lookup);
                        } /* IS A REGEXP */
                        else {
                            const got = readToken(tmpTxt, phases[i]);
                            if (!got) {
                                return;
                            }
                            printable &&
                                console.log(new Array(recursion + 1).join('│ ') + '└> ' + JSON.stringify(got.text) + phases[i].source);
                            foundSomething = true;
                            got.start += position;
                            got.end += position;
                            tmp.text = tmp.text + got.text;
                            tmp.end = tmp.text.length;
                            tmpTxt = tmpTxt.slice(got.text.length);
                            position += got.text.length;
                            tmp.rest = tmpTxt;
                        }
                    }
                    if (foundSomething) {
                        out = tmp;
                        printable &&
                            console.log(new Array(recursion).join('│ ') + '├<─┴< PUSHING ' + out.type + ' ' + JSON.stringify(out.text));
                    }
                });
            }
            if (out && targetLex.simplifyWhenOneChildren && out.children.length == 1) {
                out = out.children[0];
            }
        }
        if (!out) {
            printable && console.log(target + ' NOT RESOLVED FROM ' + txt);
        }
        return out;
    }
    /**
     * Error recovery: skips input one character at a time until the rule named
     * by `recoverableToken.recover` matches, and wraps the skipped text in a
     * `SyntaxError` token.
     *
     * @param {object} recoverableToken - Rule being parsed; only used when it has `recover`.
     * @param {string} tmpTxt - Input remaining at the point of failure.
     * @param {number} recursion - Nesting depth, used only to indent debug output.
     * @returns {object|null} The `SyntaxError` token holding the skipped text,
     *   or null when the rule has no `recover`, the input is empty, the
     *   recovery rule never matched, or nothing had to be skipped.
     */
    parseRecovery(recoverableToken, tmpTxt, recursion) {
        if (recoverableToken.recover && tmpTxt.length) {
            const printable = this.debug;
            // Preview at most the first two lines; single-line input has no second line to append.
            printable &&
                console.log(new Array(recursion + 1).join('│ ') +
                    'Trying to recover until token ' +
                    recoverableToken.recover +
                    ' from ' +
                    JSON.stringify(tmpTxt.split('\n').slice(0, 2).join('')));
            const tmp = {
                type: 'SyntaxError',
                text: '',
                children: [],
                end: 0,
                errors: [],
                fullText: '',
                parent: null,
                start: 0,
                rest: ''
            };
            let got;
            do {
                got = this.parse(tmpTxt, recoverableToken.recover, recursion + 1);
                if (got) {
                    new TokenError_1.TokenError('Unexpected input: "' + tmp.text + `" Expecting: ${recoverableToken.name}`, tmp);
                    break;
                }
                else {
                    // Move one character from the input into the skipped text and retry.
                    tmp.text = tmp.text + tmpTxt[0];
                    tmp.end = tmp.text.length;
                    tmpTxt = tmpTxt.slice(1);
                }
            } while (!got && tmpTxt.length > 0);
            if (tmp.text.length > 0 && got) {
                printable && console.log(new Array(recursion + 1).join('│ ') + 'Recovered text: ' + JSON.stringify(tmp.text));
                return tmp;
            }
        }
        return null;
    }
}
exports.Parser = Parser;
exports.default = Parser;
//# sourceMappingURL=Parser.js.map