import CMMode from './CodeMirrorMode.js'; function execAt(str, reg, i) { reg.lastIndex = i; return reg.exec(str); } function unescape(match) { if(match[1] === 'n') { return '\n'; } return match[1]; } const TOKENS = [ {end: /(?=\n)|$/y, omit: true, start: /#/y}, { baseToken: {q: true}, end: /"/y, escape: /\\(.)/y, escapeWith: unescape, start: /"/y, }, {baseToken: {v: '...'}, start: /\.\.\./y}, {end: /(?=[ \t\r\n:+~\-*!<>,])|$/y, start: /(?=[^ \t\r\n:+~\-*!<>,])/y}, { end: /(?=[^~\-<>x])|[-~]x|[<>](?=x)|$/y, includeEnd: true, start: /(?=[~\-<])/y, }, {baseToken: {v: ','}, start: /,/y}, {baseToken: {v: ':'}, start: /:/y}, {baseToken: {v: '!'}, start: /!/y}, {baseToken: {v: '+'}, start: /\+/y}, {baseToken: {v: '*'}, start: /\*/y}, {baseToken: {v: '\n'}, start: /\n/y}, ]; function tokFindBegin(src, i) { for(let j = 0; j < TOKENS.length; ++ j) { const block = TOKENS[j]; const match = execAt(src, block.start, i); if(match) { return { appendSpace: '', appendValue: '', end: !block.end, newBlock: block, skip: match[0].length, }; } } return { appendSpace: src[i], appendValue: '', end: false, newBlock: null, skip: 1, }; } function tokContinuePart(src, i, block) { if(block.escape) { const match = execAt(src, block.escape, i); if(match) { return { appendSpace: '', appendValue: block.escapeWith(match), end: false, newBlock: null, skip: match[0].length, }; } } const match = execAt(src, block.end, i); if(match) { return { appendSpace: '', appendValue: block.includeEnd ? match[0] : '', end: true, newBlock: null, skip: match[0].length, }; } return { appendSpace: '', appendValue: src[i], end: false, newBlock: null, skip: 1, }; } function tokAdvance(src, i, block) { if(block) { return tokContinuePart(src, i, block); } else { return tokFindBegin(src, i); } } function copyPos(pos) { return {ch: pos.ch, i: pos.i, ln: pos.ln}; } function advancePos(pos, src, steps) { for(let i = 0; i < steps; ++ i) { ++ pos.ch; if(src[pos.i + i] === '\n') { ++ pos.ln; pos.ch = 0; } } pos.i += steps; } class TokenState { constructor(src) { this.src = src; this.block = null; this.token = null; this.pos = {ch: 0, i: 0, ln: 0}; this.reset(); } isOver() { return this.pos.i > this.src.length; } reset() { this.token = {b: null, e: null, q: false, s: '', v: ''}; this.block = null; } beginToken(advance) { this.block = advance.newBlock; Object.assign(this.token, this.block.baseToken); this.token.b = copyPos(this.pos); } endToken() { let tok = null; if(!this.block.omit) { this.token.e = copyPos(this.pos); tok = this.token; } this.reset(); return tok; } advance() { const advance = tokAdvance(this.src, this.pos.i, this.block); if(advance.newBlock) { this.beginToken(advance); } this.token.s += advance.appendSpace; this.token.v += advance.appendValue; advancePos(this.pos, this.src, advance.skip); if(advance.end) { return this.endToken(); } else { return null; } } } function posStr(pos) { return 'line ' + (pos.ln + 1) + ', character ' + pos.ch; } export default class Tokeniser { tokenise(src) { const tokens = []; const state = new TokenState(src); while(!state.isOver()) { const token = state.advance(); if(token) { tokens.push(token); } } if(state.block) { throw new Error( 'Unterminated literal (began at ' + posStr(state.token.b) + ')' ); } return tokens; } getCodeMirrorMode(arrows) { return new CMMode(TOKENS, arrows); } splitLines(tokens) { const lines = []; let line = []; tokens.forEach((token) => { if(!token.q && token.v === '\n') { if(line.length > 0) { lines.push(line); line = []; } } else { line.push(token); } }); if(line.length > 0) { lines.push(line); } return lines; } }