defineDescribe('Sequence Tokeniser', ['./Tokeniser'], (Tokeniser) => {
	'use strict';

	const tokeniser = new Tokeniser();
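
	/* Token shape used throughout these specs (as read from the
	 * expectations below, not from the Tokeniser source itself):
	 *   s - the whitespace consumed immediately before the token
	 *   v - the token value, with any escape sequences already resolved
	 *   q - true when the value came from a quoted string
	 */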

	describe('.tokenise', () => {
		it('converts the source into atomic tokens', () => {
			const input = 'foo bar -> baz';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: ' ', v: 'bar', q: false},
				{s: ' ', v: '->', q: false},
				{s: ' ', v: 'baz', q: false},
			]);
		});

		it('splits tokens at flexible boundaries', () => {
			const input = 'foo bar->baz';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: ' ', v: 'bar', q: false},
				{s: '', v: '->', q: false},
				{s: '', v: 'baz', q: false},
			]);
		});

		it('parses newlines as tokens', () => {
			const input = 'foo bar\nbaz';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: ' ', v: 'bar', q: false},
				{s: '', v: '\n', q: false},
				{s: '', v: 'baz', q: false},
			]);
		});

		it('parses quoted newlines as quoted tokens', () => {
			const input = 'foo "\n" baz';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: ' ', v: '\n', q: true},
				{s: ' ', v: 'baz', q: false},
			]);
		});

		it('removes leading and trailing whitespace', () => {
			const input = ' foo \t bar\t\n baz';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: ' ', v: 'foo', q: false},
				{s: ' \t ', v: 'bar', q: false},
				{s: '\t', v: '\n', q: false},
				{s: ' ', v: 'baz', q: false},
			]);
		});

		it('parses quoted strings as single tokens', () => {
			const input = 'foo "zig zag" \'abc def\'';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: ' ', v: 'zig zag', q: true},
				{s: ' ', v: 'abc def', q: true},
			]);
		});

		it('ignores comments', () => {
			const input = 'foo # bar baz\nzig';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: '', v: '\n', q: false},
				{s: '', v: 'zig', q: false},
			]);
		});

		it('ignores quotes within comments', () => {
			const input = 'foo # bar "\'baz\nzig';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: '', v: '\n', q: false},
				{s: '', v: 'zig', q: false},
			]);
		});

		it('interprets special characters within quoted strings', () => {
			const input = 'foo "zig\\" zag\\n"';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: ' ', v: 'zig" zag\n', q: true},
			]);
		});

		it('maintains whitespace and newlines within quoted strings', () => {
			const input = 'foo " zig\n zag "';
			const tokens = tokeniser.tokenise(input);
			expect(tokens).toEqual([
				{s: '', v: 'foo', q: false},
				{s: ' ', v: ' zig\n zag ', q: true},
			]);
		});

		it('rejects unterminated quoted values', () => {
			expect(() => tokeniser.tokenise('"nope')).toThrow();
		});
	});
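
	// .splitLines groups the flat token stream into statement lines; only
	// unquoted newline tokens act as separators (a reading of the cases
	// below, not of the Tokeniser source itself).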
	describe('.splitLines', () => {
		it('combines tokens', () => {
			const lines = tokeniser.splitLines([
				{s: '', v: 'abc', q: false},
				{s: '', v: 'd', q: false},
			]);
			expect(lines).toEqual([
				[{s: '', v: 'abc', q: false}, {s: '', v: 'd', q: false}],
			]);
		});

		it('splits at newlines', () => {
			const lines = tokeniser.splitLines([
				{s: '', v: 'abc', q: false},
				{s: '', v: 'd', q: false},
				{s: '', v: '\n', q: false},
				{s: '', v: 'e', q: false},
			]);
			expect(lines).toEqual([
				[{s: '', v: 'abc', q: false}, {s: '', v: 'd', q: false}],
				[{s: '', v: 'e', q: false}],
			]);
		});

		it('ignores multiple newlines', () => {
			const lines = tokeniser.splitLines([
				{s: '', v: 'abc', q: false},
				{s: '', v: 'd', q: false},
				{s: '', v: '\n', q: false},
				{s: '', v: '\n', q: false},
				{s: '', v: 'e', q: false},
			]);
			expect(lines).toEqual([
				[{s: '', v: 'abc', q: false}, {s: '', v: 'd', q: false}],
				[{s: '', v: 'e', q: false}],
			]);
		});

		it('ignores trailing newlines', () => {
			const lines = tokeniser.splitLines([
				{s: '', v: 'abc', q: false},
				{s: '', v: 'd', q: false},
				{s: '', v: '\n', q: false},
				{s: '', v: 'e', q: false},
				{s: '', v: '\n', q: false},
			]);
			expect(lines).toEqual([
				[{s: '', v: 'abc', q: false}, {s: '', v: 'd', q: false}],
				[{s: '', v: 'e', q: false}],
			]);
		});

		it('handles quoted newlines as regular tokens', () => {
			const lines = tokeniser.splitLines([
				{s: '', v: 'abc', q: false},
				{s: '', v: 'd', q: false},
				{s: '', v: '\n', q: true},
				{s: '', v: 'e', q: false},
			]);
			expect(lines).toEqual([
				[
					{s: '', v: 'abc', q: false},
					{s: '', v: 'd', q: false},
					{s: '', v: '\n', q: true},
					{s: '', v: 'e', q: false},
				],
			]);
		});

		it('handles empty input', () => {
			const lines = tokeniser.splitLines([]);
			expect(lines).toEqual([]);
		});
	});
});