Support backslash-escaping markdown syntax [#54]

This commit is contained in:
David Evans 2018-05-06 21:23:07 +01:00
parent 13be0b210e
commit 011d8c6979
8 changed files with 244 additions and 110 deletions

View File

@ -3874,8 +3874,11 @@
if(match[1] === 'n') {
return '\n';
}
if('"\\'.indexOf(match[1]) !== -1) {
return match[1];
}
return '\u001B' + match[1];
}
const TOKENS = [
{end: /(?=\n)|$/y, omit: true, start: /#/y},
@ -3901,6 +3904,10 @@
{baseToken: {v: '\n'}, start: /\n/y},
];
/* eslint-disable no-control-regex */ // Removing control characters is the aim
const CONTROL_CHARS = /[\x00-\x08\x0E-\x1F]/g;
/* eslint-enable no-control-regex */
function tokFindBegin(src, i) {
for(let j = 0; j < TOKENS.length; ++ j) {
const block = TOKENS[j];
@ -3981,7 +3988,7 @@
class TokenState {
constructor(src) {
this.src = src;
this.src = src.replace(CONTROL_CHARS, '');
this.block = null;
this.token = null;
this.pos = {ch: 0, i: 0, ln: 0};
@ -4220,6 +4227,11 @@
},
];
// Any run of collapsible whitespace: form feed, newline, carriage return, tab, vertical tab or space
const WHITE = /[\f\n\r\t\v ]+/g;
// Leading or trailing whitespace; the range \t-\r covers the same \t \n \v \f \r characters as WHITE
const WHITE_END = /^[\t-\r ]+|[\t-\r ]+$/g;
// Sentinel styleIndex reported by findNext when it finds an escape character (U+001B)
const ESC = -2;
function findNext(line, p, active) {
const virtLine = ' ' + line + ' ';
let styleIndex = -1;
@ -4241,6 +4253,17 @@
}
});
const escIndex = virtLine.indexOf('\u001B', p + 1);
if(escIndex !== -1 && escIndex < bestStart) {
styleIndex = ESC;
bestStart = escIndex;
bestEnd = escIndex + 1;
}
if(styleIndex === -1) {
return null;
}
return {end: bestEnd - 1, start: bestStart - 1, styleIndex};
}
@ -4268,51 +4291,60 @@
}
function shrinkWhitespace(text) {
return text.replace(/[\f\n\r\t\v ]+/g, ' ');
return text.replace(WHITE, ' ');
}
function trimCollapsible(text) {
return text.replace(/^[\f\n\r\t\v ]+|[\f\n\r\t\v ]+$/g, '');
return text.replace(WHITE_END, '');
}
function parseMarkdown(text) {
if(!text) {
/**
 * Scans one line for style markers and escape characters, reporting the
 * plain text runs and the style toggles in the order they occur.
 *
 * Escape characters (reported by findNext with styleIndex ESC) are removed
 * from the line, and the character directly after each one is skipped when
 * searching for further markers, so it ends up in the emitted text.
 *
 * @param {string} line - text of a single line
 * @param {boolean[]} active - per-style on/off flags; flipped in place as
 *   markers are found, before toggleCallback is invoked
 * @param {function(number)} toggleCallback - called with the style index
 *   each time a style is toggled
 * @param {function(string)} textCallback - called with each non-empty run
 *   of text found between toggles (and after the last one)
 */
function findStyles(line, active, toggleCallback, textCallback) {
	let remaining = line;
	let searchFrom = 0;
	let textFrom = 0;
	for(
		let found = findNext(remaining, searchFrom, active);
		found;
		found = findNext(remaining, searchFrom, active)
	) {
		if(found.styleIndex === ESC) {
			// Cut out the escape itself, then resume after the character it protects
			remaining = (
				remaining.substring(0, found.start) +
				remaining.slice(found.end)
			);
			searchFrom = found.start + 1;
			continue;
		}
		if(found.start > textFrom) {
			textCallback(remaining.substring(textFrom, found.start));
		}
		active[found.styleIndex] = !active[found.styleIndex];
		toggleCallback(found.styleIndex);
		textFrom = found.end;
		searchFrom = found.end;
	}
	if(textFrom < remaining.length) {
		textCallback(remaining.slice(textFrom));
	}
}
function parseMarkdown(markdown) {
if(!markdown) {
return [];
}
const active = STYLES.map(() => false);
let activeCount = 0;
let attrs = null;
const lines = trimCollapsible(text).split('\n');
const result = [];
lines.forEach((line) => {
const ln = shrinkWhitespace(trimCollapsible(line));
const lines = trimCollapsible(markdown).split('\n');
return lines.map((line) => {
const parts = [];
let p = 0;
for(;;) {
const {styleIndex, start, end} = findNext(ln, p, active);
if(styleIndex === -1) {
break;
}
if(active[styleIndex]) {
active[styleIndex] = false;
-- activeCount;
} else {
active[styleIndex] = true;
++ activeCount;
}
if(start > p) {
parts.push({attrs, text: ln.substring(p, start)});
}
findStyles(
shrinkWhitespace(trimCollapsible(line)),
active,
(styleIndex) => {
activeCount += active[styleIndex] ? 1 : -1;
attrs = combineAttrs(activeCount, active);
p = end;
}
if(p < ln.length) {
parts.push({attrs, text: ln.substr(p)});
}
result.push(parts);
},
(text) => parts.push({attrs, text})
);
return parts;
});
return result;
}
const BLOCK_TYPES = new Map();

File diff suppressed because one or more lines are too long

View File

@ -3874,8 +3874,11 @@
if(match[1] === 'n') {
return '\n';
}
if('"\\'.indexOf(match[1]) !== -1) {
return match[1];
}
return '\u001B' + match[1];
}
const TOKENS = [
{end: /(?=\n)|$/y, omit: true, start: /#/y},
@ -3901,6 +3904,10 @@
{baseToken: {v: '\n'}, start: /\n/y},
];
/* eslint-disable no-control-regex */ // Removing control characters is the aim
const CONTROL_CHARS = /[\x00-\x08\x0E-\x1F]/g;
/* eslint-enable no-control-regex */
function tokFindBegin(src, i) {
for(let j = 0; j < TOKENS.length; ++ j) {
const block = TOKENS[j];
@ -3981,7 +3988,7 @@
class TokenState {
constructor(src) {
this.src = src;
this.src = src.replace(CONTROL_CHARS, '');
this.block = null;
this.token = null;
this.pos = {ch: 0, i: 0, ln: 0};
@ -4220,6 +4227,11 @@
},
];
// Any run of collapsible whitespace: form feed, newline, carriage return, tab, vertical tab or space
const WHITE = /[\f\n\r\t\v ]+/g;
// Leading or trailing whitespace; the range \t-\r covers the same \t \n \v \f \r characters as WHITE
const WHITE_END = /^[\t-\r ]+|[\t-\r ]+$/g;
// Sentinel styleIndex reported by findNext when it finds an escape character (U+001B)
const ESC = -2;
function findNext(line, p, active) {
const virtLine = ' ' + line + ' ';
let styleIndex = -1;
@ -4241,6 +4253,17 @@
}
});
const escIndex = virtLine.indexOf('\u001B', p + 1);
if(escIndex !== -1 && escIndex < bestStart) {
styleIndex = ESC;
bestStart = escIndex;
bestEnd = escIndex + 1;
}
if(styleIndex === -1) {
return null;
}
return {end: bestEnd - 1, start: bestStart - 1, styleIndex};
}
@ -4268,51 +4291,60 @@
}
function shrinkWhitespace(text) {
return text.replace(/[\f\n\r\t\v ]+/g, ' ');
return text.replace(WHITE, ' ');
}
function trimCollapsible(text) {
return text.replace(/^[\f\n\r\t\v ]+|[\f\n\r\t\v ]+$/g, '');
return text.replace(WHITE_END, '');
}
function parseMarkdown(text) {
if(!text) {
/**
 * Scans one line for style markers and escape characters, reporting the
 * plain text runs and the style toggles in the order they occur.
 *
 * Escape characters (reported by findNext with styleIndex ESC) are removed
 * from the line, and the character directly after each one is skipped when
 * searching for further markers, so it ends up in the emitted text.
 *
 * @param {string} line - text of a single line
 * @param {boolean[]} active - per-style on/off flags; flipped in place as
 *   markers are found, before toggleCallback is invoked
 * @param {function(number)} toggleCallback - called with the style index
 *   each time a style is toggled
 * @param {function(string)} textCallback - called with each non-empty run
 *   of text found between toggles (and after the last one)
 */
function findStyles(line, active, toggleCallback, textCallback) {
	let remaining = line;
	let searchFrom = 0;
	let textFrom = 0;
	for(
		let found = findNext(remaining, searchFrom, active);
		found;
		found = findNext(remaining, searchFrom, active)
	) {
		if(found.styleIndex === ESC) {
			// Cut out the escape itself, then resume after the character it protects
			remaining = (
				remaining.substring(0, found.start) +
				remaining.slice(found.end)
			);
			searchFrom = found.start + 1;
			continue;
		}
		if(found.start > textFrom) {
			textCallback(remaining.substring(textFrom, found.start));
		}
		active[found.styleIndex] = !active[found.styleIndex];
		toggleCallback(found.styleIndex);
		textFrom = found.end;
		searchFrom = found.end;
	}
	if(textFrom < remaining.length) {
		textCallback(remaining.slice(textFrom));
	}
}
function parseMarkdown(markdown) {
if(!markdown) {
return [];
}
const active = STYLES.map(() => false);
let activeCount = 0;
let attrs = null;
const lines = trimCollapsible(text).split('\n');
const result = [];
lines.forEach((line) => {
const ln = shrinkWhitespace(trimCollapsible(line));
const lines = trimCollapsible(markdown).split('\n');
return lines.map((line) => {
const parts = [];
let p = 0;
for(;;) {
const {styleIndex, start, end} = findNext(ln, p, active);
if(styleIndex === -1) {
break;
}
if(active[styleIndex]) {
active[styleIndex] = false;
-- activeCount;
} else {
active[styleIndex] = true;
++ activeCount;
}
if(start > p) {
parts.push({attrs, text: ln.substring(p, start)});
}
findStyles(
shrinkWhitespace(trimCollapsible(line)),
active,
(styleIndex) => {
activeCount += active[styleIndex] ? 1 : -1;
attrs = combineAttrs(activeCount, active);
p = end;
}
if(p < ln.length) {
parts.push({attrs, text: ln.substr(p)});
}
result.push(parts);
},
(text) => parts.push({attrs, text})
);
return parts;
});
return result;
}
const BLOCK_TYPES = new Map();

View File

@ -62,6 +62,11 @@ const STYLES = [
},
];
// Any run of collapsible whitespace: form feed, newline, carriage return, tab, vertical tab or space
const WHITE = /[\f\n\r\t\v ]+/g;
// Leading or trailing whitespace; the range \t-\r covers the same \t \n \v \f \r characters as WHITE
const WHITE_END = /^[\t-\r ]+|[\t-\r ]+$/g;
// Sentinel styleIndex reported by findNext when it finds an escape character (U+001B)
const ESC = -2;
function findNext(line, p, active) {
const virtLine = ' ' + line + ' ';
let styleIndex = -1;
@ -83,6 +88,17 @@ function findNext(line, p, active) {
}
});
const escIndex = virtLine.indexOf('\u001B', p + 1);
if(escIndex !== -1 && escIndex < bestStart) {
styleIndex = ESC;
bestStart = escIndex;
bestEnd = escIndex + 1;
}
if(styleIndex === -1) {
return null;
}
return {end: bestEnd - 1, start: bestStart - 1, styleIndex};
}
@ -110,49 +126,58 @@ function combineAttrs(activeCount, active) {
}
function shrinkWhitespace(text) {
return text.replace(/[\f\n\r\t\v ]+/g, ' ');
return text.replace(WHITE, ' ');
}
function trimCollapsible(text) {
return text.replace(/^[\f\n\r\t\v ]+|[\f\n\r\t\v ]+$/g, '');
return text.replace(WHITE_END, '');
}
export default function parseMarkdown(text) {
if(!text) {
/**
 * Scans one line for style markers and escape characters, reporting the
 * plain text runs and the style toggles in the order they occur.
 *
 * Escape characters (reported by findNext with styleIndex ESC) are removed
 * from the line, and the character directly after each one is skipped when
 * searching for further markers, so it ends up in the emitted text.
 *
 * @param {string} line - text of a single line
 * @param {boolean[]} active - per-style on/off flags; flipped in place as
 *   markers are found, before toggleCallback is invoked
 * @param {function(number)} toggleCallback - called with the style index
 *   each time a style is toggled
 * @param {function(string)} textCallback - called with each non-empty run
 *   of text found between toggles (and after the last one)
 */
function findStyles(line, active, toggleCallback, textCallback) {
	let remaining = line;
	let searchFrom = 0;
	let textFrom = 0;
	for(
		let found = findNext(remaining, searchFrom, active);
		found;
		found = findNext(remaining, searchFrom, active)
	) {
		if(found.styleIndex === ESC) {
			// Cut out the escape itself, then resume after the character it protects
			remaining = (
				remaining.substring(0, found.start) +
				remaining.slice(found.end)
			);
			searchFrom = found.start + 1;
			continue;
		}
		if(found.start > textFrom) {
			textCallback(remaining.substring(textFrom, found.start));
		}
		active[found.styleIndex] = !active[found.styleIndex];
		toggleCallback(found.styleIndex);
		textFrom = found.end;
		searchFrom = found.end;
	}
	if(textFrom < remaining.length) {
		textCallback(remaining.slice(textFrom));
	}
}
export default function parseMarkdown(markdown) {
if(!markdown) {
return [];
}
const active = STYLES.map(() => false);
let activeCount = 0;
let attrs = null;
const lines = trimCollapsible(text).split('\n');
const result = [];
lines.forEach((line) => {
const ln = shrinkWhitespace(trimCollapsible(line));
const lines = trimCollapsible(markdown).split('\n');
return lines.map((line) => {
const parts = [];
let p = 0;
for(;;) {
const {styleIndex, start, end} = findNext(ln, p, active);
if(styleIndex === -1) {
break;
}
if(active[styleIndex]) {
active[styleIndex] = false;
-- activeCount;
} else {
active[styleIndex] = true;
++ activeCount;
}
if(start > p) {
parts.push({attrs, text: ln.substring(p, start)});
}
findStyles(
shrinkWhitespace(trimCollapsible(line)),
active,
(styleIndex) => {
activeCount += active[styleIndex] ? 1 : -1;
attrs = combineAttrs(activeCount, active);
p = end;
}
if(p < ln.length) {
parts.push({attrs, text: ln.substr(p)});
}
result.push(parts);
},
(text) => parts.push({attrs, text})
);
return parts;
});
return result;
}

View File

@ -35,6 +35,14 @@ describe('Markdown Parser', () => {
]);
});
it('removes escape characters', () => {
	// The escape (U+001B) between 'a' and 'b' must not appear in the parsed text
	const expected = [
		[{attrs: null, text: 'ab'}],
	];
	expect(parser('a\u001Bb')).toEqual(expected);
});
it('replaces sequences of whitespace with a single space', () => {
const formatted = parser('abc \t \v def');
@ -67,6 +75,14 @@ describe('Markdown Parser', () => {
]]);
});
it('ignores style characters if preceded by an escape', () => {
	// Every '*' is individually escaped, so no style is applied and the
	// asterisks are kept as literal text in a single unstyled part
	const expected = [[
		{attrs: null, text: 'a **b** c'},
	]];
	expect(parser('a \u001B*\u001B*b\u001B*\u001B* c')).toEqual(expected);
});
it('ignores styling marks inside words', () => {
const formatted = parser('a**b**c__d__e');

View File

@ -9,7 +9,10 @@ function unescape(match) {
if(match[1] === 'n') {
return '\n';
}
if('"\\'.indexOf(match[1]) !== -1) {
return match[1];
}
return '\u001B' + match[1];
}
const TOKENS = [
@ -36,6 +39,10 @@ const TOKENS = [
{baseToken: {v: '\n'}, start: /\n/y},
];
/* eslint-disable no-control-regex */ // Removing control characters is the aim
const CONTROL_CHARS = /[\x00-\x08\x0E-\x1F]/g;
/* eslint-enable no-control-regex */
function tokFindBegin(src, i) {
for(let j = 0; j < TOKENS.length; ++ j) {
const block = TOKENS[j];
@ -116,7 +123,7 @@ function advancePos(pos, src, steps) {
class TokenState {
constructor(src) {
this.src = src;
this.src = src.replace(CONTROL_CHARS, '');
this.block = null;
this.token = null;
this.pos = {ch: 0, i: 0, ln: 0};

View File

@ -192,12 +192,31 @@ describe('Sequence Tokeniser', () => {
});
it('interprets special characters within quoted strings', () => {
const input = 'foo "zig\\" zag\\n"';
const input = 'foo "zig\\" \\\\zag\\n"';
const tokens = tokeniser.tokenise(input);
expect(tokens).toEqual([
token({q: false, s: '', v: 'foo'}),
token({q: true, s: ' ', v: 'zig" zag\n'}),
token({q: true, s: ' ', v: 'zig" \\zag\n'}),
]);
});
it('propagates backslashes as escape characters', () => {
	// A backslash before an ordinary character (here a space) is expected to
	// come through as U+001B followed by that character
	const expected = [
		token({q: true, s: '', v: 'zig \u001B zag'}),
	];
	expect(tokeniser.tokenise('"zig \\ zag"')).toEqual(expected);
});
it('removes control characters everywhere', () => {
	// Raw U+001B and U+0001 in the input are dropped both outside and inside
	// quoted strings
	const expected = [
		token({q: false, s: '', v: 'abc'}),
		token({q: true, s: ' ', v: 'abc'}),
	];
	const source = 'a\u001Bb\u0001c "a\u001Bb\u0001c"';
	expect(tokeniser.tokenise(source)).toEqual(expected);
});

View File

@ -1,5 +1,8 @@
<svg width="103.81134033203125" height="124" xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="-51.905670166015625 -119 103.81134033203125 124"><metadata>title "Foo **Bar** I&lt;sup&gt;2&lt;/sup&gt;
_Baz_ `Zig` I&lt;sub&gt;2&lt;/sub&gt;
**_`Zag`_** &lt;u&gt;&lt;o&gt;~Abc~&lt;/o&gt;&lt;/u&gt;
&lt;highlight&gt;Back&lt;/highlight&gt; &lt;red&gt;Text&lt;/red&gt;"
</metadata><defs><filter id="R0highlight"><feMorphology in="SourceAlpha" operator="dilate" radius="4"></feMorphology><feGaussianBlur edgeMode="none" stdDeviation="3, 1.5"></feGaussianBlur><feComponentTransfer><feFuncA intercept="-70" slope="100" type="linear"></feFuncA></feComponentTransfer><feComponentTransfer><feFuncR intercept="1" slope="0" type="linear"></feFuncR><feFuncG intercept="0.875" slope="0" type="linear"></feFuncG><feFuncB intercept="0" slope="0" type="linear"></feFuncB><feFuncA slope="0.8" type="linear"></feFuncA></feComponentTransfer><feMerge><feMergeNode></feMergeNode><feMergeNode in="SourceGraphic"></feMergeNode></feMerge></filter></defs><defs><mask id="R0FullMask" maskUnits="userSpaceOnUse"><rect fill="#FFFFFF" height="124" width="103.81134033203125" x="-51.905670166015625" y="-119"></rect></mask><mask id="R0LineMask" maskUnits="userSpaceOnUse"><rect fill="#FFFFFF" height="124" width="103.81134033203125" x="-51.905670166015625" y="-119"></rect></mask></defs><g></g><g><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-94">Foo <tspan font-weight="bolder">Bar</tspan> I<tspan baseline-shift="70%" font-size="0.6em">2</tspan></text><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-68"><tspan font-style="italic">Baz</tspan> <tspan font-family="Courier New,Liberation Mono,monospace">Zig</tspan> I<tspan baseline-shift="-20%" font-size="0.6em">2</tspan></text><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-42"><tspan font-style="italic" font-weight="bolder" font-family="Courier New,Liberation Mono,monospace">Zag</tspan> <tspan text-decoration="line-through overline underline">Abc</tspan></text><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" 
font-size="20" line-height="1.3" text-anchor="middle" y="-16"><tspan filter="url(#R0highlight)">Back</tspan> <tspan fill="#DD0000">Text</tspan></text></g><g></g><g mask="url(#R0FullMask)"><g mask="url(#R0LineMask)"></g><g></g><g></g></g></svg>
<svg width="132.474609375" height="150" xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="-66.2373046875 -145 132.474609375 150"><metadata>title "
Re **Bo** _It_ `Mo` I&lt;sup&gt;2&lt;/sup&gt;
**_`Comb`_** &lt;u&gt;&lt;o&gt;~Strike~&lt;/o&gt;&lt;/u&gt; I&lt;sub&gt;2&lt;/sub&gt;
&lt;highlight&gt;Back&lt;/highlight&gt; &lt;red&gt;Text&lt;/red&gt;
\&lt;b&gt;esc&lt;/b&gt;
\\&lt;b&gt;no-esc&lt;/b&gt; 😎
"
</metadata><defs><filter id="R0highlight"><feMorphology in="SourceAlpha" operator="dilate" radius="4"></feMorphology><feGaussianBlur edgeMode="none" stdDeviation="3, 1.5"></feGaussianBlur><feComponentTransfer><feFuncA intercept="-70" slope="100" type="linear"></feFuncA></feComponentTransfer><feComponentTransfer><feFuncR intercept="1" slope="0" type="linear"></feFuncR><feFuncG intercept="0.875" slope="0" type="linear"></feFuncG><feFuncB intercept="0" slope="0" type="linear"></feFuncB><feFuncA slope="0.8" type="linear"></feFuncA></feComponentTransfer><feMerge><feMergeNode></feMergeNode><feMergeNode in="SourceGraphic"></feMergeNode></feMerge></filter></defs><defs><mask id="R0FullMask" maskUnits="userSpaceOnUse"><rect fill="#FFFFFF" height="150" width="132.474609375" x="-66.2373046875" y="-145"></rect></mask><mask id="R0LineMask" maskUnits="userSpaceOnUse"><rect fill="#FFFFFF" height="150" width="132.474609375" x="-66.2373046875" y="-145"></rect></mask></defs><g></g><g><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-120">Re <tspan font-weight="bolder">Bo</tspan> <tspan font-style="italic">It</tspan> <tspan font-family="Courier New,Liberation Mono,monospace">Mo</tspan> I<tspan baseline-shift="70%" font-size="0.6em">2</tspan></text><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-94"><tspan font-style="italic" font-weight="bolder" font-family="Courier New,Liberation Mono,monospace">Comb</tspan> <tspan text-decoration="line-through overline underline">Strike</tspan> I<tspan baseline-shift="-20%" font-size="0.6em">2</tspan></text><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-68"><tspan filter="url(#R0highlight)">Back</tspan> <tspan fill="#DD0000">Text</tspan></text><text x="0" class="title" 
font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-42">&lt;b&gt;esc&lt;/b&gt;</text><text x="0" class="title" font-family="Helvetica,Arial,Liberation Sans,sans-serif" font-size="20" line-height="1.3" text-anchor="middle" y="-16">\<tspan font-weight="bolder">no-esc</tspan> 😎</text></g><g></g><g mask="url(#R0FullMask)"><g mask="url(#R0LineMask)"></g><g></g><g></g></g></svg>

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 2.8 KiB