Add multiline blockquote extension

Adds the ability to enclose multiple lines in a blockquote.
Such as:

>>>
Paragraph one

Paragraph two
>>>
This commit is contained in:
digitalMoksha 2023-06-13 11:47:47 -05:00
parent bb104e688d
commit c8c518da53
12 changed files with 771 additions and 2 deletions

View File

@ -34,6 +34,8 @@ if [ x"$SPEC" = "xtrue" ]; then
# python3 roundtrip_tests.py --spec extensions-table-prefer-style-attributes.txt "$PROGRAM_ARG --table-prefer-style-attributes" --extensions "table strikethrough autolink tagfilter footnotes tasklist" || failed=1
python3 roundtrip_tests.py --spec extensions-full-info-string.txt "$PROGRAM_ARG --full-info-string" \
|| failed=1
python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/multiline_blockquote.txt "$PROGRAM_ARG" \
|| failed=1
python3 spec_tests.py --no-normalize --spec regression.txt "$PROGRAM_ARG" \
|| failed=1

View File

@ -377,6 +377,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
NodeValue::FootnoteReference(ref nfr) => {
self.format_footnote_reference(nfr.name.as_bytes(), entering)
}
NodeValue::MultilineBlockQuote(..) => self.format_block_quote(entering),
};
true
}

View File

@ -993,6 +993,17 @@ impl<'o> HtmlFormatter<'o> {
self.output.write_all(b"</li>\n")?;
}
}
NodeValue::MultilineBlockQuote(_) => {
if entering {
self.cr()?;
self.output.write_all(b"<blockquote")?;
self.render_sourcepos(node)?;
self.output.write_all(b">\n")?;
} else {
self.cr()?;
self.output.write_all(b"</blockquote>\n")?;
}
}
}
Ok(false)
}

View File

@ -7,6 +7,8 @@ use std::convert::TryFrom;
#[cfg(feature = "shortcodes")]
use crate::parser::shortcodes::NodeShortCode;
use crate::parser::multiline_block_quote::NodeMultilineBlockQuote;
/// The core AST node enum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NodeValue {
@ -151,6 +153,19 @@ pub enum NodeValue {
#[cfg(feature = "shortcodes")]
/// **Inline**. An Emoji character generated from a shortcode. Enable with feature "shortcodes".
ShortCode(NodeShortCode),
/// **Block**. A [multiline block quote](https://github.github.com/gfm/#block-quotes). Spans multiple
/// lines and contains other **blocks**.
///
/// ``` md
/// >>>
/// A paragraph.
///
/// - item one
/// - item two
/// >>>
/// ```
MultilineBlockQuote(NodeMultilineBlockQuote),
}
/// Alignment of a single table cell.
@ -391,6 +406,7 @@ impl NodeValue {
| NodeValue::TableRow(..)
| NodeValue::TableCell
| NodeValue::TaskItem(..)
| NodeValue::MultilineBlockQuote(_)
)
}
@ -464,6 +480,7 @@ impl NodeValue {
NodeValue::FootnoteReference(..) => "footnote_reference",
#[cfg(feature = "shortcodes")]
NodeValue::ShortCode(_) => "shortcode",
NodeValue::MultilineBlockQuote(_) => "block_quote",
}
}
}
@ -647,6 +664,10 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
| NodeValue::HtmlInline(..)
),
NodeValue::MultilineBlockQuote(_) => {
child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
}
_ => false,
}
}

View File

@ -4,6 +4,8 @@ mod inlines;
pub mod shortcodes;
mod table;
pub mod multiline_block_quote;
use crate::adapters::SyntaxHighlighterAdapter;
use crate::arena_tree::Node;
use crate::ctype::{isdigit, isspace};
@ -25,6 +27,7 @@ use std::str;
use typed_arena::Arena;
use crate::adapters::HeadingAdapter;
use crate::parser::multiline_block_quote::NodeMultilineBlockQuote;
use self::inlines::RefMap;
@ -963,6 +966,16 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> {
return (false, container, should_continue);
}
}
NodeValue::MultilineBlockQuote(..) => {
if !self.parse_multiline_block_quote_prefix(
line,
container,
ast,
&mut should_continue,
) {
return (false, container, should_continue);
}
}
_ => {}
}
}
@ -985,7 +998,25 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> {
self.find_first_nonspace(line);
let indented = self.indent >= CODE_INDENT;
if !indented && line[self.first_nonspace] == b'>' {
if !indented
&& unwrap_into(
scanners::open_multiline_block_quote_fence(&line[self.first_nonspace..]),
&mut matched,
)
{
let first_nonspace = self.first_nonspace;
let offset = self.offset;
let nmbc = NodeMultilineBlockQuote {
fence_length: matched,
fence_offset: first_nonspace - offset,
};
*container = self.add_child(
container,
NodeValue::MultilineBlockQuote(nmbc),
self.first_nonspace + 1,
);
self.advance_offset(line, first_nonspace + matched - offset, false);
} else if !indented && line[self.first_nonspace] == b'>' {
let blockquote_startpos = self.first_nonspace;
let offset = self.first_nonspace + 1 - self.offset;
@ -1444,6 +1475,51 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> {
}
}
/// Processes one input line against an already-open multiline block quote.
///
/// Returns `true` when the line belongs to the block quote; before returning,
/// up to `fence_offset` leading spaces/tabs are consumed so the content lines
/// up with the opening fence.
///
/// Returns `false` when the line is a closing fence (a `>` run at least as
/// long as the opening one, indented by at most 3). In that case
/// `should_continue` is cleared, the fence is consumed, a still-open last
/// child is finalized, and the block quote itself is finalized with the
/// result stored in `self.current`.
fn parse_multiline_block_quote_prefix(
    &mut self,
    line: &[u8],
    container: &'a AstNode<'a>,
    ast: &mut Ast,
    should_continue: &mut bool,
) -> bool {
    let (open_len, open_indent) = match ast.value {
        NodeValue::MultilineBlockQuote(ref nmbq) => (nmbq.fence_length, nmbq.fence_offset),
        _ => unreachable!(),
    };

    // Length of a candidate closing fence on this line; 0 when there is none.
    let mut close_len = 0;
    if self.indent <= 3 && line[self.first_nonspace] == b'>' {
        if let Some(n) =
            scanners::close_multiline_block_quote_fence(&line[self.first_nonspace..])
        {
            close_len = n;
        }
    }

    if close_len < open_len {
        // Ordinary content line: strip at most `open_indent` columns of
        // leading whitespace, stopping at the first non-blank byte.
        for _ in 0..open_indent {
            if !strings::is_space_or_tab(line[self.offset]) {
                break;
            }
            self.advance_offset(line, 1, true);
        }
        return true;
    }

    // Closing fence: consume it and shut the container.
    *should_continue = false;
    self.advance_offset(line, close_len, false);

    // The last child, like an indented codeblock, could be left open.
    // Make sure it's finalized.
    if nodes::last_child_is_open(container) {
        let last = container.last_child().unwrap();
        let last_ast = &mut *last.data.borrow_mut();
        self.finalize_borrowed(last, last_ast).unwrap();
    }

    self.current = self.finalize_borrowed(container, ast).unwrap();
    false
}
fn add_child(
&mut self,
mut parent: &'a AstNode<'a>,
@ -1484,6 +1560,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> {
container.first_child().is_some()
|| container.data.borrow().sourcepos.start.line != self.line_number
}
NodeValue::MultilineBlockQuote(..) => false,
_ => true,
};
@ -1664,6 +1741,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> {
NodeValue::Document => true,
NodeValue::CodeBlock(ref ncb) => ncb.fenced,
NodeValue::Heading(ref nh) => nh.setext,
NodeValue::MultilineBlockQuote(..) => true,
_ => false,
} {
ast.sourcepos.end = (self.line_number, self.curline_end_col).into();

View File

@ -0,0 +1,9 @@
/// The metadata of a multiline blockquote.
///
/// Recorded when the opening fence is parsed and consulted on every
/// following line to detect the closing fence and to strip indentation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NodeMultilineBlockQuote {
/// The length of the opening fence, i.e. the number of `>` characters
/// matched by the fence scanner. A closing fence must be at least this long.
pub fence_length: usize,
/// The indentation of the opening fence: the distance between the parser's
/// offset and the first non-space character on the fence line. At most this
/// many leading spaces or tabs are skipped on each line inside the block.
pub fence_offset: usize,
}

View File

@ -376,6 +376,28 @@ pub fn shortcode(s: &[u8]) -> Option<usize> {
*/
}
/// Matches the opening fence of a multiline block quote at the start of `s`.
///
/// The fence is a run of three or more `>` characters that is followed only by
/// spaces or tabs and then a line ending (`\r` or `\n`). The whitespace and
/// line ending are trailing context: they must be present but are not counted.
///
/// Returns `Some(n)` where `n` is the number of `>` characters, or `None` when
/// `s` does not start with such a fence.
pub fn open_multiline_block_quote_fence(s: &[u8]) -> Option<usize> {
let mut cursor = 0;
let mut marker = 0;
let mut ctxmarker = 0;
let len = s.len();
/*!re2c
[>]{3,} / [ \t]*[\r\n] { return Some(cursor); }
* { return None; }
*/
}
/// Matches the closing fence of a multiline block quote at the start of `s`.
///
/// Uses the same rule as the opening fence: a run of three or more `>`
/// characters followed only by spaces or tabs and then a line ending (`\r` or
/// `\n`). The trailing whitespace and line ending are required but not counted.
///
/// Returns `Some(n)` where `n` is the number of `>` characters, or `None` when
/// `s` does not start with such a fence.
pub fn close_multiline_block_quote_fence(s: &[u8]) -> Option<usize> {
let mut cursor = 0;
let mut marker = 0;
let mut ctxmarker = 0;
let len = s.len();
/*!re2c
[>]{3,} / [ \t]*[\r\n] { return Some(cursor); }
* { return None; }
*/
}
// Returns both the length of the match, and the tasklist character.
pub fn tasklist(s: &[u8]) -> Option<(usize, u8)> {
let mut cursor = 0;

312
src/scanners.rs generated
View File

@ -22548,6 +22548,318 @@ pub fn shortcode(s: &[u8]) -> Option<usize> {
}
}
/// Recognises the opening fence of a multiline block quote at the start of `s`.
///
/// A fence is three or more `>` bytes, optionally followed by spaces or tabs,
/// and terminated by `\r` or `\n`. The trailing blanks and line ending must be
/// present but are not part of the reported length.
///
/// Returns the number of `>` bytes on a match and `None` otherwise (including
/// when the input ends before a line ending is seen).
pub fn open_multiline_block_quote_fence(s: &[u8]) -> Option<usize> {
    let fence_len = s.iter().take_while(|&&b| b == b'>').count();
    if fence_len < 3 {
        return None;
    }

    // First byte after the fence that is neither a space nor a tab.
    let terminator = s[fence_len..]
        .iter()
        .find(|&&b| b != b' ' && b != b'\t');

    match terminator {
        Some(&b'\n') | Some(&b'\r') => Some(fence_len),
        _ => None,
    }
}
/// Recognises the closing fence of a multiline block quote at the start of `s`.
///
/// A fence is three or more `>` bytes, optionally followed by spaces or tabs,
/// and terminated by `\r` or `\n`. The trailing blanks and line ending must be
/// present but are not part of the reported length.
///
/// Returns the number of `>` bytes on a match and `None` otherwise (including
/// when the input ends before a line ending is seen).
pub fn close_multiline_block_quote_fence(s: &[u8]) -> Option<usize> {
    let fence_len = s.iter().take_while(|&&b| b == b'>').count();
    if fence_len < 3 {
        return None;
    }

    // First byte after the fence that is neither a space nor a tab.
    let terminator = s[fence_len..]
        .iter()
        .find(|&&b| b != b' ' && b != b'\t');

    match terminator {
        Some(&b'\n') | Some(&b'\r') => Some(fence_len),
        _ => None,
    }
}
// Returns both the length of the match, and the tasklist character.
pub fn tasklist(s: &[u8]) -> Option<(usize, u8)> {
let mut cursor = 0;

View File

@ -212,5 +212,6 @@ fn exercise_full_api() {
let _: String = nfr.name;
let _: u32 = nfr.ix;
}
nodes::NodeValue::MultilineBlockQuote(_) => {}
}
}

View File

@ -0,0 +1,311 @@
---
title: GitLab Flavored Markdown Spec
version: 0.1
date: '2023-12-18'
license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
...
## Multi-line Blockquotes
Simple container
```````````````````````````````` example
>>>
*content*
>>>
.
<blockquote>
<p><em>content</em></p>
</blockquote>
````````````````````````````````
Can contain block elements
```````````````````````````````` example
>>>
### heading
-----------
>>>
.
<blockquote>
<h3>heading</h3>
<hr />
</blockquote>
````````````````````````````````
Ending marker can be longer
```````````````````````````````` example
>>>>>>
hello world
>>>>>>>>>>>
normal
.
<blockquote>
<p>hello world</p>
</blockquote>
<p>normal</p>
````````````````````````````````
Nested blockquotes
```````````````````````````````` example
>>>>>
>>>>
foo
>>>>
>>>>>
.
<blockquote>
<blockquote>
<p>foo</p>
</blockquote>
</blockquote>
````````````````````````````````
Incorrectly nested blockquotes
```````````````````````````````` example
>>>>
this block is closed with 5 markers below
>>>>>
auto-closed blocks
>>>>>
>>>>
.
<blockquote>
<p>this block is closed with 5 markers below</p>
</blockquote>
<p>auto-closed blocks</p>
<blockquote>
<blockquote>
</blockquote>
</blockquote>
````````````````````````````````
Marker can be indented up to 3 spaces
```````````````````````````````` example
>>>>
first-level blockquote
>>>
second-level blockquote
>>>
>>>>
regular paragraph
.
<blockquote>
<p>first-level blockquote</p>
<blockquote>
<p>second-level blockquote</p>
</blockquote>
</blockquote>
<p>regular paragraph</p>
````````````````````````````````
Four spaces make it a code block
```````````````````````````````` example
>>>
content
>>>
.
<pre><code>&gt;&gt;&gt;
content
&gt;&gt;&gt;
</code></pre>
````````````````````````````````
Detection of an embedded four-space code block is measured from
the column where the blockquote starts, not from the beginning of
the line.
```````````````````````````````` example
>>>
code block
>>>
.
<blockquote>
<pre><code>code block
</code></pre>
</blockquote>
````````````````````````````````
```````````````````````````````` example
>>>>
content
>>>
code block
>>>
>>>>
.
<blockquote>
<p>content</p>
<blockquote>
<pre><code>code block
</code></pre>
</blockquote>
</blockquote>
````````````````````````````````
Closing marker can't have text on the same line
```````````````````````````````` example
>>>
foo
>>> arg=123
.
<blockquote>
<p>foo</p>
<blockquote>
<blockquote>
<blockquote>
<p>arg=123</p>
</blockquote>
</blockquote>
</blockquote>
</blockquote>
````````````````````````````````
Blockquotes self-close at the end of the document
```````````````````````````````` example
>>>
foo
.
<blockquote>
<p>foo</p>
</blockquote>
````````````````````````````````
They should terminate paragraphs
```````````````````````````````` example
blah blah
>>>
content
>>>
.
<p>blah blah</p>
<blockquote>
<p>content</p>
</blockquote>
````````````````````````````````
They can be nested in lists
```````````````````````````````` example
- >>>
- foo
>>>
.
<ul>
<li>
<blockquote>
<ul>
<li>foo</li>
</ul>
</blockquote>
</li>
</ul>
````````````````````````````````
Or in blockquotes
```````````````````````````````` example
> >>>
> foo
>> bar
> baz
> >>>
.
<blockquote>
<blockquote>
<p>foo</p>
<blockquote>
<p>bar
baz</p>
</blockquote>
</blockquote>
</blockquote>
````````````````````````````````
List indentation
```````````````````````````````` example
- >>>
foo
bar
>>>
- >>>
foo
bar
>>>
.
<ul>
<li>
<blockquote>
<p>foo
bar</p>
</blockquote>
</li>
<li>
<blockquote>
<p>foo
bar</p>
</blockquote>
</li>
</ul>
````````````````````````````````
Ignored inside code blocks:
```````````````````````````````` example
```txt
# Code
>>>
# Code
>>>
# Code
```
.
<pre><code class="language-txt"># Code
&gt;&gt;&gt;
# Code
&gt;&gt;&gt;
# Code
</code></pre>
````````````````````````````````
Does not require a leading or trailing blank line
```````````````````````````````` example
Some text
>>>
A quote
>>>
Some other text
.
<p>Some text</p>
<blockquote>
<p>A quote</p>
</blockquote>
<p>Some other text</p>
````````````````````````````````

View File

@ -172,6 +172,7 @@ impl<'o> XmlFormatter<'o> {
}
NodeValue::FrontMatter(_) => (),
NodeValue::BlockQuote => {}
NodeValue::MultilineBlockQuote(..) => {}
NodeValue::Item(..) => {}
NodeValue::DescriptionList => {}
NodeValue::DescriptionItem(..) => (),

2
vendor/cmark-gfm vendored

@ -1 +1 @@
Subproject commit 587a12bb54d95ac37241377e6ddc93ea0e45439b
Subproject commit 2f13eeedfe9906c72a1843b03552550af7bee29a