scanners: begin port to re2c
This commit is contained in:
parent
8e6f193491
commit
3f070259fe
|
@ -0,0 +1,2 @@
|
|||
src/scanners.rs linguist-generated
|
||||
src/scanners.re linguist-language=Rust
|
5
Makefile
5
Makefile
|
@ -1,6 +1,5 @@
|
|||
docker:
|
||||
docker build -t comrak $(CURDIR)/script
|
||||
docker run --privileged -t -i -v $(CURDIR):/src/comrak -v $(HOME)/.cargo/registry:/root/.cargo/registry -w /src/comrak comrak /bin/bash
|
||||
src/scanners.rs: src/scanners.re
|
||||
re2rust -W -Werror --case-insensitive -i --no-generation-date -8 --encoding-policy substitute -o $@ $<
|
||||
|
||||
bench:
|
||||
cargo build --release
|
||||
|
|
|
@ -0,0 +1,234 @@
|
|||
use memchr::memmem;
|
||||
use std::str;
|
||||
use pest::Parser;
|
||||
use pest_derive::Parser;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
const _LEXER: &str = include_str!("lexer.pest");
|
||||
|
||||
#[derive(Parser)]
|
||||
#[grammar = "lexer.pest"]
|
||||
struct Lexer;
|
||||
|
||||
#[inline(always)]
|
||||
fn search(rule: Rule, line: &[u8]) -> Option<usize> {
|
||||
if let Ok(pairs) = Lexer::parse(rule, unsafe { str::from_utf8_unchecked(line) }) {
|
||||
Some(pairs.last().unwrap().as_span().end())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
#[inline(always)]
|
||||
fn is_match(rule: Rule, line: &[u8]) -> bool {
|
||||
Lexer::parse(rule, unsafe { str::from_utf8_unchecked(line) }).is_ok()
|
||||
}
|
||||
|
||||
// TODO: consider dropping all the #[inline(always)], we probably don't know
|
||||
// better than rustc.
|
||||
|
||||
/*!re2c
|
||||
re2c:define:YYCTYPE = u8;
|
||||
re2c:define:YYPEEK = "*s.get_unchecked(cursor)";
|
||||
re2c:define:YYSKIP = "cursor += 1;";
|
||||
re2c:define:YYBACKUP = "marker = cursor;";
|
||||
re2c:define:YYRESTORE = "cursor = marker;";
|
||||
re2c:yyfill:enable = 0;
|
||||
*/
|
||||
|
||||
#[inline(always)]
|
||||
pub fn atx_heading_start(s: &[u8]) -> Option<usize> {
|
||||
let mut cursor = 0;
|
||||
let mut marker = 0;
|
||||
/*!re2c
|
||||
[#]{1,6} ([ \t]+|[\r\n]) { return Some(cursor); }
|
||||
* { return None; }
|
||||
*/
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_block_end_1(s: &[u8]) -> bool {
|
||||
let mut cursor = 0;
|
||||
let mut marker = 0;
|
||||
/*!re2c
|
||||
[^\n\x00]* [<] [/] ('script'|'pre'|'textarea'|'style') [>] { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_block_end_2(line: &[u8]) -> bool {
|
||||
memmem::find(line, b"-->").is_some()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_block_end_3(line: &[u8]) -> bool {
|
||||
memmem::find(line, b"?>").is_some()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_block_end_4(line: &[u8]) -> bool {
|
||||
line.contains(&b'>')
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_block_end_5(line: &[u8]) -> bool {
|
||||
memmem::find(line, b"]]>").is_some()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn open_code_fence(line: &[u8]) -> Option<usize> {
|
||||
if line[0] != b'`' && line[0] != b'~' {
|
||||
return None;
|
||||
}
|
||||
search(Rule::open_code_fence, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn close_code_fence(line: &[u8]) -> Option<usize> {
|
||||
if line[0] != b'`' && line[0] != b'~' {
|
||||
return None;
|
||||
}
|
||||
search(Rule::close_code_fence, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_block_start(line: &[u8]) -> Option<usize> {
|
||||
const STR2: &'static [u8] = b"<!--";
|
||||
const STR3: &'static [u8] = b"<?";
|
||||
const STR5: &'static [u8] = b"<![CDATA[";
|
||||
|
||||
if !line.starts_with(b"<") {
|
||||
return None;
|
||||
}
|
||||
|
||||
if is_match(Rule::html_block_start_1, line) {
|
||||
Some(1)
|
||||
} else if line.starts_with(STR2) {
|
||||
Some(2)
|
||||
} else if line.starts_with(STR3) {
|
||||
Some(3)
|
||||
} else if is_match(Rule::html_block_start_4, line) {
|
||||
Some(4)
|
||||
} else if line.starts_with(STR5) {
|
||||
Some(5)
|
||||
} else if is_match(Rule::html_block_start_6, line) {
|
||||
Some(6)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_block_start_7(line: &[u8]) -> Option<usize> {
|
||||
if is_match(Rule::html_block_start_7, line) {
|
||||
Some(7)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub enum SetextChar {
|
||||
Equals,
|
||||
Hyphen,
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn setext_heading_line(line: &[u8]) -> Option<SetextChar> {
|
||||
if (line[0] == b'=' || line[0] == b'-') && is_match(Rule::setext_heading_line, line) {
|
||||
if line[0] == b'=' {
|
||||
Some(SetextChar::Equals)
|
||||
} else {
|
||||
Some(SetextChar::Hyphen)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn footnote_definition(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::footnote_definition, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn scheme(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::scheme_rule, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn autolink_uri(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::autolink_uri, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn autolink_email(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::autolink_email, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_tag(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::html_tag, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_comment(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::html_comment, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_processing_instruction(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::html_processing_instruction, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_declaration(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::html_declaration, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn html_cdata(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::html_cdata, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn spacechars(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::spacechars, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn link_title(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::link_title, line)
|
||||
}
|
||||
|
||||
#[cfg(feature = "shortcodes")]
|
||||
#[inline(always)]
|
||||
pub fn shortcode(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::shortcode_rule, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn table_start(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::table_start, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn table_cell(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::table_cell, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn table_cell_end(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::table_cell_end, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn table_row_end(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::table_row_end, line)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn dangerous_url(line: &[u8]) -> Option<usize> {
|
||||
search(Rule::dangerous_url, line)
|
||||
}
|
||||
|
||||
// vim: set ft=rust:
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue