1
Fork 0

Avoid roundtripping through pulldown-cmark

Roundtripping markdown is actually quite hard.
We don't actually require that.
All we need is to parse the markdown once to find the right marker and
the headings.
We then generate the markdown manually, and all other content can be
copied over unparsed.
This commit is contained in:
Jan-Erik Rediger 2022-01-25 20:57:43 +01:00
parent e759070dc8
commit adde0c8cfb
10 changed files with 56 additions and 71 deletions

16
Cargo.lock generated
View file

@ -845,8 +845,7 @@ dependencies = [
"log", "log",
"mdbook", "mdbook",
"pretty_assertions", "pretty_assertions",
"pulldown-cmark 0.8.0", "pulldown-cmark 0.9.1",
"pulldown-cmark-to-cmark",
"serde_json", "serde_json",
"toml", "toml",
] ]
@ -1205,9 +1204,9 @@ dependencies = [
[[package]] [[package]]
name = "pulldown-cmark" name = "pulldown-cmark"
version = "0.8.0" version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8" checksum = "34f197a544b0c9ab3ae46c359a7ec9cbbb5c7bf97054266fecb7ead794a181d6"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"getopts", "getopts",
@ -1215,15 +1214,6 @@ dependencies = [
"unicase", "unicase",
] ]
[[package]]
name = "pulldown-cmark-to-cmark"
version = "6.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95048382115a9da7be92ad51c84064d585b7da17472dcaa7f5eed8853c4c3707"
dependencies = [
"pulldown-cmark 0.8.0",
]
[[package]] [[package]]
name = "quick-error" name = "quick-error"
version = "1.2.3" version = "1.2.3"

View file

@ -10,9 +10,7 @@ edition = "2018"
[dependencies] [dependencies]
mdbook = "0.4.10" mdbook = "0.4.10"
pulldown-cmark = "0.8.0" pulldown-cmark = "0.9.1"
pulldown-cmark-to-cmark = "6.0.2"
env_logger = "0.8.4"
log = "0.4.11" log = "0.4.11"
clap = "2.33.3" clap = "2.33.3"
serde_json = "1.0.57" serde_json = "1.0.57"
@ -20,3 +18,4 @@ toml = "0.5.6"
[dev-dependencies] [dev-dependencies]
pretty_assertions = "0.6.1" pretty_assertions = "0.6.1"
env_logger = "0.8.4"

View file

@ -8,7 +8,6 @@ use mdbook::errors::{Error, Result};
use mdbook::preprocess::{Preprocessor, PreprocessorContext}; use mdbook::preprocess::{Preprocessor, PreprocessorContext};
use pulldown_cmark::Tag::*; use pulldown_cmark::Tag::*;
use pulldown_cmark::{Event, Options, Parser}; use pulldown_cmark::{Event, Options, Parser};
use pulldown_cmark_to_cmark::{cmark_with_options, Options as COptions};
use toml::value::Table; use toml::value::Table;
pub struct Toc; pub struct Toc;
@ -110,6 +109,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
let mut toc_iter = toc.iter().peekable(); let mut toc_iter = toc.iter().peekable();
// Start from the level of the first header. // Start from the level of the first header.
let min_level = toc.iter().map(|(lvl, _, _)| *lvl).min().unwrap_or(1);
let mut last_lower = match toc_iter.peek() { let mut last_lower = match toc_iter.peek() {
Some((lvl, _, _)) => *lvl, Some((lvl, _, _)) => *lvl,
None => 0, None => 0,
@ -127,7 +127,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
}); });
for (level, name, slug) in toc { for (level, name, slug) in toc {
let width = 2 * (level - 1) as usize; let width = 2 * (level - min_level) as usize;
writeln!(result, "{1:0$}* [{2}](#{3})", width, "", name, slug).unwrap(); writeln!(result, "{1:0$}* [{2}](#{3})", width, "", name, slug).unwrap();
} }
@ -135,7 +135,6 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
} }
fn add_toc(content: &str, cfg: &Config) -> Result<String> { fn add_toc(content: &str, cfg: &Config) -> Result<String> {
let mut buf = String::with_capacity(content.len());
let mut toc_found = false; let mut toc_found = false;
let mut toc_content = vec![]; let mut toc_content = vec![];
@ -150,40 +149,41 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
opts.insert(Options::ENABLE_TASKLISTS); opts.insert(Options::ENABLE_TASKLISTS);
let mark: Vec<Event> = Parser::new(&cfg.marker).collect(); let mark: Vec<Event> = Parser::new(&cfg.marker).collect();
let mut mark_start = -1; let mut mark_start = None;
let mut mark_end = 0..0;
let mut mark_loc = 0; let mut mark_loc = 0;
let mut c = -1;
for e in Parser::new_ext(&content, opts) { for (e, span) in Parser::new_ext(&content, opts).into_offset_iter() {
c += 1; log::trace!("Event: {:?} (span: {:?})", e, span);
log::trace!("Event: {:?}", e);
if !toc_found { if !toc_found {
log::trace!( log::trace!(
"TOC not found yet. Location: {}, Start: {}", "TOC not found yet. Location: {}, Start: {:?}",
mark_loc, mark_loc,
mark_start mark_start
); );
if e == mark[mark_loc] { if e == mark[mark_loc] {
if mark_start == -1 { if mark_start.is_none() {
mark_start = c; mark_start = Some(span.clone());
} }
mark_loc += 1; mark_loc += 1;
if mark_loc >= mark.len() { if mark_loc >= mark.len() {
mark_end = span;
toc_found = true toc_found = true
} }
} else if mark_loc > 0 { } else if mark_loc > 0 {
mark_loc = 0; mark_loc = 0;
mark_start = -1; mark_start = None;
} else { } else {
continue; continue;
} }
} }
if let Event::Start(Heading(lvl)) = e { if let Event::Start(Heading(lvl, fragment, classes)) = e {
current_header_level = Some(lvl); log::trace!("Header(lvl={lvl}, fragment={fragment:?}, classes={classes:?})");
current_header_level = Some(lvl as u32);
continue; continue;
} }
if let Event::End(Heading(_)) = e { if let Event::End(Heading(..)) = e {
// Skip if this header is nested too deeply. // Skip if this header is nested too deeply.
if let Some(level) = current_header_level.take() { if let Some(level) = current_header_level.take() {
let header = current_header.clone(); let header = current_header.clone();
@ -219,29 +219,30 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
let toc = build_toc(&toc_content); let toc = build_toc(&toc_content);
log::trace!("Built TOC: {:?}", toc); log::trace!("Built TOC: {:?}", toc);
let toc_events = Parser::new(&toc).collect::<Vec<_>>(); log::trace!("toc_found={toc_found} mark_start={mark_start:?} mark_end={mark_end:?}");
let mut c = -1; let content = if toc_found {
let events = Parser::new_ext(&content, opts) let mark_start = mark_start.unwrap();
.map(|e| { let content_before_toc = &content[0..mark_start.start];
c += 1; let content_after_toc = &content[mark_end.end..];
if toc_found && c > mark_start && c < mark_start + (mark.len() as i32) { log::trace!("content_before_toc={:?}", content_before_toc);
vec![] log::trace!("content_after_toc={:?}", content_after_toc);
} else if toc_found && c == mark_start { // Multiline markers might have consumed trailing newlines,
toc_events.clone() // we ensure there's always one before the content.
let extra = if content_after_toc.as_bytes()[0] == b'\n' {
""
} else { } else {
vec![e] "\n"
}
})
.flatten();
let opts = COptions {
newlines_after_codeblock: 1,
..Default::default()
}; };
cmark_with_options(events, &mut buf, None, opts) format!(
.map(|_| buf) "{}{}{}{}",
.map_err(|err| Error::msg(format!("Markdown serialization failed: {}", err))) content_before_toc, toc, extra, content_after_toc
)
} else {
content.to_string()
};
Ok(content)
} }
impl Toc { impl Toc {

View file

@ -18,3 +18,4 @@
## Header 2.2 ## Header 2.2
### Header 2.2.1 ### Header 2.2.1

View file

@ -1,9 +1,9 @@
\*not emphasized\* \*not emphasized*
\<br/> not a tag \<br/> not a tag
\[not a link\](/foo) \[not a link](/foo)
\`not code\` \`not code`
\* not a list \* not a list
\# not a heading \# not a heading
\[foo\]: /url "not a reference" \[foo]: /url "not a reference"
\&ouml; not a character entity \&ouml; not a character entity
1\. not a list 1\. not a list

View file

@ -13,5 +13,3 @@
##### Header 1.1.1.1.1 ##### Header 1.1.1.1.1
# Another header `with inline` code # Another header `with inline` code

View file

@ -58,7 +58,7 @@ macro_rules! assert_toc {
let chapter = Chapter::from_content(content); let chapter = Chapter::from_content(content);
let result = Toc::add_toc(&chapter, &config); let result = Toc::add_toc(&chapter, &config);
match result { match result {
Ok(result) => assert_eq!(expected.trim_end(), result), Ok(result) => assert_eq!(expected, result),
Err(e) => panic!("{} failed. Error: {}", $name, e), Err(e) => panic!("{} failed. Error: {}", $name, e),
} }
}; };
@ -114,7 +114,7 @@ fn unique_slugs() {
#[test] #[test]
fn add_toc_with_github_marker() { fn add_toc_with_github_marker() {
let marker = "* auto-gen TOC:\n{:toc}".to_owned(); let marker = "* auto-gen TOC:\n{:toc}\n".to_owned();
assert_toc!("github_marker", with_marker(marker)); assert_toc!("github_marker", with_marker(marker));
} }

View file

@ -7,14 +7,9 @@
* [Level 1.2.1](#level-121) * [Level 1.2.1](#level-121)
## Level 1.1 ## Level 1.1
### Level 1.1.1 ### Level 1.1.1
### Level 1.1.2 ### Level 1.1.2
## Level 1.2 ## Level 1.2
### Level 1.2.1 ### Level 1.2.1
text text

View file

@ -1,5 +1,6 @@
# Heading # Heading
| Head 1 | Head 2 | | Head 1 | Head 2 |
|------|------| |--------|--------|
| Row 1 | Row 2 | | Row 1 | Row 2 |

View file

@ -1,5 +1,5 @@
# Heading # Heading
| Head 1 | Head 2 | | Head 1 | Head 2 |
|------|------| |--------|--------|
| <span>Row 1</span> | Row 2 | | <span>Row 1</span> | Row 2 |