1
Fork 0

Avoid roundtripping through pulldown-cmark

Roundtripping markdown is actually quite hard.
We don't actually require that.
All we need is to parse the markdown once to find the right marker and
the headings.
We then manually generate the markdown for the TOC, and all other content
can be copied over as-is, without being parsed and re-serialized.
This commit is contained in:
Jan-Erik Rediger 2022-01-25 20:57:43 +01:00
parent e759070dc8
commit adde0c8cfb
10 changed files with 56 additions and 71 deletions

16
Cargo.lock generated
View file

@ -845,8 +845,7 @@ dependencies = [
"log",
"mdbook",
"pretty_assertions",
"pulldown-cmark 0.8.0",
"pulldown-cmark-to-cmark",
"pulldown-cmark 0.9.1",
"serde_json",
"toml",
]
@ -1205,9 +1204,9 @@ dependencies = [
[[package]]
name = "pulldown-cmark"
version = "0.8.0"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8"
checksum = "34f197a544b0c9ab3ae46c359a7ec9cbbb5c7bf97054266fecb7ead794a181d6"
dependencies = [
"bitflags",
"getopts",
@ -1215,15 +1214,6 @@ dependencies = [
"unicase",
]
[[package]]
name = "pulldown-cmark-to-cmark"
version = "6.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95048382115a9da7be92ad51c84064d585b7da17472dcaa7f5eed8853c4c3707"
dependencies = [
"pulldown-cmark 0.8.0",
]
[[package]]
name = "quick-error"
version = "1.2.3"

View file

@ -10,9 +10,7 @@ edition = "2018"
[dependencies]
mdbook = "0.4.10"
pulldown-cmark = "0.8.0"
pulldown-cmark-to-cmark = "6.0.2"
env_logger = "0.8.4"
pulldown-cmark = "0.9.1"
log = "0.4.11"
clap = "2.33.3"
serde_json = "1.0.57"
@ -20,3 +18,4 @@ toml = "0.5.6"
[dev-dependencies]
pretty_assertions = "0.6.1"
env_logger = "0.8.4"

View file

@ -8,7 +8,6 @@ use mdbook::errors::{Error, Result};
use mdbook::preprocess::{Preprocessor, PreprocessorContext};
use pulldown_cmark::Tag::*;
use pulldown_cmark::{Event, Options, Parser};
use pulldown_cmark_to_cmark::{cmark_with_options, Options as COptions};
use toml::value::Table;
pub struct Toc;
@ -110,6 +109,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
let mut toc_iter = toc.iter().peekable();
// Start from the level of the first header.
let min_level = toc.iter().map(|(lvl, _, _)| *lvl).min().unwrap_or(1);
let mut last_lower = match toc_iter.peek() {
Some((lvl, _, _)) => *lvl,
None => 0,
@ -127,7 +127,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
});
for (level, name, slug) in toc {
let width = 2 * (level - 1) as usize;
let width = 2 * (level - min_level) as usize;
writeln!(result, "{1:0$}* [{2}](#{3})", width, "", name, slug).unwrap();
}
@ -135,7 +135,6 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
}
fn add_toc(content: &str, cfg: &Config) -> Result<String> {
let mut buf = String::with_capacity(content.len());
let mut toc_found = false;
let mut toc_content = vec![];
@ -150,40 +149,41 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
opts.insert(Options::ENABLE_TASKLISTS);
let mark: Vec<Event> = Parser::new(&cfg.marker).collect();
let mut mark_start = -1;
let mut mark_start = None;
let mut mark_end = 0..0;
let mut mark_loc = 0;
let mut c = -1;
for e in Parser::new_ext(&content, opts) {
c += 1;
log::trace!("Event: {:?}", e);
for (e, span) in Parser::new_ext(&content, opts).into_offset_iter() {
log::trace!("Event: {:?} (span: {:?})", e, span);
if !toc_found {
log::trace!(
"TOC not found yet. Location: {}, Start: {}",
"TOC not found yet. Location: {}, Start: {:?}",
mark_loc,
mark_start
);
if e == mark[mark_loc] {
if mark_start == -1 {
mark_start = c;
if mark_start.is_none() {
mark_start = Some(span.clone());
}
mark_loc += 1;
if mark_loc >= mark.len() {
mark_end = span;
toc_found = true
}
} else if mark_loc > 0 {
mark_loc = 0;
mark_start = -1;
mark_start = None;
} else {
continue;
}
}
if let Event::Start(Heading(lvl)) = e {
current_header_level = Some(lvl);
if let Event::Start(Heading(lvl, fragment, classes)) = e {
log::trace!("Header(lvl={lvl}, fragment={fragment:?}, classes={classes:?})");
current_header_level = Some(lvl as u32);
continue;
}
if let Event::End(Heading(_)) = e {
if let Event::End(Heading(..)) = e {
// Skip if this header is nested too deeply.
if let Some(level) = current_header_level.take() {
let header = current_header.clone();
@ -219,29 +219,30 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
let toc = build_toc(&toc_content);
log::trace!("Built TOC: {:?}", toc);
let toc_events = Parser::new(&toc).collect::<Vec<_>>();
log::trace!("toc_found={toc_found} mark_start={mark_start:?} mark_end={mark_end:?}");
let mut c = -1;
let events = Parser::new_ext(&content, opts)
.map(|e| {
c += 1;
if toc_found && c > mark_start && c < mark_start + (mark.len() as i32) {
vec![]
} else if toc_found && c == mark_start {
toc_events.clone()
let content = if toc_found {
let mark_start = mark_start.unwrap();
let content_before_toc = &content[0..mark_start.start];
let content_after_toc = &content[mark_end.end..];
log::trace!("content_before_toc={:?}", content_before_toc);
log::trace!("content_after_toc={:?}", content_after_toc);
// Multiline markers might have consumed trailing newlines,
// we ensure there's always one before the content.
let extra = if content_after_toc.as_bytes()[0] == b'\n' {
""
} else {
vec![e]
}
})
.flatten();
let opts = COptions {
newlines_after_codeblock: 1,
..Default::default()
"\n"
};
cmark_with_options(events, &mut buf, None, opts)
.map(|_| buf)
.map_err(|err| Error::msg(format!("Markdown serialization failed: {}", err)))
format!(
"{}{}{}{}",
content_before_toc, toc, extra, content_after_toc
)
} else {
content.to_string()
};
Ok(content)
}
impl Toc {

View file

@ -18,3 +18,4 @@
## Header 2.2
### Header 2.2.1

View file

@ -1,9 +1,9 @@
\*not emphasized\*
\*not emphasized*
\<br/> not a tag
\[not a link\](/foo)
\`not code\`
\[not a link](/foo)
\`not code`
\* not a list
\# not a heading
\[foo\]: /url "not a reference"
\[foo]: /url "not a reference"
\&ouml; not a character entity
1\. not a list

View file

@ -13,5 +13,3 @@
##### Header 1.1.1.1.1
# Another header `with inline` code

View file

@ -58,7 +58,7 @@ macro_rules! assert_toc {
let chapter = Chapter::from_content(content);
let result = Toc::add_toc(&chapter, &config);
match result {
Ok(result) => assert_eq!(expected.trim_end(), result),
Ok(result) => assert_eq!(expected, result),
Err(e) => panic!("{} failed. Error: {}", $name, e),
}
};
@ -114,7 +114,7 @@ fn unique_slugs() {
#[test]
fn add_toc_with_github_marker() {
let marker = "* auto-gen TOC:\n{:toc}".to_owned();
let marker = "* auto-gen TOC:\n{:toc}\n".to_owned();
assert_toc!("github_marker", with_marker(marker));
}

View file

@ -7,14 +7,9 @@
* [Level 1.2.1](#level-121)
## Level 1.1
### Level 1.1.1
### Level 1.1.2
## Level 1.2
### Level 1.2.1
text

View file

@ -1,5 +1,6 @@
# Heading
| Head 1 | Head 2 |
|------|------|
|--------|--------|
| Row 1 | Row 2 |

View file

@ -1,5 +1,5 @@
# Heading
| Head 1 | Head 2 |
|------|------|
|--------|--------|
| <span>Row 1</span> | Row 2 |