Avoid roundtripping through pulldown-cmark
Roundtripping markdown is quite hard, and we don't actually require it. All we need is to parse the markdown once to find the marker and the headings. We then generate the TOC markdown manually, and all other content is copied over unparsed.
This commit is contained in:
parent
e759070dc8
commit
adde0c8cfb
16
Cargo.lock
generated
16
Cargo.lock
generated
|
@ -845,8 +845,7 @@ dependencies = [
|
|||
"log",
|
||||
"mdbook",
|
||||
"pretty_assertions",
|
||||
"pulldown-cmark 0.8.0",
|
||||
"pulldown-cmark-to-cmark",
|
||||
"pulldown-cmark 0.9.1",
|
||||
"serde_json",
|
||||
"toml",
|
||||
]
|
||||
|
@ -1205,9 +1204,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "pulldown-cmark"
|
||||
version = "0.8.0"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8"
|
||||
checksum = "34f197a544b0c9ab3ae46c359a7ec9cbbb5c7bf97054266fecb7ead794a181d6"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"getopts",
|
||||
|
@ -1215,15 +1214,6 @@ dependencies = [
|
|||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark-to-cmark"
|
||||
version = "6.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95048382115a9da7be92ad51c84064d585b7da17472dcaa7f5eed8853c4c3707"
|
||||
dependencies = [
|
||||
"pulldown-cmark 0.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "1.2.3"
|
||||
|
|
|
@ -10,9 +10,7 @@ edition = "2018"
|
|||
|
||||
[dependencies]
|
||||
mdbook = "0.4.10"
|
||||
pulldown-cmark = "0.8.0"
|
||||
pulldown-cmark-to-cmark = "6.0.2"
|
||||
env_logger = "0.8.4"
|
||||
pulldown-cmark = "0.9.1"
|
||||
log = "0.4.11"
|
||||
clap = "2.33.3"
|
||||
serde_json = "1.0.57"
|
||||
|
@ -20,3 +18,4 @@ toml = "0.5.6"
|
|||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "0.6.1"
|
||||
env_logger = "0.8.4"
|
||||
|
|
71
src/lib.rs
71
src/lib.rs
|
@ -8,7 +8,6 @@ use mdbook::errors::{Error, Result};
|
|||
use mdbook::preprocess::{Preprocessor, PreprocessorContext};
|
||||
use pulldown_cmark::Tag::*;
|
||||
use pulldown_cmark::{Event, Options, Parser};
|
||||
use pulldown_cmark_to_cmark::{cmark_with_options, Options as COptions};
|
||||
use toml::value::Table;
|
||||
|
||||
pub struct Toc;
|
||||
|
@ -110,6 +109,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
|
|||
let mut toc_iter = toc.iter().peekable();
|
||||
|
||||
// Start from the level of the first header.
|
||||
let min_level = toc.iter().map(|(lvl, _, _)| *lvl).min().unwrap_or(1);
|
||||
let mut last_lower = match toc_iter.peek() {
|
||||
Some((lvl, _, _)) => *lvl,
|
||||
None => 0,
|
||||
|
@ -127,7 +127,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
|
|||
});
|
||||
|
||||
for (level, name, slug) in toc {
|
||||
let width = 2 * (level - 1) as usize;
|
||||
let width = 2 * (level - min_level) as usize;
|
||||
writeln!(result, "{1:0$}* [{2}](#{3})", width, "", name, slug).unwrap();
|
||||
}
|
||||
|
||||
|
@ -135,7 +135,6 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
|
|||
}
|
||||
|
||||
fn add_toc(content: &str, cfg: &Config) -> Result<String> {
|
||||
let mut buf = String::with_capacity(content.len());
|
||||
let mut toc_found = false;
|
||||
|
||||
let mut toc_content = vec![];
|
||||
|
@ -150,40 +149,41 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
|
|||
opts.insert(Options::ENABLE_TASKLISTS);
|
||||
|
||||
let mark: Vec<Event> = Parser::new(&cfg.marker).collect();
|
||||
let mut mark_start = -1;
|
||||
let mut mark_start = None;
|
||||
let mut mark_end = 0..0;
|
||||
let mut mark_loc = 0;
|
||||
let mut c = -1;
|
||||
|
||||
for e in Parser::new_ext(&content, opts) {
|
||||
c += 1;
|
||||
log::trace!("Event: {:?}", e);
|
||||
for (e, span) in Parser::new_ext(&content, opts).into_offset_iter() {
|
||||
log::trace!("Event: {:?} (span: {:?})", e, span);
|
||||
if !toc_found {
|
||||
log::trace!(
|
||||
"TOC not found yet. Location: {}, Start: {}",
|
||||
"TOC not found yet. Location: {}, Start: {:?}",
|
||||
mark_loc,
|
||||
mark_start
|
||||
);
|
||||
if e == mark[mark_loc] {
|
||||
if mark_start == -1 {
|
||||
mark_start = c;
|
||||
if mark_start.is_none() {
|
||||
mark_start = Some(span.clone());
|
||||
}
|
||||
mark_loc += 1;
|
||||
if mark_loc >= mark.len() {
|
||||
mark_end = span;
|
||||
toc_found = true
|
||||
}
|
||||
} else if mark_loc > 0 {
|
||||
mark_loc = 0;
|
||||
mark_start = -1;
|
||||
mark_start = None;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if let Event::Start(Heading(lvl)) = e {
|
||||
current_header_level = Some(lvl);
|
||||
if let Event::Start(Heading(lvl, fragment, classes)) = e {
|
||||
log::trace!("Header(lvl={lvl}, fragment={fragment:?}, classes={classes:?})");
|
||||
current_header_level = Some(lvl as u32);
|
||||
continue;
|
||||
}
|
||||
if let Event::End(Heading(_)) = e {
|
||||
if let Event::End(Heading(..)) = e {
|
||||
// Skip if this header is nested too deeply.
|
||||
if let Some(level) = current_header_level.take() {
|
||||
let header = current_header.clone();
|
||||
|
@ -219,29 +219,30 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
|
|||
|
||||
let toc = build_toc(&toc_content);
|
||||
log::trace!("Built TOC: {:?}", toc);
|
||||
let toc_events = Parser::new(&toc).collect::<Vec<_>>();
|
||||
log::trace!("toc_found={toc_found} mark_start={mark_start:?} mark_end={mark_end:?}");
|
||||
|
||||
let mut c = -1;
|
||||
let events = Parser::new_ext(&content, opts)
|
||||
.map(|e| {
|
||||
c += 1;
|
||||
if toc_found && c > mark_start && c < mark_start + (mark.len() as i32) {
|
||||
vec![]
|
||||
} else if toc_found && c == mark_start {
|
||||
toc_events.clone()
|
||||
let content = if toc_found {
|
||||
let mark_start = mark_start.unwrap();
|
||||
let content_before_toc = &content[0..mark_start.start];
|
||||
let content_after_toc = &content[mark_end.end..];
|
||||
log::trace!("content_before_toc={:?}", content_before_toc);
|
||||
log::trace!("content_after_toc={:?}", content_after_toc);
|
||||
// Multiline markers might have consumed trailing newlines,
|
||||
// we ensure there's always one before the content.
|
||||
let extra = if content_after_toc.as_bytes()[0] == b'\n' {
|
||||
""
|
||||
} else {
|
||||
vec![e]
|
||||
}
|
||||
})
|
||||
.flatten();
|
||||
|
||||
let opts = COptions {
|
||||
newlines_after_codeblock: 1,
|
||||
..Default::default()
|
||||
"\n"
|
||||
};
|
||||
cmark_with_options(events, &mut buf, None, opts)
|
||||
.map(|_| buf)
|
||||
.map_err(|err| Error::msg(format!("Markdown serialization failed: {}", err)))
|
||||
format!(
|
||||
"{}{}{}{}",
|
||||
content_before_toc, toc, extra, content_after_toc
|
||||
)
|
||||
} else {
|
||||
content.to_string()
|
||||
};
|
||||
|
||||
Ok(content)
|
||||
}
|
||||
|
||||
impl Toc {
|
||||
|
|
|
@ -18,3 +18,4 @@
|
|||
## Header 2.2
|
||||
|
||||
### Header 2.2.1
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
\*not emphasized\*
|
||||
\*not emphasized*
|
||||
\<br/> not a tag
|
||||
\[not a link\](/foo)
|
||||
\`not code\`
|
||||
\[not a link](/foo)
|
||||
\`not code`
|
||||
\* not a list
|
||||
\# not a heading
|
||||
\[foo\]: /url "not a reference"
|
||||
\[foo]: /url "not a reference"
|
||||
\ö not a character entity
|
||||
1\. not a list
|
||||
|
|
|
@ -13,5 +13,3 @@
|
|||
##### Header 1.1.1.1.1
|
||||
|
||||
# Another header `with inline` code
|
||||
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ macro_rules! assert_toc {
|
|||
let chapter = Chapter::from_content(content);
|
||||
let result = Toc::add_toc(&chapter, &config);
|
||||
match result {
|
||||
Ok(result) => assert_eq!(expected.trim_end(), result),
|
||||
Ok(result) => assert_eq!(expected, result),
|
||||
Err(e) => panic!("{} failed. Error: {}", $name, e),
|
||||
}
|
||||
};
|
||||
|
@ -114,7 +114,7 @@ fn unique_slugs() {
|
|||
|
||||
#[test]
|
||||
fn add_toc_with_github_marker() {
|
||||
let marker = "* auto-gen TOC:\n{:toc}".to_owned();
|
||||
let marker = "* auto-gen TOC:\n{:toc}\n".to_owned();
|
||||
assert_toc!("github_marker", with_marker(marker));
|
||||
}
|
||||
|
||||
|
|
|
@ -7,14 +7,9 @@
|
|||
* [Level 1.2.1](#level-121)
|
||||
|
||||
## Level 1.1
|
||||
|
||||
### Level 1.1.1
|
||||
|
||||
### Level 1.1.2
|
||||
|
||||
## Level 1.2
|
||||
|
||||
### Level 1.2.1
|
||||
|
||||
text
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# Heading
|
||||
|
||||
|Head 1|Head 2|
|
||||
|------|------|
|
||||
|Row 1|Row 2|
|
||||
| Head 1 | Head 2 |
|
||||
|--------|--------|
|
||||
| Row 1 | Row 2 |
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Heading
|
||||
|
||||
|Head 1|Head 2|
|
||||
|------|------|
|
||||
|<span>Row 1</span>|Row 2|
|
||||
| Head 1 | Head 2 |
|
||||
|--------|--------|
|
||||
| <span>Row 1</span> | Row 2 |
|
||||
|
|
Loading…
Reference in a new issue