diff --git a/Cargo.lock b/Cargo.lock index ecbbbfe..1b12f4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -845,8 +845,7 @@ dependencies = [ "log", "mdbook", "pretty_assertions", - "pulldown-cmark 0.8.0", - "pulldown-cmark-to-cmark", + "pulldown-cmark 0.9.1", "serde_json", "toml", ] @@ -1205,9 +1204,9 @@ dependencies = [ [[package]] name = "pulldown-cmark" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8" +checksum = "34f197a544b0c9ab3ae46c359a7ec9cbbb5c7bf97054266fecb7ead794a181d6" dependencies = [ "bitflags", "getopts", @@ -1215,15 +1214,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "pulldown-cmark-to-cmark" -version = "6.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95048382115a9da7be92ad51c84064d585b7da17472dcaa7f5eed8853c4c3707" -dependencies = [ - "pulldown-cmark 0.8.0", -] - [[package]] name = "quick-error" version = "1.2.3" diff --git a/Cargo.toml b/Cargo.toml index 6538455..68adc9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,9 +10,7 @@ edition = "2018" [dependencies] mdbook = "0.4.10" -pulldown-cmark = "0.8.0" -pulldown-cmark-to-cmark = "6.0.2" -env_logger = "0.8.4" +pulldown-cmark = "0.9.1" log = "0.4.11" clap = "2.33.3" serde_json = "1.0.57" @@ -20,3 +18,4 @@ toml = "0.5.6" [dev-dependencies] pretty_assertions = "0.6.1" +env_logger = "0.8.4" diff --git a/src/lib.rs b/src/lib.rs index d292c33..c4d11b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,6 @@ use mdbook::errors::{Error, Result}; use mdbook::preprocess::{Preprocessor, PreprocessorContext}; use pulldown_cmark::Tag::*; use pulldown_cmark::{Event, Options, Parser}; -use pulldown_cmark_to_cmark::{cmark_with_options, Options as COptions}; use toml::value::Table; pub struct Toc; @@ -110,6 +109,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String { let mut toc_iter = toc.iter().peekable(); // Start from the level of the first header. + let min_level = toc.iter().map(|(lvl, _, _)| *lvl).min().unwrap_or(1); let mut last_lower = match toc_iter.peek() { Some((lvl, _, _)) => *lvl, None => 0, @@ -127,7 +127,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String { }); for (level, name, slug) in toc { - let width = 2 * (level - 1) as usize; + let width = 2 * (level - min_level) as usize; writeln!(result, "{1:0$}* [{2}](#{3})", width, "", name, slug).unwrap(); } @@ -135,7 +135,6 @@ fn build_toc(toc: &[(u32, String, String)]) -> String { } fn add_toc(content: &str, cfg: &Config) -> Result { - let mut buf = String::with_capacity(content.len()); let mut toc_found = false; let mut toc_content = vec![]; @@ -150,40 +149,41 @@ fn add_toc(content: &str, cfg: &Config) -> Result { opts.insert(Options::ENABLE_TASKLISTS); let mark: Vec = Parser::new(&cfg.marker).collect(); - let mut mark_start = -1; + let mut mark_start = None; + let mut mark_end = 0..0; let mut mark_loc = 0; - let mut c = -1; - for e in Parser::new_ext(&content, opts) { - c += 1; - log::trace!("Event: {:?}", e); + for (e, span) in Parser::new_ext(&content, opts).into_offset_iter() { + log::trace!("Event: {:?} (span: {:?})", e, span); if !toc_found { log::trace!( - "TOC not found yet. Location: {}, Start: {}", + "TOC not found yet. Location: {}, Start: {:?}", mark_loc, mark_start ); if e == mark[mark_loc] { - if mark_start == -1 { - mark_start = c; + if mark_start.is_none() { + mark_start = Some(span.clone()); } mark_loc += 1; if mark_loc >= mark.len() { + mark_end = span; toc_found = true } } else if mark_loc > 0 { mark_loc = 0; - mark_start = -1; + mark_start = None; } else { continue; } } - if let Event::Start(Heading(lvl)) = e { - current_header_level = Some(lvl); + if let Event::Start(Heading(lvl, fragment, classes)) = e { + log::trace!("Header(lvl={lvl}, fragment={fragment:?}, classes={classes:?})"); + current_header_level = Some(lvl as u32); continue; } - if let Event::End(Heading(_)) = e { + if let Event::End(Heading(..)) = e { // Skip if this header is nested too deeply. if let Some(level) = current_header_level.take() { let header = current_header.clone(); @@ -219,29 +219,30 @@ fn add_toc(content: &str, cfg: &Config) -> Result { let toc = build_toc(&toc_content); log::trace!("Built TOC: {:?}", toc); - let toc_events = Parser::new(&toc).collect::>(); + log::trace!("toc_found={toc_found} mark_start={mark_start:?} mark_end={mark_end:?}"); - let mut c = -1; - let events = Parser::new_ext(&content, opts) - .map(|e| { - c += 1; - if toc_found && c > mark_start && c < mark_start + (mark.len() as i32) { - vec![] - } else if toc_found && c == mark_start { - toc_events.clone() - } else { - vec![e] - } - }) - .flatten(); - - let opts = COptions { - newlines_after_codeblock: 1, - ..Default::default() + let content = if toc_found { + let mark_start = mark_start.unwrap(); + let content_before_toc = &content[0..mark_start.start]; + let content_after_toc = &content[mark_end.end..]; + log::trace!("content_before_toc={:?}", content_before_toc); + log::trace!("content_after_toc={:?}", content_after_toc); + // Multiline markers might have consumed trailing newlines, + // we ensure there's always one before the content. + let extra = if content_after_toc.as_bytes()[0] == b'\n' { + "" + } else { + "\n" + }; + format!( + "{}{}{}{}", + content_before_toc, toc, extra, content_after_toc + ) + } else { + content.to_string() }; - cmark_with_options(events, &mut buf, None, opts) - .map(|_| buf) - .map_err(|err| Error::msg(format!("Markdown serialization failed: {}", err))) + + Ok(content) } impl Toc { diff --git a/tests/adds_toc.out.md b/tests/adds_toc.out.md index d23da60..a0f5312 100644 --- a/tests/adds_toc.out.md +++ b/tests/adds_toc.out.md @@ -18,3 +18,4 @@ ## Header 2.2 ### Header 2.2.1 + diff --git a/tests/backslash_escapes.out.md b/tests/backslash_escapes.out.md index 6be9dec..55fd035 100644 --- a/tests/backslash_escapes.out.md +++ b/tests/backslash_escapes.out.md @@ -1,9 +1,9 @@ -\*not emphasized\* +\*not emphasized* \
not a tag -\[not a link\](/foo) -\`not code\` +\[not a link](/foo) +\`not code` \* not a list \# not a heading -\[foo\]: /url "not a reference" +\[foo]: /url "not a reference" \ö not a character entity 1\. not a list diff --git a/tests/handles_inline_code.in.md b/tests/handles_inline_code.in.md index bf49fa2..bd3181f 100644 --- a/tests/handles_inline_code.in.md +++ b/tests/handles_inline_code.in.md @@ -13,5 +13,3 @@ ##### Header 1.1.1.1.1 # Another header `with inline` code - - diff --git a/tests/it.rs b/tests/it.rs index b0d7e60..474b2dc 100644 --- a/tests/it.rs +++ b/tests/it.rs @@ -58,7 +58,7 @@ macro_rules! assert_toc { let chapter = Chapter::from_content(content); let result = Toc::add_toc(&chapter, &config); match result { - Ok(result) => assert_eq!(expected.trim_end(), result), + Ok(result) => assert_eq!(expected, result), Err(e) => panic!("{} failed. Error: {}", $name, e), } }; @@ -114,7 +114,7 @@ fn unique_slugs() { #[test] fn add_toc_with_github_marker() { - let marker = "* auto-gen TOC:\n{:toc}".to_owned(); + let marker = "* auto-gen TOC:\n{:toc}\n".to_owned(); assert_toc!("github_marker", with_marker(marker)); } diff --git a/tests/multi_header_linear.out.md b/tests/multi_header_linear.out.md index d5afac4..219ff6f 100644 --- a/tests/multi_header_linear.out.md +++ b/tests/multi_header_linear.out.md @@ -7,14 +7,9 @@ * [Level 1.2.1](#level-121) ## Level 1.1 - ### Level 1.1.1 - ### Level 1.1.2 - ## Level 1.2 - ### Level 1.2.1 text - diff --git a/tests/tables_untouched.out.md b/tests/tables_untouched.out.md index 7207799..1d668ed 100644 --- a/tests/tables_untouched.out.md +++ b/tests/tables_untouched.out.md @@ -1,5 +1,6 @@ # Heading -|Head 1|Head 2| -|------|------| -|Row 1|Row 2| +| Head 1 | Head 2 | +|--------|--------| +| Row 1 | Row 2 | + diff --git a/tests/tables_with_html.out.md b/tests/tables_with_html.out.md index 97e5b03..9f13052 100644 --- a/tests/tables_with_html.out.md +++ b/tests/tables_with_html.out.md @@ -1,5 +1,5 @@ # Heading -|Head 1|Head 2| -|------|------| -|Row 1|Row 2| +| Head 1 | Head 2 | +|--------|--------| +| Row 1 | Row 2 |