From 984004115864610ac52bda6726e53be209b9831c Mon Sep 17 00:00:00 2001
From: Jan-Erik Rediger
Date: Fri, 22 Apr 2022 23:41:41 +0200
Subject: [PATCH] Strip markdown tags before indexing

---
Reviewer notes (commentary only; no diff line below is altered):

* Purpose: adds one `sqlite-utils convert` call to index.sh. It runs after
  `markdown-to-sqlite` has loaded `_posts/*` into the `posts` table and
  before the `enable-fts` / `rebuild-fts` calls, and it rewrites the
  `text` column of `posts` through the inline Python `convert(text)`.
* The Python snippet is passed as one single-quoted shell argument, which
  is why every string literal inside it uses double quotes.
* `unmark_element(element, stream=None)` writes the element's `.text`,
  recurses into each child with the same stream, then writes the
  element's `.tail`, and returns the stream's accumulated value. Only the
  outermost call creates the `io.StringIO`.
* `define()` registers that function as `Markdown.output_formats["plain"]`
  and builds a single `Markdown(output_format="plain")` instance stored in
  the global `__md`; `convert(text)` returns `__md.convert(text)` using
  that one instance for every call.
* NOTE(review): the `define()` wrapper and the `global __md` / `global io`
  statements look like a workaround for how sqlite-utils evaluates the
  code string. Confirm against the sqlite-utils version in use before
  simplifying them.
* NOTE(review): `__md.stripTopLevelTags = False` presumably prevents
  Python-Markdown from trimming the serialized output as if it were
  wrapped in a root tag. Confirm against the Python-Markdown source.

 index.sh | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/index.sh b/index.sh
index f45d165..8f5b570 100755
--- a/index.sh
+++ b/index.sh
@@ -21,5 +21,37 @@ PRAGMA foreign_keys = ON;
 " >/dev/null
 
 markdown-to-sqlite "$DATABASE_PATH" posts _posts/*
+
+sqlite-utils convert "$DATABASE_PATH" posts text '
+from markdown import Markdown
+import io
+
+__md = None
+
+def define():
+    global __md
+    def unmark_element(element, stream=None):
+        global io
+        if stream is None:
+            stream = io.StringIO()
+        if element.text:
+            stream.write(element.text)
+        for sub in element:
+            unmark_element(sub, stream)
+        if element.tail:
+            stream.write(element.tail)
+        return stream.getvalue()
+
+    Markdown.output_formats["plain"] = unmark_element
+    __md = Markdown(output_format="plain")
+    __md.stripTopLevelTags = False
+
+define()
+
+def convert(text):
+    global __md
+    return __md.convert(text)
+'
+
 sqlite-utils enable-fts --fts5 "$DATABASE_PATH" posts title text 2>/dev/null || true
 sqlite-utils rebuild-fts "$DATABASE_PATH" posts