1
Fork 0

Strip markdown tags before indexing

This commit is contained in:
Jan-Erik Rediger 2022-04-22 23:41:41 +02:00
parent 7a907f3c18
commit 9840041158

View file

@ -21,5 +21,37 @@ PRAGMA foreign_keys = ON;
" >/dev/null " >/dev/null
markdown-to-sqlite "$DATABASE_PATH" posts _posts/*
sqlite-utils convert "$DATABASE_PATH" posts text '
from markdown import Markdown
import io

# Shared converter instance; assigned by define() below and read by convert().
__md = None


def define():
    """Register a "plain" output format and build the shared converter.

    Adds a "plain" entry to ``Markdown.output_formats`` whose serializer
    emits only the text found in the element tree (no tags), then stores
    a ``Markdown`` instance configured with that format in ``__md``.
    """
    global __md

    def write_text(element, stream):
        """Write the text of *element* and its descendants to *stream*.

        For each element this writes its ``text``, then recurses into its
        children in order, then writes its ``tail``.
        """
        if element.text:
            stream.write(element.text)
        for sub in element:
            write_text(sub, stream)
        if element.tail:
            stream.write(element.tail)

    def unmark_element(element, stream=None):
        """Return the text content of *element* with all tags dropped.

        Args:
            element: Root of the element tree to serialize.
            stream: Optional text buffer to append to; a new
                ``io.StringIO`` is created when omitted.

        Returns:
            The full contents of the buffer after *element* was written.
        """
        # NOTE(review): this global declaration is kept from the original;
        # presumably it is needed because of how sqlite-utils executes the
        # snippet - confirm before removing.
        global io
        if stream is None:
            stream = io.StringIO()
        # Walk the tree with a write-only helper and read the buffer once.
        # Previously every recursive call ended with getvalue(), copying the
        # whole buffer per sub-element only to discard the result.
        write_text(element, stream)
        return stream.getvalue()

    Markdown.output_formats["plain"] = unmark_element
    __md = Markdown(output_format="plain")
    # NOTE(review): presumably disabled because the plain output has no
    # wrapping top-level tag for Markdown to strip - confirm against the
    # Python-Markdown documentation.
    __md.stripTopLevelTags = False


define()


def convert(text):
    """Return *text* converted by the shared plain-format Markdown instance."""
    global __md
    return __md.convert(text)
'
sqlite-utils enable-fts --fts5 "$DATABASE_PATH" posts title text 2>/dev/null || true
sqlite-utils rebuild-fts "$DATABASE_PATH" posts