515 lines
16 KiB
Python
515 lines
16 KiB
Python
|
"""Generic blocks extension."""
|
||
|
from markdown import Extension
|
||
|
from markdown.blockprocessors import BlockProcessor
|
||
|
from markdown.treeprocessors import Treeprocessor
|
||
|
from markdown import util as mutil
|
||
|
from .. import util
|
||
|
import xml.etree.ElementTree as etree
|
||
|
import re
|
||
|
import yaml
|
||
|
import textwrap
|
||
|
|
||
|
# Fenced block placeholder for SuperFences.
#
# Matches a line that holds nothing but an HTML stash placeholder, optionally
# preceded by blockquote markers and/or spaces.
#   group 1: the leading `>`/space prefix
#   group 2: the placeholder key (the text between the first and last
#            characters of `HTML_PLACEHOLDER`)
#   group 3: the numeric part of the key
FENCED_BLOCK_RE = re.compile(
    ''.join(
        (
            r'^([\> ]*)',
            mutil.HTML_PLACEHOLDER[0],
            '(',
            mutil.HTML_PLACEHOLDER[1:-1] % r'([0-9]+)',
            ')',
            mutil.HTML_PLACEHOLDER[-1],
            '$',
        )
    )
)
|
||
|
|
||
|
# Block start: up to three spaces of indent, a fence of three or more slashes
# (group 1), the block name (group 2) and an optional `| argument` (group 3).
RE_START = re.compile(r'(?:^|\n)[ ]{0,3}(/{3,})[ ]*([\w-]+)[ ]*(?:\|[ ]*(.*?)[ ]*)?(?:\n|$)')

# Block end: a line holding only a fence of three or more slashes (group 1).
RE_END = re.compile(r'(?m)(?:^|\n)[ ]{0,3}(/{3,})[ ]*(?:\n|$)')

# Frontmatter patterns: a line holding only `---` (group 1).
RE_YAML_START = re.compile(
    r'(?m)^[ ]{0,3}(-{3})[ ]*(?:\n|$)'
)

RE_YAML_END = re.compile(r'(?m)^[ ]{0,3}(-{3})[ ]*(?:\n|$)')

# One or more consecutive lines indented by at least four spaces and carrying
# non-whitespace content.
RE_INDENT_YAML_LINE = re.compile(
    r'(?m)^(?:[ ]{4,}(?!\s).*?(?:\n|$))+'
)
|
||
|
|
||
|
|
||
|
class BlockEntry:
    """Bookkeeping record for a single block tracked by the processor."""

    def __init__(self, block, el, parent):
        """
        Store the block together with its element and parent.

        `block` is the block object, `el` the element associated with it and
        `parent` the element it was attached under. `hungry` starts out as
        `False`; the processor sets it while the block still waits for its end.
        """

        self.block, self.el, self.parent = block, el, parent
        self.hungry = False
|
||
|
|
||
|
|
||
|
def get_frontmatter(string):
    """
    Parse YAML-ish key/value frontmatter from a string.

    Returns the parsed mapping, an empty `dict` when the YAML document is
    empty, or `None` when parsing fails or the result is not a `dict`.
    """

    try:
        parsed = yaml.safe_load(string)
    except Exception:
        # Best effort: any parsing failure simply means "no valid frontmatter".
        return None

    if parsed is None:
        return {}
    return parsed if isinstance(parsed, dict) else None
|
||
|
|
||
|
|
||
|
def reindent(text, pos, level):
    """
    Reindent the code to where it is supposed to be.

    Removes the first `pos - level` characters from every line of `text` and
    returns the resulting lines as a list (one entry per `\\n`-separated line).

    When `level` is greater than `pos` nothing is removed. Without that clamp
    the slice start would be negative and each line would be cut down to its
    last few characters.
    """

    # The offset is the same for every line, so compute it once.
    offset = max(pos - level, 0)
    return [line[offset:] for line in text.split('\n')]
|
||
|
|
||
|
|
||
|
def unescape_markdown(md, blocks, is_raw):
    """
    Revert stash placeholders in `blocks` back to plain text.

    Every line that consists of a placeholder (see `FENCED_BLOCK_RE`) is
    looked up in the SuperFences stash first, if SuperFences is the active
    fenced code preprocessor. When nothing is found there and `is_raw` is
    set, it is looked up in the Markdown HTML stash. Lines that cannot be
    resolved are kept unchanged. Returns a new list of block strings.
    """

    # Locate the SuperFences extension, if it is the active fenced code preprocessor.
    fences = None
    try:
        from ..superfences import SuperFencesBlockPreprocessor
        candidate = md.preprocessors['fenced_code_block']
        if isinstance(candidate, SuperFencesBlockPreprocessor):
            fences = candidate.extension
    except Exception:
        pass

    restored = []
    for chunk in blocks:
        out = []
        for line in chunk.split('\n'):
            found = FENCED_BLOCK_RE.match(line)
            if not found:
                # Ordinary line, nothing to revert
                out.append(line)
                continue

            prefix, key = found.group(1), found.group(2)
            content = None

            # Extract SuperFences content
            if fences is not None:
                content, pos = fences.stash.get(key, (None, None))
                if content is not None:
                    out.extend(reindent(content, pos, len(prefix)))
                    fences.stash.remove(key)

            # Extract other HTML stashed content
            if content is None and is_raw:
                stash_index = int(key.split(':')[1])
                raw_html = md.htmlStash.rawHtmlBlocks
                if stash_index < len(raw_html):
                    content = raw_html[stash_index]
                    if isinstance(content, etree.Element):
                        content = etree.tostring(content, encoding='unicode', method='html')
                    out.append(content)

            # Couldn't find anything to extract
            if content is None:  # pragma: no cover
                out.append(line)

        restored.append('\n'.join(out))

    return restored
|
||
|
|
||
|
|
||
|
class BlocksTreeprocessor(Treeprocessor):
    """Tree processor that fires the `on_inline_end` event of finished blocks."""

    def __init__(self, md, blocks):
        """Keep a reference to the blocks processor whose `inline_stack` is drained."""

        super().__init__(md)
        self.blocks = blocks

    def run(self, doc):
        """Call `on_inline_end` for every queued block entry, in queue order."""

        pending = self.blocks.inline_stack
        while pending:
            finished = pending.pop(0)
            finished.block.on_inline_end(finished.el)
|
||
|
|
||
|
|
||
|
class BlocksProcessor(BlockProcessor):
    """
    Generic block processor.

    Finds regions opened by a `/// name | argument` line (see `RE_START`) and
    closed by a fence of the same number of slashes (see `RE_END`), creates the
    registered block object for `name` and feeds the enclosed text to it.
    """

    def __init__(self, parser, md):
        """Initialization."""

        self.md = md

        # The Block classes indexable by name
        self.blocks = {}
        self.config = {}
        self.empty_tags = {'hr'}
        self.block_level_tags = set(md.block_level_elements)
        self.block_level_tags.add('html')

        # Block-level tags in which the content only gets span level parsing
        self.span_tags = {
            'address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th'
        }
        # Block-level tags which never get their content parsed.
        self.raw_tags = {'canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea', 'code'}
        # Block-level tags in which the content gets parsed as blocks
        self.block_tags = self.block_level_tags - (self.span_tags | self.raw_tags | self.empty_tags)
        self.span_and_blocks_tags = self.block_tags | self.span_tags

        super().__init__(parser)

        # Persistent storage across a document for blocks
        self.trackers = {}
        # Currently queued up blocks
        self.stack = []
        # Blocks that should be processed after inline.
        self.inline_stack = []
        # When set, the assigned block is actively parsing blocks.
        self.working = None
        # Cached the found parent when testing
        # so we can quickly retrieve it when running
        self.cached_parent = None
        self.cached_block = None

        # Used during the alpha/beta stage
        self.start = RE_START
        self.end = RE_END
        self.yaml_line = RE_INDENT_YAML_LINE

    def register(self, b, config):
        """
        Register a block class under its `NAME` together with its config.

        Raises `ValueError` when a block with the same name is already registered.
        """

        if b.NAME in self.blocks:
            raise ValueError(f'The block name {b.NAME} is already registered!')
        self.blocks[b.NAME] = b
        self.config[b.NAME] = config

    def test(self, parent, block):
        """
        Test to see if we should process the block.

        Returns a truthy value when a hungry block is waiting for more content
        under `parent`, or when `block` opens a registered block whose header
        validates. In the latter case the block object and the remaining text
        are cached for `run`, and any text preceding the start line is parsed
        into `parent` right away.
        """

        # Are we hungry for more?
        if self.get_parent(parent) is not None:
            return True

        # Is this the start of a new block?
        m = self.start.search(block)
        if m is None:
            return False

        pre_text = block[:m.start()] if m.start() > 0 else None

        # Create a block object
        name = m.group(2).lower()
        if name not in self.blocks:
            return False

        # `_reset` normally creates the tracker; create it on demand so a block
        # registered after the last reset does not fail with a `KeyError`.
        tracker = self.trackers.setdefault(name, {})
        generic_block = self.blocks[name](len(m.group(1)), tracker, self, self.config[name])

        # Remove first line
        block = block[m.end():]

        # Get frontmatter and argument(s)
        options, the_rest = self.split_header(block, generic_block.length)
        arguments = m.group(3)

        # Options must be valid. They are expanded as keyword arguments below,
        # so a mapping with non-string keys (e.g. YAML `1: x`) is rejected here
        # instead of raising `TypeError`.
        status = options is not None and all(isinstance(key, str) for key in options)

        # Update the config for the Block
        if status:
            status = generic_block._validate(parent, arguments, **options)

        # Cache the found Block and any remaining content
        if status:
            self.cached_block = (generic_block, the_rest)

            # Any text before the block should get handled
            if pre_text is not None:
                self.parser.parseBlocks(parent, [pre_text])

        return status

    def _reset(self):
        """Clear all per-document state and create a fresh tracker per registered block."""

        self.stack.clear()
        self.inline_stack.clear()
        self.working = None
        self.trackers = {name: {} for name in self.blocks}

    def _find_end(self, block, length):
        """Return the first end fence match in `block` with exactly `length` slashes, else `None`."""

        for match in self.end.finditer(block):
            if len(match.group(1)) == length:
                return match
        return None

    def split_end(self, blocks, length):
        """
        Search for end and split the blocks while removing the end.

        Returns `(good, end)`: `good` holds the text that belongs to the current
        Block and `end` tells whether its end fence was found. `blocks` is
        modified in place so that it only contains what follows the end fence.
        """

        good = []
        bad = []
        end = False

        # Split on our end notation for the current Block
        for e, block in enumerate(blocks):

            # Find the end of the Block
            m = self._find_end(block, length)

            # Gather blocks until we find our end
            if m is None:
                good.append(block)
                continue

            # Separate everything from before the "end" and after
            temp = block[:m.start(0)]
            if temp:
                good.append(temp[:-1] if temp.endswith('\n') else temp)
            end = True

            # Since we found our end, everything after is unwanted
            temp = block[m.end(0):]
            if temp:
                bad.append(temp)
            bad.extend(blocks[e + 1:])
            break

        # Augment the blocks
        blocks.clear()
        blocks.extend(bad)

        # Send back the new list of blocks to parse and note whether we found our end
        return good, end

    def split_header(self, block, length):
        """
        Split, YAML-ish header out.

        Returns `(options, rest)`. `options` is the parsed header (`{}` when
        there is none, `None` when it could not be parsed into a `dict`) and
        `rest` is the remaining text, including any end fence found in `block`.
        """

        blocks = []

        # Search for end in first block and move the block ending to be parsed later
        m = self._find_end(block, length)
        if m is not None:
            blocks.insert(0, block[m.start(0):])
            block = block[:m.start(0)]

        m = self.yaml_line.match(block)
        if m is not None:
            config = textwrap.dedent(m.group(0))
            if config.strip():
                blocks.insert(0, block[m.end():])
                return get_frontmatter(config), '\n'.join(blocks)

        blocks.insert(0, block)

        return {}, '\n'.join(blocks)

    def get_parent(self, parent):
        """
        Get parent.

        Walks from `parent` down through its last children looking for an
        element that is the parent of a hungry stack entry. The result is
        cached once so the `run` call following `test` does not search again.
        Returns `None` when no hungry entry matches.
        """

        # Returned the cached parent from our last attempt
        if self.cached_parent is not None:
            parent = self.cached_parent
            self.cached_parent = None
            return parent

        temp = parent
        while temp is not None:
            for entry in self.stack:
                if entry.hungry and entry.parent is temp:
                    self.cached_parent = temp
                    return temp
            temp = self.lastChild(temp)
        return None

    def is_raw(self, tag):
        """Is tag raw."""

        return tag.tag in self.raw_tags

    def is_block(self, tag):
        """Is tag block."""

        return tag.tag in self.block_tags

    def parse_blocks(self, blocks, entry):
        """
        Parse the blocks.

        For each text block the Block is asked for its target element
        (`on_add`) and its content mode (`on_markdown`). Unknown modes are
        treated as `'auto'`, which decides by the target's tag name. Block
        content is handed to the block parser; everything else is appended to
        the target as text.
        """

        # Get the target element and parse

        for b in blocks:
            target = entry.block.on_add(entry.el)

            # The Block does not or no longer accepts more content
            if target is None:  # pragma: no cover
                break

            mode = entry.block.on_markdown()
            if mode not in ('block', 'inline', 'raw'):
                mode = 'auto'
            is_block = mode == 'block' or (mode == 'auto' and self.is_block(target))
            is_atomic = mode == 'raw' or (mode == 'auto' and self.is_raw(target))

            # We should revert fenced code in spans or atomic tags.
            # Make sure atomic tags have content wrapped as `AtomicString`.
            if is_atomic or not is_block:
                child = list(target)[-1] if len(target) else None
                text = target.text if child is None else child.tail
                b = '\n\n'.join(unescape_markdown(self.md, [b], is_atomic))

                if text:
                    text += '\n\n' + b
                else:
                    text = b

                if child is None:
                    target.text = mutil.AtomicString(text) if is_atomic else text
                else:  # pragma: no cover
                    # TODO: We would need to build a special plugin to test this,
                    # as none of the default ones do this, but we have verified this
                    # locally. Once we've written a test, we can remove this.
                    child.tail = mutil.AtomicString(text) if is_atomic else text

            # Block tags should have content go through the normal block processor
            else:
                self.parser.state.set('blocks')
                working = self.working
                self.working = entry
                self.parser.parseChunk(target, b)
                self.parser.state.reset()
                self.working = working

    def run(self, parent, blocks):
        """
        Start a new Block or continue feeding a hungry one.

        When `test` cached a new Block, its element is created under the
        resolved parent and its content is parsed. Otherwise the hungry stack
        entry belonging to the parent receives the content. A Block whose end
        fence was found gets its `_end` event and moves to `inline_stack`; one
        without an end is marked hungry.
        """

        # Get the appropriate parent for this Block
        temp = self.get_parent(parent)
        if temp is not None:
            parent = temp

        # Did we find a new Block?
        if self.cached_block:
            # Get cached Block and reset the cache
            generic_block, block = self.cached_block
            self.cached_block = None

            # Discard first block as we've already processed what we need from it
            blocks.pop(0)
            if block:
                blocks.insert(0, block)

            # Ensure a "tight" parent list item is converted to "loose".
            if parent is not None and parent.tag in ('li', 'dd'):  # pragma: no cover
                text = parent.text
                if parent.text:
                    parent.text = ''
                    p = etree.SubElement(parent, 'p')
                    p.text = text

            # Create the block element
            el = generic_block._create(parent)

            # Push a Block entry on the stack.
            self.stack.append(BlockEntry(generic_block, el, parent))

            # Split out blocks we care about
            ours, end = self.split_end(blocks, generic_block.length)

            # Parse the text blocks under the Block. The index is taken before
            # parsing and reused afterwards to address this entry again.
            index = len(self.stack) - 1
            self.parse_blocks(ours, self.stack[-1])

            # Remove Block from the stack if we are at the end
            # or add it to the hungry list.
            if end:
                # Run the "on end" event
                generic_block._end(el)
                self.inline_stack.append(self.stack[index])
                del self.stack[index]
            else:
                self.stack[index].hungry = True

        else:
            for r, entry in enumerate(self.stack):
                if entry.hungry and parent is entry.parent:
                    # Find and remove end from the blocks
                    ours, end = self.split_end(blocks, entry.block.length)

                    # Get the target element and parse
                    entry.hungry = False
                    self.parse_blocks(ours, entry)

                    # Clean up if we completed the Block
                    if end:
                        # Run "on end" event
                        entry.block._end(entry.el)
                        self.inline_stack.append(entry)
                        del self.stack[r]
                    else:
                        entry.hungry = True

                    # Only one entry is served per call; leaving the loop here
                    # also makes the deletion above safe.
                    break
|
||
|
|
||
|
|
||
|
class BlocksMgrExtension(Extension):
    """Extension that installs the generic Blocks processor into Markdown."""

    def extendMarkdown(self, md):
        """
        Add Blocks to Markdown instance.

        Registers this extension, makes `/` an escapable character and
        registers both the block processor and the tree processor that
        finishes blocks after inline processing.
        """

        md.registerExtension(self)
        util.escape_chars(md, ['/'])

        processor = BlocksProcessor(md.parser, md)
        self.extension = processor

        # We want to be right after list indentations are processed
        md.parser.blockprocessors.register(processor, "blocks", 89.99)

        inline_end = BlocksTreeprocessor(md, processor)
        md.treeprocessors.register(inline_end, 'blocks_on_inline_end', 19.99)

    def reset(self):
        """Reset the state of the blocks processor."""

        self.extension._reset()
|
||
|
|
||
|
|
||
|
class BlocksExtension(Extension):
|
||
|
"""Blocks Extension."""
|
||
|
|
||
|
def register_block_mgr(self, md):
    """
    Return the Blocks manager of the Markdown instance.

    The block processor registered as `'blocks'` is reused when present;
    otherwise a `BlocksMgrExtension` is installed and its processor returned.
    """

    processors = md.parser.blockprocessors
    if 'blocks' in processors:
        return processors['blocks']

    manager_ext = BlocksMgrExtension()
    manager_ext.extendMarkdown(md)
    return manager_ext.extension
|
||
|
|
||
|
def extendMarkdown(self, md):
    """Obtain the Blocks manager for `md` and pass both to `extendMarkdownBlocks`."""

    self.extendMarkdownBlocks(md, self.register_block_mgr(md))
|
||
|
|
||
|
def extendMarkdownBlocks(self, md, block_mgr):
|
||
|
"""Extend Markdown blocks."""
|