333 lines
9.6 KiB
Python
333 lines
9.6 KiB
Python
"""
General utilities.

MIT license.

Copyright (c) 2017 Isaac Muse <isaacmuse@gmail.com>
"""
|
|
from markdown.inlinepatterns import InlineProcessor
|
|
import xml.etree.ElementTree as etree
|
|
from collections import namedtuple
|
|
import sys
|
|
import copy
|
|
import re
|
|
import html
|
|
from urllib.request import pathname2url, url2pathname
|
|
from urllib.parse import urlparse
|
|
from functools import wraps
|
|
import warnings
|
|
|
|
# A lone ASCII letter: what `urlparse` reports as the "scheme" for input such as `c:/path`.
RE_WIN_DRIVE_LETTER = re.compile(r"^[A-Za-z]$")
# A drive letter plus colon, optionally followed by a backslash separated path.
RE_WIN_DRIVE_PATH = re.compile(r"^[A-Za-z]:(?:\\.*)?$")
# Schemes treated as "clearly a URL". Applied with `match`, so it tests the start of the scheme.
RE_URL = re.compile('(http|ftp)s?|data|mailto|tel|news')
# A path of the form `///c:/...` (triple slash followed by a drive letter).
RE_WIN_DEFAULT_PROTOCOL = re.compile(r"^///[A-Za-z]:(?:/.*)?$")

# Reduce `sys.platform` to one of three labels used by the `is_*` helpers.
if sys.platform == "darwin":  # pragma: no cover
    _PLATFORM = "osx"
elif sys.platform.startswith('win'):
    _PLATFORM = "windows"
else:
    _PLATFORM = "linux"

# True when running on Python 3.9 or newer.
PY39 = sys.version_info >= (3, 9)
|
|
|
|
|
|
def is_win():  # pragma: no cover
    """Return `True` when the detected platform label is `"windows"`."""

    return _PLATFORM == "windows"
|
|
|
|
|
|
def is_linux():  # pragma: no cover
    """
    Return `True` when the detected platform label is `"linux"`.

    The label `"linux"` is the fallback for every platform that is neither
    Windows nor macOS.
    """

    return _PLATFORM == "linux"
|
|
|
|
|
|
def is_mac():  # pragma: no cover
    """Return `True` when the detected platform label is `"osx"` (macOS)."""

    return _PLATFORM == "osx"
|
|
|
|
|
|
def url2path(path):
    """
    Convert a URL style path to a local file system path.

    `path` is the URL path component (despite the parameter name); it is
    handed directly to `urllib.request.url2pathname` and the result returned.
    """

    local_path = url2pathname(path)
    return local_path
|
|
|
|
|
|
def path2url(url):
    """
    Convert a local file system path to a URL style path.

    `url` is the local path (despite the parameter name); it is converted with
    `urllib.request.pathname2url`. On Windows, a result of the form
    `///c:/...` has its leading `///` removed.
    """

    converted = pathname2url(url)
    if not is_win():
        return converted

    # On Windows, drop the default protocol notation `///` in front of the drive.
    if RE_WIN_DEFAULT_PROTOCOL.match(converted):
        converted = converted.replace('///', '', 1)
    return converted
|
|
|
|
|
|
def get_code_points(s):
    """Return the items of `s` (one per character for a string) as a new list."""

    return [*s]
|
|
|
|
|
|
def get_ord(c):
    """Return the integer Unicode code point of the single character `c`."""

    code_point = ord(c)
    return code_point
|
|
|
|
|
|
def get_char(value):
    """Return the character whose Unicode code point is the integer `value`."""

    character = chr(value)
    return character
|
|
|
|
|
|
def escape_chars(md, echrs):
    """
    Register additional escapable characters on a Markdown instance.

    `md.ESCAPED_CHARS` is never mutated in place: a shallow copy is taken,
    each character of `echrs` that is not already present is appended to the
    copy, and the copy is then assigned back to `md.ESCAPED_CHARS`. This keeps
    the change local to `md` instead of altering a list shared with others.
    """

    current = copy.copy(md.ESCAPED_CHARS)
    for char in echrs:
        if char in current:
            # Already escapable; avoid duplicates.
            continue
        current.append(char)
    md.ESCAPED_CHARS = current
|
|
|
|
|
|
def parse_url(url):
    """
    Split a link into its components and classify it as URL or file path.

    The HTML-unescaped `url` is run through `urlparse`, and the pieces are
    then inspected to decide whether the link is a URL or (what we treat as)
    a file path. For file paths the scheme, netloc and path may be rewritten
    so that the whole location ends up in `path`.

    Anything that looks like `c:/` is assumed to be a Windows path. No check
    is made that this **is** a Windows system; 'nix users really shouldn't be
    naming folders `c:`.

    Returns the tuple
    `(scheme, netloc, path, params, query, fragment, is_url, is_absolute)`.
    """

    scheme, netloc, path, params, query, fragment = urlparse(html.unescape(url))
    is_url = False
    is_absolute = False
    file_scheme = scheme == 'file'

    if RE_URL.match(scheme):
        # Scheme starts with one of the recognized URL schemes
        is_url = True
    elif not (scheme or netloc or path):
        # No scheme, host or path: maybe just a URL fragment
        is_url = True
    elif file_scheme and RE_WIN_DRIVE_PATH.match(netloc):
        # file://c:/path or file://c:\path
        path = '/' + (netloc + path).replace('\\', '/')
        netloc = ''
        is_absolute = True
    elif file_scheme and netloc.startswith('\\'):
        # file://\c:\path or file://\\path
        path = (netloc + path).replace('\\', '/')
        netloc = ''
        is_absolute = True
    elif file_scheme:
        # file:///path
        is_absolute = True
    elif RE_WIN_DRIVE_LETTER.match(scheme):
        # c:/path (the drive letter was parsed as the scheme)
        forward_slashed = path.replace('\\', '/')
        path = f'/{scheme}:{forward_slashed}'
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif not scheme and netloc and url.startswith('//'):
        # //file/path
        path = '//' + netloc + path
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif scheme and netloc:
        # A non-file path or strange URL
        is_url = True
    elif path.startswith(('/', '\\')):
        # /root path
        is_absolute = True

    return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)
|
|
|
|
|
|
class PatSeqItem(namedtuple('PatSeqItem', ['pattern', 'builder', 'tags', 'full_recursion'])):
    """
    Pattern sequence item.

    A named tuple of `(pattern, builder, tags, full_recursion)` in which
    `full_recursion` may be omitted and then defaults to `False`.
    """

    def __new__(cls, pattern, builder, tags, full_recursion=False):
        """Create the tuple, filling in the `full_recursion` default."""

        values = (pattern, builder, tags, full_recursion)
        return super().__new__(cls, *values)
|
|
|
|
|
|
class PatternSequenceProcessor(InlineProcessor):
    """
    Processor for handling complex nested patterns such as strong and em matches.

    `PATTERNS` is the ordered list of items that are tried. Each item is read
    through its `pattern`, `builder`, `tags` and `full_recursion` attributes
    (the fields of `PatSeqItem`). The list is empty here.
    """

    PATTERNS = []

    def build_single(self, m, tag, full_recursion, idx):
        """Return a single `tag` element whose content is parsed from group 2 of `m`."""

        element = etree.Element(tag)
        self.parse_sub_patterns(m.group(2), element, None, full_recursion, idx)
        return element

    def build_double(self, m, tags, full_recursion, idx):
        """
        Return double tags: `<outer><inner>group 2</inner>group 3</outer>`.

        `tags` is a comma separated pair `outer,inner`. Group 3 is only
        processed when the match has exactly three groups; its content goes
        after the inner element.
        """

        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), inner, None, full_recursion, idx)
        outer.append(inner)
        if len(m.groups()) == 3:
            # Trailing content follows the inner element inside the outer one.
            self.parse_sub_patterns(m.group(3), outer, inner, full_recursion, idx)
        return outer

    def build_double2(self, m, tags, full_recursion, idx):
        """Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""

        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        # Group 2 is the content of the outer element that precedes the inner one.
        self.parse_sub_patterns(m.group(2), outer, None, full_recursion, idx)
        inner_text = m.group(3)
        outer.append(inner)
        self.parse_sub_patterns(inner_text, inner, None, full_recursion, idx)
        return outer

    def parse_sub_patterns(self, data, parent, last, full_recursion, idx):
        """
        Parses sub patterns.

        `data` (`str`):
            Text to evaluate.

        `parent` (`etree.Element`):
            Parent to attach text and sub elements to.

        `last` (`etree.Element`):
            Last appended child to parent. Can also be None if parent has no children.

        `full_recursion` (`bool`):
            When true, every pattern in `PATTERNS` is tried. Otherwise patterns
            whose index is less than or equal to `idx` are skipped.

        `idx` (`int`):
            Current pattern index that was used to evaluate the parent.

        """

        # `consumed` marks the end of the text already attached to the tree,
        # `cursor` is the position currently being examined.
        consumed = 0
        cursor = 0
        total = len(data)

        while cursor < total:
            # `compiled_re` is not defined in this class (presumably provided by
            # the base class). It is used to spot the start of a potential token.
            if not self.compiled_re.match(data, cursor):
                # No potential emphasis start here; look at the next character.
                cursor += 1
                continue

            found = False
            # See if we can match an emphasis/strong pattern
            for index, item in enumerate(self.PATTERNS):
                # Only evaluate patterns that are after what was used on the parent
                if not full_recursion and index <= idx:
                    continue
                m = item.pattern.match(data, cursor)
                if not m:
                    continue

                # Text before the match belongs to the tail of the last child
                # if there is one, otherwise it is the parent's own text.
                leading = data[consumed:m.start(0)]
                if leading:
                    if last is None:
                        parent.text = leading
                    else:
                        last.tail = leading

                el = self.build_element(m, item.builder, item.tags, item.full_recursion, index)
                parent.append(el)
                last = el
                # Move our position past the matched hunk
                consumed = cursor = m.end(0)
                found = True

            if not found:
                # We matched nothing, move on to the next character
                cursor += 1

        # Append any leftover text as a text node.
        remainder = data[consumed:]
        if remainder:
            if last is None:
                parent.text = remainder
            else:
                last.tail = remainder

    def build_element(self, m, builder, tags, full_recursion, index):
        """
        Element builder.

        Dispatches on `builder`: `'double2'` and `'double'` use the matching
        double builders, anything else is built as a single tag.
        """

        if builder == 'double2':
            return self.build_double2(m, tags, full_recursion, index)
        if builder == 'double':
            return self.build_double(m, tags, full_recursion, index)
        return self.build_single(m, tags, full_recursion, index)

    def handleMatch(self, m, data):
        """
        Parse patterns.

        Each item of `PATTERNS` is tried, in order, at the start of `m`. The
        first one that matches is built into an element. Returns
        `(element, start, end)`, or `(None, None, None)` when nothing matches.
        """

        for index, item in enumerate(self.PATTERNS):
            hit = item.pattern.match(data, m.start(0))
            if hit:
                begin = hit.start(0)
                finish = hit.end(0)
                el = self.build_element(hit, item.builder, item.tags, item.full_recursion, index)
                return el, begin, finish
        return None, None, None
|
|
|
|
|
|
def deprecated(message, stacklevel=2):  # pragma: no cover
    """
    Raise a `DeprecationWarning` when wrapped function/method is called.

    The warning text is `'<function name>' is deprecated. <message>` and the
    wrapped callable is still invoked afterwards, with its result returned.
    `stacklevel` is forwarded to `warnings.warn`.

    Usage:

        @deprecated("This method will be removed in version X; use Y instead.")
        def some_method():
            pass
    """

    def _wrapper(func):
        @wraps(func)
        def _deprecated_func(*args, **kwargs):
            # Warn from inside the wrapper itself so `stacklevel` keeps its meaning.
            notice = f"'{func.__name__}' is deprecated. {message}"
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return func(*args, **kwargs)

        return _deprecated_func

    return _wrapper
|
|
|
|
|
|
def warn_deprecated(message, stacklevel=2):  # pragma: no cover
    """
    Issue a `DeprecationWarning` carrying `message`.

    `stacklevel` is forwarded to `warnings.warn`.
    """

    warnings.warn(message, category=DeprecationWarning, stacklevel=stacklevel)
|