markdown_it and pidgy
compatibility¶
from pidgy import util
import textwrap, markdown_it
The Markdown
class maintains the operational logic to transform partially coded [Markdown] into fully coded Python. The translation happens in two steps:
Markdown.parse
/lex/tokenize the input string into tokens identifying Markdown blocks.
Markdown.render
the tokens into the target language format.
class Markdown(markdown_it.MarkdownIt):
    """A ``markdown_it.MarkdownIt`` parser that keeps the source lines in ``env``.

    ``parse`` normalizes blank lines, stores the source split into lines
    (line endings kept) under ``env["src"]`` and tokenizes it; ``render``
    only guarantees that an environment object exists before delegating to
    the parent implementation.
    """

    def parse(self, src, env=None, normalize=False):
        """Tokenize ``src`` and record its lines in ``env["src"]``.

        Args:
            src: the Markdown source; passed through ``enforce_blanklines``
                before tokenizing.
            env: the markdown_it environment; a fresh ``AttrDict`` is created
                when ``None``.
            normalize: when true, the tokens are reduced with
                ``filter_tangle_tokens`` and re-ordered with
                ``reconfigure_tokens`` before being returned.

        Returns:
            The token list produced by the parent parser, optionally
            normalized.
        """
        src = enforce_blanklines(src)
        env = markdown_it.utils.AttrDict() if env is None else env
        env.update(src=src.splitlines(True))
        tokens = super().parse(src, env)
        if not normalize:
            return tokens
        return reconfigure_tokens(filter_tangle_tokens(tokens), env)

    def render(self, src, env=None):
        """Render ``src`` with the parent class, creating ``env`` when missing."""
        env = markdown_it.utils.AttrDict() if env is None else env
        return super().render(src, env)
class Renderer:
    """Base renderer that rebuilds output text from the original source lines.

    The environment ``env`` is used as shared state between the methods; the
    keys read and written here are ``src`` (list of source lines),
    ``base_indent``, ``extra_indent``, ``continued`` and ``quoted``.

    NOTE(review): ``self.QUOTES`` and ``self.noncode`` are used below but are
    not defined in this class -- presumably supplied by a subclass; confirm.
    ``QUOTES`` is indexed with 0/1 and passed to ``str.endswith``, so it is
    presumably a tuple of the two triple-quote strings.
    """

    __output__ = "html"
    __init__ = markdown_it.renderer.RendererHTML.__init__

    # NOTE(review): this definition is shadowed by the second ``render`` at
    # the end of the class body and is never the one bound on the class.
    def render(self, tokens, options, env):
        return "".join(env["src"])

    def quote(self, str, trailing=''):
        """Wrap the non-blank body of ``str`` in triple quotes.

        Leading and trailing whitespace stay outside the quotes. Trailing
        backslashes are also left outside, after the closing quote and
        ``trailing``. A string that is blank, or that holds nothing but
        backslashes, is returned unchanged.

        Args:
            str: the text to wrap.
            trailing: text inserted right after the closing quote.

        Returns:
            The quoted text, or ``str`` itself when there is nothing to wrap.
        """
        # Use QUOTES[0] unless the text already contains it, then QUOTES[1].
        quote = self.QUOTES[self.QUOTES[0] in str]
        left = len(str) - len(str.lstrip())
        right = len(str.rstrip())
        if not str[left:right].strip():
            return str
        if str[right - 1] == '\\':
            # Bound the scan by ``left``: without it a body made only of
            # backslashes walks ``right`` below zero and raises IndexError.
            while right > left and str[right - 1] == '\\':
                right -= 1
            if right == left:
                return str
        elif str[left:right].endswith(quote[0]):
            # The body ends with the quote character; switch quote style so
            # the closing delimiter is not ambiguous.
            quote = {"'''": '"""', '"""': "'''"}[quote]
        return str[:left] + quote + str[left:right] + quote + trailing + str[right:]

    def measure_base_indent(self, tokens, env):
        """Set ``env['base_indent']`` from the first ``code_block`` token, else 4."""
        first_code = self.get_next_code_token(tokens, -1)
        if first_code and first_code.type == 'code_block':
            env['base_indent'] = lead_indent(env['src'][slice(*first_code.map)])
        else:
            env['base_indent'] = 4

    def get_next_code_token(self, tokens, idx):
        """Return the first ``code_block`` token after position ``idx``, or ``None``."""
        for token in tokens[idx + 1:]:
            if token.type in {'code_block'}:
                return token
        return None

    def hanging_indent(self, str, env):
        """Insert ``env['extra_indent']`` spaces after the leading whitespace of ``str``."""
        start = len(str) - len(str.lstrip())
        return str[:start] + ' ' * env['extra_indent'] + str[start:]

    def indent(self, str, env):
        """Indent ``str`` by ``env['base_indent']`` spaces using ``textwrap.indent``."""
        return textwrap.indent(str, ' ' * env['base_indent'])

    def token_to_str(self, tokens, idx, env):
        """Return the source text covered by ``tokens[idx].map``.

        An empty string is returned when ``idx`` is past the end of
        ``tokens`` or the token is falsy or has no ``map``.
        """
        if idx < len(tokens):
            token = tokens[idx]
            if token and token.map:
                return ''.join(env['src'][slice(*token.map)])
        return ""

    def update_env(self, code, tokens, idx, env):
        """Refresh the indentation and continuation state in ``env`` after ``code``.

        ``extra_indent`` is how much deeper the next ``code_block`` token
        sits than the current ``base_indent`` (never negative); when that is
        zero and ``code`` ends with ``:``, four spaces are used instead.
        ``base_indent`` is then replaced with ``util.trailing_indent(code)``.
        """
        following = self.get_next_code_token(tokens, idx)
        stripped = code.rstrip()
        extra_indent = 0
        if following:
            extra_indent = max(
                0, lead_indent(env['src'][slice(*following.map)]) - env['base_indent']
            )
        if not extra_indent and stripped.endswith(":"):
            extra_indent += 4
        env.update(
            extra_indent=extra_indent,
            base_indent=util.trailing_indent(code),
            continued=stripped.endswith('\\'),
            quoted=stripped.rstrip('\\').endswith(self.QUOTES)
        )

    def render(self, tokens, options, env):
        """Render ``tokens`` to text using the source lines in ``env``.

        The tokens are reduced with ``filter_tangle_tokens`` and re-ordered
        with ``reconfigure_tokens``. When none remain, the whole source is
        returned quoted with a trailing ``;``. Otherwise the output of
        ``RendererHTML.render`` is passed through ``continuation``, followed
        by ``self.noncode(...)``, dedented, and terminated by one newline.
        """
        env.update(base_indent=0, quoted=False, extra_indent=0, continued=False)
        tokens = reconfigure_tokens(filter_tangle_tokens(tokens), env)
        self.measure_base_indent(tokens, env)
        if not tokens:
            return self.quote(''.join(env['src']), trailing=';')
        # Order matters: ``continuation`` reads ``env`` and must run before
        # ``noncode`` gets a chance to change it.
        code = continuation(
            markdown_it.renderer.RendererHTML.render(self, tokens, options, env), env
        )
        tail = self.noncode(tokens, len(tokens), env)
        return textwrap.dedent(code + "\n" + tail).rstrip() + '\n'
utility functions for parsing and rendering¶
import doctest
# Token types that filter_tangle_tokens keeps as potential code.
CODE_TYPES = [
    "fence",
    "code_block",
    "front_matter",
    "bullet_list_open",
    "ordered_list_open",
    "footnote_reference_open",
    "reference",
]
def lead_indent(str):
    """Return the width of the leading whitespace on the first non-blank line.

    ``str`` may be a single string or a list of lines. ``0`` is returned
    when every line is blank or there are no lines at all.
    """
    lines = str if isinstance(str, list) else str.splitlines(True)
    first = next((candidate for candidate in lines if candidate.strip()), None)
    if first is None:
        return 0
    return len(first) - len(first.lstrip())
def filter_tangle_tokens(token, code=None):
    """Collect the tokens that may hold code, walking lists and children.

    ``token`` is a token, or a list of tokens; ``code`` is the list of
    tokens collected so far. Leaf tokens whose type is in ``CODE_TYPES`` are
    appended once. Leading doctest examples are trimmed off ``code_block``
    tokens in place (both ``content`` and ``map[0]`` are updated), and a
    ``code_block`` left with no content is skipped. A trailing ``fence``
    token carrying an ``info`` string is dropped.

    Returns:
        The collected list, or a single empty ``code_block`` placeholder
        with map ``(0, 0)`` when nothing was collected.
    """
    code = code or []
    if isinstance(token, list):
        for member in token:
            code = filter_tangle_tokens(member, code)
    elif token.children:
        for child in token.children:
            code = filter_tangle_tokens(child, code)
    elif token.type in CODE_TYPES:
        if token.type == "code_block":
            # Strip doctest examples from the front of the block, moving the
            # start of the token's line map past each one.
            while token.content.lstrip().startswith(">>> "):
                example = next(
                    doctest.DocTestParser._EXAMPLE_RE.finditer(token.content)
                )
                stop = example.end()
                token.map[0] += len(token.content[:stop].splitlines())
                token.content = token.content[stop:]
            if not token.content.strip():
                return code
        if token not in code:
            code.append(token)
    if code and code[-1].type == "fence" and code[-1].info:
        code.pop(-1)
    if code:
        return code
    return [markdown_it.utils.AttrDict(type="code_block", content="", map=(0, 0))]
def make_reference_tokens(env, *tokens):
    """Turn the references recorded in the markdown_it environment into tokens.

    Each entry of ``env["references"]`` becomes a ``reference`` token that
    carries the entry's line map. A reference separated from the previous
    token only by blank lines is merged into that token instead of starting
    a new one.

    Args:
        env: the markdown_it environment; reads ``references`` (optional)
            and ``src`` (list of source lines).
        *tokens: tokens to start from; new reference tokens are added after
            them.

    Returns:
        A list of the tokens whose map spans at least one line, each with
        its ``content`` refreshed from ``env["src"]``.
    """
    for reference in env.get("references", {}).values():
        if not tokens:
            tokens += (markdown_it.token.Token("reference", "", 1),)
            # Copy the map: it is edited in place below when references are
            # merged, and the entry in ``env["references"]`` must not change.
            tokens[-1].map = list(reference["map"])
            continue
        for line in env["src"][tokens[-1].map[1]:reference["map"][0]]:
            if line.strip():
                # Real content sits between the two: start a new token.
                tokens += (markdown_it.token.Token("reference", "", 1),)
                tokens[-1].map = list(reference["map"])
                break
        else:
            # Only blank lines (or nothing) in between: extend the previous
            # token so it covers this reference too.
            tokens[-1].map[1] = reference["map"][1]
            tokens[-1].content = "".join(env["src"][slice(*tokens[-1].map)])
    return [recontent(x, env) for x in tokens if int.__sub__(*x.map)]
def recontent(token, env):
    """Reset ``token.content`` to the source lines selected by ``token.map``.

    The token is modified in place and returned.
    """
    span = slice(*token.map)
    selected = env["src"][span]
    token.content = "".join(selected)
    return token
def reconfigure_tokens(tokens, env):
    """Order tokens by starting line and split tokens that overlap.

    The reference tokens from ``make_reference_tokens(env)`` are added to
    ``tokens`` and everything is sorted by ``map[0]``. When a token starts
    before the previous one ends, the previous token is cut into the part
    before the token and a deep copy covering the part after it. Pieces
    whose map spans no lines are dropped; the others get their content
    refreshed with ``recontent``.

    Returns:
        The ordered tokens whose map spans at least one line.
    """
    import copy

    ordered = sorted(tokens + make_reference_tokens(env), key=lambda x: x.map[0])
    new = ordered[:1]
    for token in ordered[1:]:
        previous = new[-1]
        if token.map[0] >= previous.map[1]:
            new.append(token)
            continue
        # Layout after this is [..., previous, token, copy of previous].
        new.extend([token, copy.deepcopy(previous)])
        new[-3].map[1], new[-1].map[0] = token.map
        for i in [-3, -1]:
            if int.__sub__(*new[i].map) == 0:
                new.pop(i)
            else:
                recontent(new[i], env)
    return [x for x in new if int.__sub__(*x.map)]
def continuation(str, env):
    """Carry a line continuation across the blank lines that follow it.

    Every blank line that comes after a non-blank line ending in a backslash
    is replaced with ``env["base_indent"]`` spaces plus a backslash. The
    lines are joined back with ``"\\n"``, so a final newline is not kept.
    """
    result, pending = [], False
    for line in str.splitlines():
        if line.strip():
            pending = line.endswith("\\")
        elif pending:
            line = " " * env["base_indent"] + "\\"
        result.append(line)
    return "\n".join(result)
def enforce_blanklines(str):
    """Replace whitespace-only lines with bare newlines and end with a newline.

    ``str`` may be a string or an iterable of strings; the pieces are
    concatenated before the lines are examined.
    """
    text = "".join(str)
    cleaned = [line if line.strip() else "\n" for line in text.splitlines(True)]
    result = "".join(cleaned)
    if result.endswith("\n"):
        return result
    return result + "\n"
def quote_docstrings(str):
    """Quote every doctest example found in ``str``.

    Each match of ``doctest.DocTestParser._EXAMPLE_RE`` is passed to
    ``quote`` with a trailing ``;``; the text between and after the matches
    is kept as is. When there are no matches, ``str`` is returned unchanged.

    NOTE(review): ``quote`` is looked up as a module-level name here --
    confirm where it is defined.
    """
    pieces, cursor = [], 0
    for match in doctest.DocTestParser._EXAMPLE_RE.finditer(str):
        begin, finish = match.start(), match.end()
        pieces.append(str[cursor:begin] + quote(str[begin:finish], trailing=";"))
        cursor = finish
    quoted = "".join(pieces)
    if not quoted:
        return str
    return quoted + str[cursor:]
def unfence(str):
    """Remove the first and the last code fence marker from a string."""
    fence = "```"
    # Drop the first fence marker, if any.
    head, _, tail = str.partition(fence)
    opened = head + tail
    # Drop the last remaining fence marker, if any.
    body, _, rest = opened.rpartition(fence)
    return body + rest
def dedent_block(str):
    """Dedent a block of non-code text.

    The common margin is removed from the whole block, then the first
    non-blank line has any remaining indentation removed as well.
    """
    lines = textwrap.dedent(str).splitlines(True)
    first = next((i for i, line in enumerate(lines) if line.strip()), None)
    if first is not None:
        lines[first] = textwrap.dedent(lines[first])
    return "".join(lines)