tangling/translating code¶
tangling code, as presented by donald knuth, converts a document language into a programming language. the original implementation converts ".WEB"
files to valid pascal (".PAS") files. the `pidgy`
approach begins with [markdown] text
that converts to `IPython`.
[Markdown]: #
[Python]: #
import typing, IPython, pidgy.util, ast, textwrap, markdown_it
tangling `pidgy`
uses block level lexical analysis to separate the non-code lines from the code lines of an input;
`pidgy`
does not take any opinion on inline level markdown syntax. the `PythonRender`
uses the `markdown_it`
module for parsing markdown; past versions of `pidgy`
have tried `pandoc`, `mistune`, and `mistletoe`.
`markdown_it`
is the preferred parser because it provides line numbers for markdown tokens.
class Tangle(pidgy.compat.markdown.Markdown):
    """Markdown parser set up to render python source.

    Unless the caller passes its own ``renderer_cls``, ``PythonRender`` is
    used. On top of the parent configuration, a ``front_matter`` block rule is
    registered for each of the characters ``-`` and ``+``, a ``footnote_def``
    rule is registered ahead of ``reference``, and ``html_block`` is disabled.
    """

    def __init__(self, *args, **kwargs):
        import functools

        kwargs.setdefault('renderer_cls', PythonRender)
        super().__init__(*args, **kwargs)

        # One front matter rule per fence character, each placed before "code".
        for marker in "-+":
            self.block.ruler.before(
                "code",
                "front_matter",
                functools.partial(pidgy.util.frontMatter, marker),
                {"alt": ["paragraph", "reference", "blockquote", "list"]},
            )

        self.block.ruler.before(
            "reference",
            "footnote_def",
            markdown_it.extensions.footnote.index.footnote_def,
            {"alt": ["paragraph", "reference"]},
        )
        self.disable('html_block')
the primary goal of the `pidgy`
lexical analysis is to separate non-code and code lines when the markdown is pythonified. both indented block code and
code fences determine the heuristics for entangling the non-code and code strings. while developing `pidgy`,
we’ve purposefully avoided defining any heuristics for code fenced languages. if authors prefer, they can execute code in `pidgy`
code fences when no language is supplied.
class Pythonify(pidgy.compat.markdown.Renderer):
    """Renderer that emits python source from markdown block tokens.

    Text found between code tokens is dedented, re-indented and (unless the
    preceding code left a string open) quoted; code tokens are emitted after
    that text. After each code token the rendering environment ``env`` is
    refreshed with the indentation/continuation state of the emitted code.
    """

    QUOTES = '"""', "'''"

    def noncode(self, tokens, idx, env):
        """Return the source text lying before ``tokens[idx]`` as python.

        When ``idx`` is past the end of ``tokens`` the text runs from the end
        of the last token to the end of ``env['src']`` and the quoted block is
        closed with a trailing ``;``.
        """
        token = None
        current, previous = slice(None), slice(*tokens[-1].map)
        if idx < len(tokens):
            token = tokens[idx]
            current = slice(*token.map)
            # The first token has no predecessor: start from the top of the source.
            previous = slice(*tokens[idx - 1].map) if idx else slice(0, 0)

        between = ''.join(env['src'][previous.stop:current.start])
        text = pidgy.util.dedent_block(between)
        text = self.indent(self.hanging_indent(text, env), env)
        if env.get('quoted', False):
            # The text is already inside a string opened by earlier code.
            return text
        return self.quote(text, trailing='' if token is not None else ';')

    def code_block(self, tokens, idx, options, env):
        """Emit the preceding non-code text followed by an indented code block."""
        leading = self.noncode(tokens, idx, env)
        body = pidgy.util.quote_docstrings(self.token_to_str(tokens, idx, env))
        code = leading + body
        return self.update_env(code, tokens, idx, env) or code

    def fence(self, tokens, idx, options, env):
        """Emit the preceding non-code text followed by a fenced code block.

        Only fences without a language are expected here; the fence markers
        are removed and the body is indented by four spaces.
        """
        leading = self.noncode(tokens, idx, env)
        unfenced = pidgy.util.unfence(self.token_to_str(tokens, idx, env))
        body = textwrap.indent(pidgy.util.quote_docstrings(unfenced), ' ' * 4)
        code = leading + body
        return self.update_env(code, tokens, idx, env) or code

    def update_env(self, code, tokens, idx, env):
        """Record in ``env`` how the just-emitted ``code`` ends.

        Sets ``base_indent``, ``extra_indent``, ``continued`` (code ends with a
        backslash) and ``quoted`` (code ends with one of ``QUOTES``, ignoring
        trailing backslashes). Returns ``None``.
        """
        upcoming = self.get_next_code_token(tokens, idx)
        env['base_indent'] = pidgy.util.trailing_indent(code)

        extra_indent = 0
        if upcoming:
            lead = pidgy.util.lead_indent(env['src'][slice(*upcoming.map)])
            extra_indent = max(0, lead - env['base_indent'])

        stripped = code.rstrip()
        if not extra_indent and stripped.endswith(":"):
            # A block was opened; whatever follows must sit one level deeper.
            extra_indent = 4

        env.update(
            extra_indent=extra_indent,
            continued=stripped.endswith('\\'),
            quoted=stripped.rstrip('\\').endswith(self.QUOTES),
        )
`pidgy`
includes special affordances for common notation like front matter, footnotes as annotations, and bulleted lists.
class PythonRender(Pythonify):
    """Renderer with extra handling for front matter, references and lists.

    The ``quoted`` and ``continued`` flags are read from ``env`` with
    ``env.get(..., False)``, matching ``Pythonify.noncode``, so a reference or
    list rendered before any code block has set them does not raise
    ``KeyError``.
    """

    def front_matter(self, tokens, idx, options, env):
        """Emit code that loads front matter into ``locals()``.

        ``+++`` front matter is loaded with ``toml`` and ``---`` front matter
        with ``ruamel.yaml``; any other markup is emitted unchanged.
        """
        token, code = tokens[idx], self.token_to_str(tokens, idx, env)
        # NOTE(review): the front matter text is embedded verbatim inside a
        # triple-double-quoted literal; content containing `"""` or
        # backslashes would alter the generated source — confirm whether that
        # needs escaping.
        if token.markup == '+++':
            code = F'''locals().update(__import__('toml').loads("""{code}""".partition('+++')[2].rpartition('+++')[0]))\n'''
        elif token.markup == '---':
            code = F'''locals().update(__import__('ruamel.yaml').yaml.safe_load("""{code}""".partition('---')[2].rpartition('---')[0]))\n'''
        return self.indent(code, env)

    def reference(self, tokens, idx, options, env, *, re='link_item'):
        """Emit link/footnote definitions as a dictionary expression.

        Inside an open string the raw text is returned. Otherwise a dict
        comprehension over ``pidgy.util.<re>.finditer`` is emitted; unless the
        previous code ended with a line continuation, that dictionary is
        merged into ``__annotations__``.
        """
        code = self.token_to_str(tokens, idx, env)
        if env.get('quoted', False):
            return code

        quoted = self.quote(textwrap.dedent(code), trailing=")}").rstrip()
        expr = (
            "{x.group(1): x.group(2).rstrip() for x in "
            f"__import__('pidgy').util.{re}.finditer({quoted}"
        )
        if not env.get('continued', False):
            expr = """locals()["__annotations__"] = {**%s, **locals().get('__annotations__', {})}"""%expr
        return self.noncode(tokens, idx, env) + self.indent(expr + "\n", env)

    def footnote_reference_open(self, tokens, idx, options, env):
        """Render footnote definitions like references, using ``footnote_item``."""
        return self.reference(tokens, idx, options, env, re='footnote_item')

    def bullet_list_open(self, tokens, idx, options, env):
        """Emit a markdown list as raw text, a list expression, or a string.

        Inside an open string the raw text is returned. After a line
        continuation a list comprehension over ``pidgy.util.list_item`` is
        emitted. Otherwise the list is emitted as a quoted block ending in
        ``;``.
        """
        code = self.token_to_str(tokens, idx, env)
        if env.get('quoted', False):
            return code

        if env.get('continued', False):
            quoted = self.quote(textwrap.dedent(code), trailing=')]')
            expr = (
                "[x.group().rstrip().partition(' ')[2] for x in "
                f"__import__('pidgy').util.list_item.finditer({quoted}\n"
            )
            return self.indent(expr, env)

        code = self.quote(textwrap.dedent(code), trailing=';')
        return self.indent(self.hanging_indent(code, env), env)

    ordered_list_open = bullet_list_open
tangle
is a public function for tangling markdown to python.
def tangle(str: str) -> str:
    """Translate markdown source into python source.

    ``str`` may be a single string or an iterable of strings (for example a
    list of lines); a falsy value is treated as empty input. A fresh
    ``Tangle`` instance is built for every call.
    """
    renderer = Tangle()
    source = ''.join(str or [])
    return renderer.render(source)
pidgy
interfaces with IPython
as an input transform manager trait.
class pidgyManager(pidgy.base.Trait, IPython.core.inputtransformer2.TransformerManager):
    """IPython transformer manager that tangles markdown cells before transforming them."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One parser instance is kept and reused for every cell.
        self.tangle = Tangle()

    def transform_cell(self, cell):
        """Tangle ``cell`` when enabled, then apply the inherited transforms.

        ``self.enabled`` is not defined in this class; presumably it is
        provided by ``pidgy.base.Trait`` — TODO confirm.
        """
        if self.enabled:
            cell = self.tangle.render(cell)
        # Zero-argument super() resolves relative to this class. The previous
        # super(type(self), self) resolved relative to the runtime type and
        # recursed forever as soon as the class was subclassed.
        return super().transform_cell(cell)
more language features¶
`pidgy`
experiments with extra language features for python, using the same system
that IPython uses to add features like line and cell magics.
Recently, IPython introduced a convention that allows top level await statements outside of functions. Building off this convenience, `pidgy`
allows for top-level return and yield statements. These statements are replaced with an IPython display statement.
class ExtraSyntax(ast.NodeTransformer):
    """Rewrite top-level ``return``/``yield`` statements as IPython display calls.

    Function definitions are returned untouched, so statements inside function
    bodies keep their normal meaning. A tuple value is spread into separate
    display arguments; a bare ``return``/``yield`` becomes a display call with
    no arguments.
    """

    _DISPLAY_SOURCE = '''__import__('IPython').display.display()'''

    def _display_statement(self, value):
        """Return a new ``display(...)`` expression statement showing ``value``."""
        statement = ast.parse(self._DISPLAY_SOURCE).body[0]
        if value is None:
            # Bare `return` / `yield`: a None entry in Call.args would be an
            # invalid AST, so call display with no arguments instead.
            arguments = []
        elif isinstance(value, ast.Tuple):
            arguments = value.elts
        else:
            arguments = [value]
        statement.value.args = arguments
        return statement

    def visit_FunctionDef(self, node):
        """Leave function bodies alone."""
        return node

    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_Return(self, node):
        """Replace a ``return`` (or any node carrying ``.value``) with a display statement."""
        return ast.copy_location(self._display_statement(node.value), node)

    def visit_Expr(self, node):
        """Replace a ``yield``/``yield from`` expression statement with a display statement."""
        if isinstance(node.value, (ast.Yield, ast.YieldFrom)):
            return ast.copy_location(self.visit_Return(node.value), node)
        return node

    def visit_Expression(self, node):
        """Handle eval-mode trees, whose payload is ``node.body`` rather than ``node.value``.

        The ``Expression`` root is kept; only a ``yield`` body is swapped for
        the display call expression.
        """
        if isinstance(node.body, (ast.Yield, ast.YieldFrom)):
            call = self._display_statement(node.body.value).value
            node.body = ast.copy_location(call, node.body)
        return node
We know naming is hard, there is no point focusing on it. pidgy
allows authors
to use emojis as variables in python. They add extra color and expression to the narrative.
def demojize(lines, delimiters=('_', '_')):
    """Replace emoji in python source with identifier-safe names.

    The source is tokenized; every error token is passed through
    ``emoji.demojize`` and turned into a ``NAME`` token. An error token that
    directly touches the preceding ``NAME`` token is appended to that name.

    Args:
        lines: a string or an iterable of strings; a falsy value means empty
            input.
        delimiters: pair of strings wrapped around each emoji name.

    Returns:
        The rewritten source, or the joined input unchanged when it cannot be
        tokenized or rebuilt.

    Raises:
        ImportError: if an error token is found and ``emoji`` is not installed.
    """
    import io
    import tokenize

    source = ''.join(lines or [])
    tokens = []
    previous_end = None  # end position of the last token seen in the source
    try:
        for token in tokenize.tokenize(io.BytesIO(source.encode()).readline):
            if token.type != tokenize.ERRORTOKEN:
                tokens.append(token)
                previous_end = token.end
                continue

            # Imported only when needed so plain python never requires the package.
            import emoji

            name = emoji.demojize(
                token.string, delimiters=delimiters
            ).replace('-', '_').replace("’", "_")

            # Merge only when the emoji is glued to the name before it;
            # otherwise `import 🐍` would collapse into one identifier.
            touches_name = (
                bool(tokens)
                and tokens[-1].type == tokenize.NAME
                and previous_end == token.start
            )
            if touches_name:
                last = tokens[-1]
                tokens[-1] = tokenize.TokenInfo(
                    last.type, last.string + name, last.start, last.end, last.line)
            else:
                tokens.append(tokenize.TokenInfo(
                    tokenize.NAME, name, token.start, token.end, token.line))
            previous_end = token.end
        return tokenize.untokenize(tokens).decode()
    except ImportError:
        # A missing dependency must surface rather than be silently ignored.
        raise
    except Exception:
        # Best effort: hand the source back untouched and let python report
        # the problem. KeyboardInterrupt/SystemExit are no longer swallowed.
        return source