`markdown_it` and `pidgy` compatibility

    from pidgy import util
    import textwrap, markdown_it

The Markdown class maintains the operational logic to transform partially coded [Markdown] into fully coded Python. The translation happens in two steps:

1. `Markdown.parse` lexes (tokenizes) the input string into tokens that identify Markdown blocks.
2. `Markdown.render` renders those tokens into the target language format.

    class Markdown(markdown_it.MarkdownIt):
        """A ``MarkdownIt`` parser that records the source lines in ``env``."""

        def parse(self, src, env=None, normalize=False):
            """Tokenize ``src`` after normalizing its blank lines.

            The normalized source is stored, split into lines with their line
            endings, under ``env["src"]``.  When ``normalize`` is true the
            tokens are filtered and reordered before being returned.
            """
            source = enforce_blanklines(src)
            env = markdown_it.utils.AttrDict() if env is None else env
            env.update(src=source.splitlines(True))
            tokens = super().parse(source, env)
            if not normalize:
                return tokens
            return reconfigure_tokens(filter_tangle_tokens(tokens), env)

        def render(self, src, env=None):
            """Render ``src``, supplying a fresh environment when none is given."""
            env = markdown_it.utils.AttrDict() if env is None else env
            return super().render(src, env)

    class Renderer:
        """Render Markdown tokens back into source text.

        NOTE(review): ``self.QUOTES`` and ``self.noncode`` are used below but are
        not defined in this class; presumably a subclass or mixin provides them.
        ``QUOTES`` is indexed with a boolean and passed to ``str.endswith``, so it
        is expected to be a tuple of (at least) two quote strings.
        """

        __output__ = "html"
        __init__ = markdown_it.renderer.RendererHTML.__init__

        def quote(self, str, trailing=''):
            """Wrap the non-blank body of ``str`` in triple block quotations.

            Leading and trailing whitespace stay outside the quotes, as do any
            trailing backslashes.  ``trailing`` is inserted right after the
            closing quote.  A string with no non-blank content is returned
            unchanged.
            """
            # Use the second quote style when the first already occurs in the text.
            quote = self.QUOTES[self.QUOTES[0] in str]
            left, right = len(str) - len(str.lstrip()), len(str.rstrip())
            if not str[left:right].strip():
                return str
            if str[right - 1] == '\\':
                # Keep trailing backslashes outside the quotes.  Stop at ``left`` so
                # a body made only of backslashes cannot wrap around to negative
                # indices and raise IndexError.
                while right > left and str[right - 1] == '\\':
                    right -= 1
            elif str[left:right].endswith(quote[0]):
                # The body ends with the quote character: switch quote styles so
                # the closing delimiter does not merge with it.
                quote = {"'''": '"""', '"""': "'''"}[quote]
            return str[:left] + quote + str[left:right] + quote + trailing + str[right:]

        def measure_base_indent(self, tokens, env):
            """Set ``env['base_indent']`` from the first code block, or 4 if none."""
            first_code = self.get_next_code_token(tokens, -1)
            if first_code and first_code.type == 'code_block':
                env['base_indent'] = lead_indent(env['src'][slice(*first_code.map)])
            else:
                env['base_indent'] = 4

        def get_next_code_token(self, tokens, idx):
            """Return the first ``code_block`` token after ``idx``, or ``None``."""
            for token in tokens[idx + 1:]:
                if token.type == 'code_block':
                    return token
            return None

        def hanging_indent(self, str, env):
            """Insert ``env['extra_indent']`` spaces after the leading whitespace."""
            start = len(str) - len(str.lstrip())
            return str[:start] + ' ' * env['extra_indent'] + str[start:]

        def indent(self, str, env):
            """Indent every non-blank line by ``env['base_indent']`` spaces."""
            return textwrap.indent(str, ' ' * env['base_indent'])

        def token_to_str(self, tokens, idx, env):
            """Return the source lines spanned by ``tokens[idx]``.

            An empty string is returned when ``idx`` is past the end of
            ``tokens`` or the token is falsy or has no ``map``.
            """
            if idx < len(tokens):
                if tokens[idx] and tokens[idx].map:
                    return ''.join(env['src'][slice(*tokens[idx].map)])
            return ""

        def update_env(self, code, tokens, idx, env):
            """Update the indentation and quoting state in ``env`` after ``code``."""
            upcoming = self.get_next_code_token(tokens, idx)
            stripped = code.rstrip()
            extra_indent = 0
            if upcoming:
                # Measured against the base indent currently in ``env``, i.e. before
                # the update below replaces it.
                extra_indent = max(
                    0,
                    lead_indent(env['src'][slice(*upcoming.map)]) - env['base_indent'],
                )
            if not extra_indent and stripped.endswith(":"):
                # The code ends by opening a block, so what follows hangs one level in.
                extra_indent += 4
            env.update(
                extra_indent=extra_indent,
                base_indent=util.trailing_indent(code),
                continued=stripped.endswith('\\'),
                quoted=stripped.rstrip('\\').endswith(self.QUOTES)
            )

        def render(self, tokens, options, env):
            """Render ``tokens`` to dedented source text ending in one newline.

            When no tokens remain after filtering, the whole source is returned
            as a single quoted block followed by ``;``.
            """
            env.update(base_indent=0, quoted=False, extra_indent=0, continued=False)
            tokens = reconfigure_tokens(filter_tangle_tokens(tokens), env)
            self.measure_base_indent(tokens, env)
            if not tokens:
                return self.quote(''.join(env['src']), trailing=';')
            return textwrap.dedent(continuation(
                markdown_it.renderer.RendererHTML.render(self, tokens, options, env), env
            ) + "\n" + self.noncode(tokens, len(tokens), env)).rstrip() + '\n'

Utility functions for parsing and rendering

    import doctest

    # Token types that are treated as (potential) code when tangling.
    CODE_TYPES = [
        "fence",
        "code_block",
        "front_matter",
        "bullet_list_open",
        "ordered_list_open",
        "footnote_reference_open",
        "reference",
    ]

    def lead_indent(str):
        """Return the leading-whitespace width of the first non-blank line.

        ``str`` may be a string or a list of lines; 0 is returned when every
        line is blank.
        """
        lines = str if isinstance(str, list) else str.splitlines(True)
        first = next((line for line in lines if line.strip()), None)
        if first is None:
            return 0
        return len(first) - len(first.lstrip())



    def filter_tangle_tokens(token, code=None):
        """Collect the tokens that may hold code from a token or list of tokens.

        The function recurses through lists and through ``token.children``; leaf
        tokens whose type is in ``CODE_TYPES`` are appended to ``code``.  Doctest
        examples at the front of a ``code_block`` are trimmed off first, and the
        most recently collected ``fence`` token is dropped when it carries an
        info string.

        Returns ``code``, or a one-element list holding an empty ``code_block``
        placeholder with map ``(0, 0)`` when nothing has been collected.
        """
        code = code or []
        if isinstance(token, list):
            for token in token:
                code = filter_tangle_tokens(token, code)
        elif token.children:
            for token in token.children:
                code = filter_tangle_tokens(token, code)
        else:
            if token.type in CODE_TYPES:
                if token.type == "code_block":
                    # Strip doctest examples from the front of the block, moving the
                    # start of the token's line map past each removed example.
                    while token.content.lstrip().startswith(">>> "):
                        start, end = next(doctest.DocTestParser._EXAMPLE_RE.finditer(token.content)).span()
                        token.map[0] += len(token.content[:end].splitlines())
                        token.content = token.content[end:]
                    # Skip blocks that have no content left.
                    if not token.content.strip(): return code
                if token not in code:
                    code.append(token)
            # Checked for every leaf token: discard the most recently collected
            # token when it is a fence that has an info string.
            if code and (code[-1].type == "fence") and code[-1].info:
                code.pop(-1)
        # NOTE(review): this fallback also applies to recursive calls, so the
        # placeholder can be carried forward as the first entry of ``code`` —
        # confirm that is intended.
        return code or [
            markdown_it.utils.AttrDict(type="code_block", content="", map=(0, 0))
        ]


    def make_reference_tokens(env, *tokens):
        """Turn references in the markdown_it environment into tokens.

        Each entry of ``env["references"]`` becomes a ``reference`` token spanning
        the reference's line map.  A reference separated from the previous token
        only by blank lines (or by nothing) extends that token instead of
        starting a new one.

        Returns the tokens with a non-empty line span, each with its ``content``
        refreshed from ``env["src"]``.
        """
        for reference in env.get("references", {}).values():
            start, end = reference["map"]
            if tokens:
                gap = env["src"][tokens[-1].map[1] : start]
                if not any(line.strip() for line in gap):
                    # Only blank lines in between: grow the previous token.
                    tokens[-1].map[1] = end
                    continue
            token = markdown_it.token.Token("reference", "", 1)
            # Copy the map: token maps are edited in place later, and aliasing
            # the reference's own list would silently rewrite env["references"].
            token.map = [start, end]
            tokens += (token,)

        return [recontent(x, env) for x in tokens if int.__sub__(*x.map)]


    def recontent(token, env):
        """Refresh ``token.content`` from the source lines its map spans.

        The token is modified in place and returned.
        """
        source_lines = env["src"]
        span = slice(*token.map)
        token.content = "".join(source_lines[span])
        return token


    def reconfigure_tokens(tokens, env):
        """Order tokens by starting line and split the ones that overlap.

        Reference tokens built from ``env`` are merged in first.  When a token
        starts inside the span of the previous one, the previous token is cut
        into a head that ends where the new token starts and a copied tail that
        starts where the new token ends.  Pieces with an empty span are dropped.
        """
        import copy

        tokens = sorted(tokens + make_reference_tokens(env), key=lambda x: x.map[0])
        new = tokens[:1]
        for token in tokens[1:]:
            if token.map[0] < new[-1].map[1]:
                # After the extend: new[-3] is the head, new[-2] is ``token`` and
                # new[-1] is the tail copied from the previous token.
                new.extend([token, copy.deepcopy(new[-1])])
                new[-3].map[1], new[-1].map[0] = token.map

                # Head first, then tail: popping the head shortens the list but
                # -1 still addresses the tail.
                for i in (-3, -1):
                    if int.__sub__(*new[i].map) == 0:
                        new.pop(i)
                    else:
                        recontent(new[i], env)
                continue
            new.append(token)

        return [x for x in new if int.__sub__(*x.map)]


    def continuation(str, env):
        """Carry a line continuation across the blank lines that follow it.

        Every blank line after a line ending in a backslash is replaced by
        ``env["base_indent"]`` spaces and a backslash.  The lines are rejoined
        with ``"\\n"``, so a trailing newline is not kept.
        """
        result = []
        open_continuation = False
        for line in str.splitlines():
            if line.strip():
                open_continuation = line.endswith("\\")
            elif open_continuation:
                line = " " * env["base_indent"] + "\\"
            result.append(line)
        return "\n".join(result)

    def enforce_blanklines(str):
        """Replace whitespace-only lines with bare newlines.

        ``str`` may be a string or an iterable of strings, which are joined
        first.  The result always ends with a newline.
        """
        cleaned = []
        for line in "".join(str).splitlines(True):
            cleaned.append(line if line.strip() else "\n")
        text = "".join(cleaned)
        return text if text.endswith("\n") else text + "\n"

    def quote_docstrings(str):
        """Quote every doctest example found in ``str``.

        Each example matched by the doctest example pattern is passed through
        ``quote`` with a trailing ``;``; the text between and after examples is
        kept as is.  The input is returned unchanged when nothing is produced.
        """
        pieces, cursor = [], 0
        for match in doctest.DocTestParser._EXAMPLE_RE.finditer(str):
            pieces.append(str[cursor : match.start()])
            pieces.append(quote(str[match.start() : match.end()], trailing=";"))
            cursor = match.end()
        quoted = "".join(pieces)
        if not quoted:
            return str
        return quoted + str[cursor:]


    def unfence(str):
        """Remove the first and the last code fence from a string."""
        before, _, after = str.partition("```")
        opened = before + after
        body, _, rest = opened.rpartition("```")
        return body + rest


    def dedent_block(str):
        """Dedent a block of non code.

        The common indentation is removed from the whole block, then whatever
        indentation is left on the first non-blank line is removed as well.
        """
        lines = textwrap.dedent(str).splitlines(True)
        first = next((i for i, line in enumerate(lines) if line.strip()), None)
        if first is not None:
            lines[first] = textwrap.dedent(lines[first])
        return "".join(lines)

pidgy metasyntax Test pidgy.tangle