Skip to content

Commit

Permalink
✨ NEW: Add plugin & tests to render subscripts
Browse files Browse the repository at this point in the history
  • Loading branch information
miteshashar committed Sep 29, 2024
1 parent 2236898 commit f6a71eb
Show file tree
Hide file tree
Showing 5 changed files with 284 additions and 0 deletions.
6 changes: 6 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ html_string = md.render("some *Markdown*")
.. autofunction:: mdit_py_plugins.amsmath.amsmath_plugin
```

## Subscripts

```{eval-rst}
.. autofunction:: mdit_py_plugins.subscript.sub_plugin
```

## MyST plugins

`myst_blocks` and `myst_role` plugins are also available, for utilisation by the [MyST renderer](https://myst-parser.readthedocs.io/en/latest/using/syntax.html)
Expand Down
117 changes: 117 additions & 0 deletions mdit_py_plugins/subscript/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""
Markdown-it-py plugin to introduce <sub> markup using ~subscript~.
Ported from
https://github.com/markdown-it/markdown-it-sub/blob/master/index.mjs
Originally ported during implementation of https://github.com/hasgeek/funnel/blob/main/funnel/utils/markdown/mdit_plugins/sub_tag.py
"""

from __future__ import annotations

from collections.abc import Sequence
import re

from markdown_it import MarkdownIt
from markdown_it.renderer import RendererHTML
from markdown_it.rules_inline import StateInline
from markdown_it.token import Token
from markdown_it.utils import EnvType, OptionsDict

__all__ = ["sub_plugin"]

# Delimiter character that both opens and closes a subscript span.
TILDE_CHAR = "~"

# Matches an *unescaped* whitespace character: one preceded by the start of
# the string or a non-backslash character, followed by zero or more complete
# backslash pairs (i.e. an even number of backslashes directly before it).
WHITESPACE_RE = re.compile(r"(^|[^\\])(\\\\)*\s")
# Matches a backslash followed by a space or an ASCII punctuation character,
# capturing that character so the escape can be replaced by the bare character.
UNESCAPE_RE = re.compile(r'\\([ \\!"#$%&\'()*+,.\/:;<=>?@[\]^_`{|}~-])')


def tokenize(state: StateInline, silent: bool) -> bool:
    """Inline rule: turn ``~text~`` into ``sub_open``/``text``/``sub_close`` tokens.

    :param state: inline parser state; ``state.pos`` is the candidate opening tilde
    :param silent: validation-mode flag; the rule never matches when it is set
    :return: ``True`` after pushing the three tokens and moving ``state.pos``
        just past the closing tilde; ``False`` otherwise, with ``state.pos``
        left at the position it had on entry
    """
    begin = state.pos
    first_char = state.src[begin]
    limit = state.posMax

    # Reject early: pairs are never run in validation mode, the span has to
    # start with a tilde, and there must be room for at least ``~x~``.
    if silent or first_char != TILDE_CHAR or begin + 2 >= limit:
        return False

    # Advance token by token until a tilde is reached or the input runs out.
    state.pos = begin + 1
    while state.pos < limit and state.src[state.pos] != TILDE_CHAR:
        state.md.inline.skipToken(state)
    closed = state.pos < limit

    inner = state.src[begin + 1 : state.pos]

    # Back out when there is no closing tilde, when the span is empty, or
    # when the content holds unescaped spaces/newlines.
    if (
        not closed
        or state.pos == begin + 1
        or WHITESPACE_RE.search(inner) is not None
    ):
        state.pos = begin
        return False

    # Valid pair: narrow the window to the span's content while the tokens
    # are emitted; it is restored below.
    state.posMax = state.pos
    state.pos = begin + 1

    # Silent mode was already rejected above, so tokens are always pushed here.
    opening = state.push("sub_open", "sub", 1)
    opening.markup = TILDE_CHAR

    body = state.push("text", "", 0)
    # Drop the backslash from escaped spaces/punctuation in the content.
    body.content = UNESCAPE_RE.sub(r"\1", inner)

    closing = state.push("sub_close", "sub", -1)
    closing.markup = TILDE_CHAR

    # Resume just after the closing tilde and restore the original limit.
    state.pos = state.posMax + 1
    state.posMax = limit
    return True


def sub_open(
    renderer: RendererHTML,
    tokens: Sequence[Token],
    idx: int,
    options: OptionsDict,
    env: EnvType,
) -> str:
    """Render rule for ``sub_open`` tokens.

    None of the arguments are consulted; the output is always the literal
    opening tag.

    :return: the string ``<sub>``
    """
    return "<sub>"


def sub_close(
    renderer: RendererHTML,
    tokens: Sequence[Token],
    idx: int,
    options: OptionsDict,
    env: EnvType,
) -> str:
    """Render rule for ``sub_close`` tokens.

    None of the arguments are consulted; the output is always the literal
    closing tag.

    :return: the string ``</sub>``
    """
    return "</sub>"


def sub_plugin(md: MarkdownIt) -> None:
    """
    Enable ``~subscript~`` syntax, rendered as ``<sub>`` elements.

    Registers the ``sub`` inline rule after the ``emphasis`` rule, and the
    render rules for ``sub_open`` / ``sub_close`` tokens, on the given parser.

    Ported from
    https://github.com/markdown-it/markdown-it-sub/blob/master/index.mjs
    Originally ported during implementation of https://github.com/hasgeek/funnel/blob/main/funnel/utils/markdown/mdit_plugins/sub_tag.py

    :param md: the ``MarkdownIt`` instance to extend in place
    """
    md.inline.ruler.after("emphasis", "sub", tokenize)

    render_rules = (
        ("sub_open", sub_open),
        ("sub_close", sub_close),
    )
    for token_type, render_rule in render_rules:
        md.add_render_rule(token_type, render_rule)
8 changes: 8 additions & 0 deletions mdit_py_plugins/subscript/port.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
- package: markdown-it-sub
commit: 422e93885b3c611234d602aa795f3d75a62cc93e
date: 5 Dec 2023
version: 3.0.0
changes:
- TODO - Strikethroughs within a subscript are not rendered correctly;
upstream markdown-it has the same limitation, so this can be fixed at a
later stage, perhaps in both markdown-it and markdown-it-py
130 changes: 130 additions & 0 deletions tests/fixtures/subscript.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
.
~foo\~
.
<p>~foo~</p>
.

.
~foo bar~
.
<p>~foo bar~</p>
.

.
~foo\ bar\ baz~
.
<p><sub>foo bar baz</sub></p>
.

.
~\ foo\ ~
.
<p><sub> foo </sub></p>
.

.
~foo\\\\\\\ bar~
.
<p><sub>foo\\\ bar</sub></p>
.

.
~foo\\\\\\ bar~
.
<p>~foo\\\ bar~</p>
.

.
**~foo~ bar**
.
<p><strong><sub>foo</sub> bar</strong></p>
.


coverage
.
*~f
.
<p>*~f</p>
.

Basic:
.
H~2~O
.
<p>H<sub>2</sub>O</p>
.

Spaces:
.
H~2 O~2
.
<p>H~2 O~2</p>
.

Escaped:
.
H\~2\~O
.
<p>H~2~O</p>
.

Nested:
.
a~b~c~d~e
.
<p>a<sub>b</sub>c<sub>d</sub>e</p>
.

Strikethrough versus subscript:
.
~~strikethrough~~ versus ~subscript~
.
<p><s>strikethrough</s> versus <sub>subscript</sub></p>
.

Subscript in strikethrough (beginning):
.
~~~subscript~ in the beginning within a strikethrough is perceived as first line of a code block and hence ignored~~
.
<pre><code class="language-subscript~"></code></pre>
.
Strikethrough in subscript (beginning):
.
~~~strikethrough~ in the beginning within a subscript is perceived as first line of a code block and hence ignored~~
.
<pre><code class="language-strikethrough~"></code></pre>
.
Subscript in strikethrough (end):
.
~~strikethrough contains ~subscript~~~
.
<p><s>strikethrough contains <sub>subscript</sub></s></p>
.

Strikethrough in subscript (end):
.
~subscript contains ~~strikethrough~~~
TODO: This is not rendered correctly in markdown-it either, but can be fixed
.
<p>~subscript contains <s>strikethrough</s>~
TODO: This is not rendered correctly in markdown-it either, but can be fixed</p>
.

Subscript in strikethrough:
.
~~strikethrough with ~subscript~ text~~
.
<p><s>strikethrough with <sub>subscript</sub> text</s></p>
.

Strikethrough in subscript:
.
~subscript contains ~~strikethrough~~ text~
TODO: This is not rendered correctly in markdown-it either, but can be fixed
.
<p>~subscript contains <s>strikethrough</s> text~
TODO: This is not rendered correctly in markdown-it either, but can be fixed</p>
.
23 changes: 23 additions & 0 deletions tests/test_subscript.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Tests for subscript plugin."""

from pathlib import Path

from markdown_it import MarkdownIt
from markdown_it.utils import read_fixture_file
import pytest

from mdit_py_plugins.subscript import sub_plugin

# Fixture file, located in the "fixtures" directory next to this test module,
# that is read to parametrize the test below.
FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures", "subscript.md")


@pytest.mark.parametrize("line,title,input,expected", read_fixture_file(FIXTURE_PATH))
def test_all(line, title, input, expected):
    """Render one fixture case with the subscript plugin and compare the HTML.

    The parser is CommonMark with the ``strikethrough`` rule enabled, so the
    fixtures can exercise how ``~sub~`` and ``~~strike~~`` interact.
    """
    parser = MarkdownIt("commonmark")
    parser.enable("strikethrough")
    parser.use(sub_plugin)

    rendered = parser.render(input)
    try:
        # Trailing whitespace is ignored on both sides of the comparison.
        assert rendered.rstrip() == expected.rstrip()
    except AssertionError:
        # Print the full rendered output before re-raising the failure.
        print(rendered)
        raise

0 comments on commit f6a71eb

Please sign in to comment.