diff --git a/mkdocs_print_site_plugin/urls.py b/mkdocs_print_site_plugin/urls.py index 3ea202b..d27eb38 100644 --- a/mkdocs_print_site_plugin/urls.py +++ b/mkdocs_print_site_plugin/urls.py @@ -136,12 +136,16 @@ def update_anchor_ids(page_html, page_key): """ # Regex demo / tests: https://regex101.com/r/mlAPNH/1 href_regex = re.compile( - r"\<([h1|h2|h3|h4|h5|h6|sup|li]+).+id=\"([aA-zZ|0-9|\-|\_|\.|\:]+)\"", + r"<([^\s]+).*?id=\"([^\"]*?)\".*?>(.*?)<\/\1>", flags=re.IGNORECASE, ) matches = re.finditer(href_regex, page_html) + change_tags = ["h1", "h2", "h3", "h4", "h5", "h6", "sup", "li"] for m in matches: + tag_text = m.group(1) + if tag_text not in change_tags: + continue id_text = m.group(2) match_text = m.group() new_text = match_text.replace(id_text, page_key + "-" + id_text) @@ -151,6 +155,54 @@ def update_anchor_ids(page_html, page_key): return page_html +def fix_tabbed_content(page_html, page_key): + """ + Tabbed content have and that are linked. + + When combining multiple pages into one, name duplicates occur. + + So in the example: + + + C + + we should change id, and for attribute, to contain the pagekey. + """ + # Replace + href_regex = re.compile( + r"", + flags=re.IGNORECASE, + ) + matches = re.finditer(href_regex, page_html) + for m in matches: + id_text = m.group(1) + match_text = m.group() + new_text = match_text.replace(id_text, page_key + "_" + id_text) + page_html = page_html.replace(match_text, new_text) + + name_text = m.group(2) + match_text = m.group() + new_text = match_text.replace(name_text, page_key + "_" + name_text) + page_html = page_html.replace(match_text, new_text) + + # Replace + href_regex = re.compile( + r"<([^\s]+).*?for=\"([^\"]*?)\".*?>(.*?)<\/\1>", + flags=re.IGNORECASE, + ) + matches = re.finditer(href_regex, page_html) + for m in matches: + tag_text = m.group(1) + if tag_text != "label": + continue + id_text = m.group(2) + match_text = m.group() + new_text = match_text.replace(id_text, page_key + "_" + id_text) + page_html = page_html.replace(match_text, new_text) + + return page_html + + def fix_image_src(page_html, page_url, directory_urls): """ Update img src path for images displayed in print page. @@ -219,6 +271,7 @@ def fix_internal_links(page_html, page_url, directory_urls): page_html = fix_href_links(page_html, page_key, page_url, directory_urls) page_html = update_anchor_ids(page_html, page_key) + page_html = fix_tabbed_content(page_html, page_key) page_html = fix_image_src(page_html, page_url, directory_urls) # Finally, wrap the entire page in a section with an anchor ID diff --git a/tests/fixtures/projects/with_markdown_ext/docs/index.md b/tests/fixtures/projects/with_markdown_ext/docs/index.md index 285ab02..0b8c7d9 100644 --- a/tests/fixtures/projects/with_markdown_ext/docs/index.md +++ b/tests/fixtures/projects/with_markdown_ext/docs/index.md @@ -4,8 +4,25 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent rutrum erat qu Maecenas semper urna ac orci lacinia vestibulum. Donec sed tincidunt quam, in pulvinar velit. Suspendisse tristique lorem augue, non congue eros porta faucibus. Phasellus orci sapien, facilisis vel bibendum ut, ultrices ut erat. Nam in nibh a neque elementum condimentum et non nibh. Phasellus ex dui, pretium vel posuere a, viverra ac diam. Vivamus ipsum felis, placerat id lobortis gravida, mattis ac libero. Integer luctus enim id nibh sollicitudin, quis mattis purus convallis. Ut aliquet sollicitudin mollis. Phasellus quam turpis, sodales sed pulvinar sed, accumsan at felis. Fusce pharetra felis sed condimentum dictum. Quisque sit amet augue vitae felis elementum auctor sit amet eget turpis. Duis dictum, sapien nec semper luctus, orci libero convallis lacus, id ultricies ipsum est sit amet elit. Integer rhoncus erat at ultrices sagittis. +??? optional-class "Summary" + Here's some content. + +??? multiple optional-class "Summary" + Here's some content. + +??? success + Content. + +??? warning classes + Content. + Phasellus commodo volutpat varius. Nulla volutpat id nisi non vulputate. Ut vitae dapibus nulla, nec maximus felis. Vivamus sem leo, mattis vel consequat eget, mollis sit amet justo. Aenean eget laoreet sem, quis vulputate risus. Nunc ac commodo odio, ullamcorper tristique quam. Quisque eros ante, rutrum quis dui vitae, pretium consectetur est. Quisque in urna gravida, molestie quam eget, elementum metus. In leo sapien, posuere eget ante id, bibendum laoreet tellus. Cras tellus eros, congue rhoncus porttitor id, tincidunt id nibh. Nullam eget porta tellus, et gravida orci. In quis aliquet sapien. Aenean rhoncus nisi non magna volutpat egestas. Integer quis ipsum ultrices, feugiat nulla sed, vulputate magna. In sit amet hendrerit metus, et laoreet nulla. Ut neque erat, finibus vitae metus ut, facilisis accumsan risus. Vivamus et rutrum turpis. Quisque ac molestie erat, ut fringilla tortor. Fusce congue gravida sapien, venenatis vulputate purus dictum id. Suspendisse odio lorem, rhoncus id diam eu, euismod fermentum nisi. Mauris eget pretium nunc. Donec at mauris leo. Mauris porta sed purus nec interdum. Donec pretium sit amet turpis eget dignissim. Quisque malesuada orci a purus consequat, vel consectetur massa placerat. +???+ note "Open styled details" + + ??? danger "Nested details!" + And more content again. + Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Phasellus eget justo ut neque iaculis interdum. Donec varius, lectus eu pharetra vulputate, diam sem luctus dui, quis tristique diam neque ac turpis. Fusce porttitor euismod massa, quis accumsan odio tincidunt ac. Sed dictum lorem at magna dignissim congue. Donec ultricies sagittis neque, a blandit quam rutrum eu. In a ligula fermentum, tincidunt nibh vehicula, rutrum erat. Suspendisse blandit malesuada tortor facilisis convallis. Sed placerat rhoncus imperdiet. Cras porta purus vel nulla fermentum, in tristique arcu facilisis. Morbi tristique vitae eros vel tempor. Proin in enim semper risus posuere posuere. Phasellus imperdiet commodo eros sit amet luctus. Aliquam lectus eros, malesuada id vestibulum eget, condimentum eget turpis. Maecenas sollicitudin velit eget elit eleifend mollis. diff --git a/tests/test_urls.py b/tests/test_urls.py index 0c5cddc..28cbf79 100644 --- a/tests/test_urls.py +++ b/tests/test_urls.py @@ -1,21 +1,39 @@ -from mkdocs_print_site_plugin.urls import fix_href_links, update_anchor_ids, fix_image_src, get_page_key, is_external, is_attachment +import pytest +from mkdocs_print_site_plugin.urls import ( + fix_href_links, + update_anchor_ids, + fix_image_src, + get_page_key, + is_external, + is_attachment, +) -def test_get_page_key(): - assert get_page_key('index.html') == 'index' - assert get_page_key('/') =='index' - assert get_page_key('abc/') == 'abc' - assert get_page_key('abc.html') == 'abc' - assert get_page_key('/folder/subfolder/index.html') == "folder-subfolder-index" +def test_get_page_key(): + """ + Test page key. + """ + assert get_page_key("index.html") == "index" + assert get_page_key("/") == "index" + assert get_page_key("abc/") == "abc" + assert get_page_key("abc.html") == "abc" + assert get_page_key("/folder/subfolder/index.html") == "folder-subfolder-index" def test_is_external(): + """ + Test. + """ assert is_external("https://www.google.com") assert not is_external("/index.html") assert not is_external("index.html") + def test_is_attachment(): + """ + Test. + """ assert is_attachment("/file.py") assert is_attachment("../files/file.xlsx") assert not is_attachment("https://www.google.com") @@ -24,7 +42,9 @@ def test_is_attachment(): def test_fix_href_links(): - + """ + Test. + """ html = 'the link' result = 'the link' assert fix_href_links(html, "this_page", "/") == result @@ -61,39 +81,56 @@ def test_fix_href_links(): result = 'page z' assert fix_href_links(html, "this_page", "/Chapter1/Section2/") == result - html = "Wraps the hero teaser (if available)\n\n\nhtmltitle\nWraps the tag\n\n\nlibs\nWraps" + html = "Wraps the hero teaser (if available)\n\n\nhtmltitle\nWraps the tag\n\n\nlibs\nWraps" # noqa result = fix_href_links(html, "this_page", "/") assert result == html -def test_update_anchor_ids(): - +def test_update_anchor_ids_noupdate(): + """ + Test. + """ # Make sure no changes are made + htmls = [ + 'the link', + 'page apage z', + 'page z', + "Wraps the hero teaser (if available)\n\n\nhtmltitle\nWraps the tag\n\n\nlibs\nWraps", # noqa + 'no "id" here', + 'blabla', + 'blabla', + ] + + for html in htmls: + assert update_anchor_ids(html, "this_page") == html + + +@pytest.mark.parametrize("html_element", ["h1", "h2", "h3", "h4", "h5", "h6", "li", "sup"]) +def test_update_anchor_ids(html_element): + """ + Test changing ids. + """ + html = '<%s id="a-section-on-something">A Section on something%s>' % (html_element, html_element) + result = '<%s id="this_page-a-section-on-something">A Section on something%s>' % (html_element, html_element) + assert update_anchor_ids(html, "this_page") == result - html = 'the link' - assert update_anchor_ids(html, "this_page") == html - - html = ' -# Print Site Page -# First example with text surrounded by a red border. This example also has multiple lines. -# -# -# page a -# page z -# anchor on page -# -# text -# Z -# text -# A -# text -# sub one -# text -# sub two -# """ - -# assert 'page a' in fix_internal_links(html, page_url="a/") - -# def sample_html(): -# return """ -# -# Print Site Page -# First example with text surrounded by a red border. This example also has multiple lines. -# -# -# page a -# page z -# anchor on page -# -# text -# Z -# text -# A -# text -# sub one -# text -# sub two -# """
htmltitle
libs
First example with text surrounded by a red border. This example also has multiple lines.
text