From dc4c6fb5b4d7ee69538d34d36a9d510ee2c1cd45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Krier?= Date: Sun, 13 Aug 2023 23:34:13 +0200 Subject: [PATCH] Add sitemap index and serve sitemap for subprojects The sitemap index lists all the sitemap locations for a project, including its subprojects. The sitemaps of subprojects are also served from the parent domain. Closes #6841 --- docs/user/reference/sitemaps.rst | 8 +++- readthedocs/proxito/tests/test_full.py | 45 ++++++++++++++++++ readthedocs/proxito/urls.py | 11 ++++- readthedocs/proxito/views/serve.py | 61 ++++++++++++++++++++++++- readthedocs/templates/sitemap_index.xml | 8 ++++ 5 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 readthedocs/templates/sitemap_index.xml diff --git a/docs/user/reference/sitemaps.rst b/docs/user/reference/sitemaps.rst index 49483613a92..46970321391 100644 --- a/docs/user/reference/sitemaps.rst +++ b/docs/user/reference/sitemaps.rst @@ -16,12 +16,18 @@ It contains information such as: * How important this URL is in relation to other URLs in the site. * What translations are available for a page. -Read the Docs automatically generates a ``sitemap.xml`` for your project, +Read the Docs automatically generates a ``sitemap.xml`` and a +``sitemap_index.xml`` for your project. By default the sitemap includes: * Each version of your documentation and when it was last updated, sorted by version number. +By default the sitemap index includes: + +* The location of the project's own ``sitemap.xml``. +* The locations of the ``sitemap.xml`` files of its subprojects, if it has any. + This allows search engines to prioritize results based on the version number, sorted by `semantic versioning`_. 
diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 56976a00d7b..59753b1bd90 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -1504,6 +1504,51 @@ def test_sitemap_all_private_versions(self): ) self.assertEqual(response.status_code, 404) + def test_sitemap_subproject(self): + self.project.versions.update(active=True) + self.subproject.versions.update(active=True) + + subresponse = self.client.get( + reverse("sitemap_xml", args=["subproject"]), + headers={"host": "project.readthedocs.io"}, + ) + response = self.client.get( + reverse("sitemap_xml"), headers={"host": "subproject.readthedocs.io"} + ) + + self.assertEqual(subresponse.status_code, 200) + self.assertEqual(response.status_code, 200) + self.assertEqual(subresponse.content, response.content) + + def test_sitemap_index(self): + self.project.versions.update(active=True) + response = self.client.get( + reverse("sitemap_index_xml"), headers={"host": "project.readthedocs.io"} + ) + self.assertEqual(response.status_code, 200) + self.assertEqual(response["Content-Type"], "application/xml") + expected = dedent( + """ + <?xml version="1.0" encoding="UTF-8"?> + <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + + <sitemap> + <loc>https://project.readthedocs.io/sitemap.xml</loc> + </sitemap> + + <sitemap> + <loc>https://project.readthedocs.io/projects/subproject/sitemap.xml</loc> + </sitemap> + + <sitemap> + <loc>https://project.readthedocs.io/projects/subproject-alias/sitemap.xml</loc> + </sitemap> + + </sitemapindex> + """ + ).lstrip() + self.assertEqual(response.content.decode(), expected) + @mock.patch( "readthedocs.proxito.views.mixins.staticfiles_storage", new=StaticFileSystemStorageTest(), diff --git a/readthedocs/proxito/urls.py b/readthedocs/proxito/urls.py index 7177a29c0e4..f0073f5d89c 100644 --- a/readthedocs/proxito/urls.py +++ b/readthedocs/proxito/urls.py @@ -46,6 +46,7 @@ ServeError404, ServePageRedirect, ServeRobotsTXT, + ServeSitemapIndexXML, ServeSitemapXML, ServeStaticFiles, ) @@ -136,7 +137,15 @@ name="proxito_404_handler", ), re_path(r"robots\.txt$", ServeRobotsTXT.as_view(), name="robots_txt"), - 
re_path(r"sitemap\.xml$", ServeSitemapXML.as_view(), name="sitemap_xml"), + re_path( + r"^(?:projects/(?P<subproject_slug>{project_slug})/)?" + r"sitemap\.xml$".format(**pattern_opts), + ServeSitemapXML.as_view(), + name="sitemap_xml", + ), + re_path( + r"sitemap_index\.xml$", ServeSitemapIndexXML.as_view(), name="sitemap_index_xml" + ), ] docs_urls = [ diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 088e96ee9db..3fe7b35e149 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -1019,7 +1019,7 @@ class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View): # Extra cache tag to invalidate only this view if needed. project_cache_tag = "sitemap.xml" - def get(self, request): + def get(self, request, subproject_slug=None): """ Generate and serve a ``sitemap.xml`` for a particular ``project``. @@ -1078,6 +1078,12 @@ def changefreqs_generator(): yield from itertools.chain(changefreqs, itertools.repeat('monthly')) project = request.unresolved_domain.project + + if subproject_slug: + project = get_object_or_404( + project.subprojects, alias=subproject_slug + ).child + public_versions = Version.internal.public( project=project, only_active=True, @@ -1164,6 +1170,59 @@ class ServeSitemapXML(SettingsOverrideObject): _default_class = ServeSitemapXMLBase +class ServeSitemapIndexXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View): + + """Serve sitemap_index.xml from the domain's root.""" + + cache_response = True + project_cache_tag = "sitemap.xml" + + def get(self, request): + """ + Generate and serve a ``sitemap_index.xml`` for a ``project``. + + The sitemap index is generated from the project and all sub-projects. 
+ """ + + project = request.unresolved_domain.project + + locations = [ + "{scheme}://{domain}/sitemap.xml".format( + scheme="https", + domain=project.subdomain(), + ) + ] + for subproject in project.related_projects.all(): + locations.append( + "{scheme}://{domain}/projects/{subproject}/sitemap.xml".format( + scheme="https", + domain=project.subdomain(), + subproject=subproject.slug, + ) + ) + context = { + "locations": locations, + } + return render( + request, + "sitemap_index.xml", + context, + content_type="application/xml", + ) + + def _get_project(self): + # Method used by the CDNCacheTagsMixin class. + return self.request.unresolved_domain.project + + def _get_version(self): + # This view isn't attached to a version. + return None + + +class ServeSitemapIndexXML(SettingsOverrideObject): + _default_class = ServeSitemapIndexXMLBase + + class ServeStaticFiles(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, View): """ diff --git a/readthedocs/templates/sitemap_index.xml b/readthedocs/templates/sitemap_index.xml new file mode 100644 index 00000000000..c248d2820fa --- /dev/null +++ b/readthedocs/templates/sitemap_index.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> +{% for loc in locations %} + <sitemap> + <loc>{{ loc }}</loc> + </sitemap> +{% endfor %} +</sitemapindex>