Skip to content
This repository has been archived by the owner on Aug 26, 2022. It is now read-only.

Commit

Permalink
Bug 1509430: Kuma changes for the new version of KumaScript
Browse files Browse the repository at this point in the history
mdn/kumascript#1035 is a big patch that
rewrites and modernizes the KumaScript server. This is a smaller PR
that updates the Kuma repo for those changes.

The primary change is that the KumaScript server no longer supports
a GET endpoint for rendering documents, so Kuma now needs to send
document content to KumaScript for rendering with a POST request. This
change is primarily in wiki/kumascript.py, but it causes small
follow-on changes in a nubmer of other files as well.

The patch also includes some new useful scripts I wrote while testing.

And it changes docker-compose.yml to remove memcached which is no
longer required.
  • Loading branch information
David Flanagan committed Jan 9, 2019
1 parent dd08b4a commit 18d5d53
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 239 deletions.
22 changes: 4 additions & 18 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ services:
ports:
- "8001:8000"

memcached:
image: memcached

mysql:
image: mysql:5.6
environment:
Expand Down Expand Up @@ -95,22 +92,11 @@ services:
command: node run.js
depends_on:
- api
- memcached
environment:
- interactive_examples__base_url=${INTERACTIVE_EXAMPLES_BASE:-https://interactive-examples.mdn.mozilla.net}
- live_samples__base_url=http://${ATTACHMENT_HOST:-localhost:8000}
- log__console=true
- log__file=
- server__template_root_dir=macros
- server__document_url_template=http://api:8000/en-US/docs/{path}?raw=1
- server__memcache__server=memcached:11211
- server__template_class=EJSTemplate
- server__autorequire__mdn=MDN:Common
- server__autorequire__Page=DekiScript:Page
- server__autorequire__String=DekiScript:String
- server__autorequire__Uri=DekiScript:Uri
- server__autorequire__Web=DekiScript:Web
- server__autorequire__Wiki=DekiScript:Wiki
- DOCUMENT_URL_TEMPLATE=http://api:8000/en-US/docs/{path}?raw=1
- DOCUMENT_URL=http://api:8000
- INTERACTIVE_EXAMPLES_URL=${INTERACTIVE_EXAMPLES_BASE:-https://interactive-examples.mdn.mozilla.net}
- LIVE_SAMPLES_URL=http://${ATTACHMENT_HOST:-localhost:8000}
ports:
- "9080:9080"
volumes:
Expand Down
6 changes: 6 additions & 0 deletions kuma/scrape/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def session(self):
return self._session

def request(self, path, raise_for_status=True, method='GET'):
# When doing a long-running recursive scrape, you may find
# it useful to uncomment the following two lines. The sleep
# helps to avoid rate-limiting, and the print allows you to
# monitor the progress of your scrape.
# time.sleep(1)
# print path
url = self.base_url + path
logger.debug("%s %s", method, url)
attempts = 0
Expand Down
7 changes: 7 additions & 0 deletions kuma/wiki/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,13 @@ def clean_content(content):
else:
allowed_iframe_patterns = settings.ALLOWED_IFRAME_PATTERNS
filtered = parsed.filterIframeHosts(allowed_iframe_patterns)

# NOTE(djf): if you ever want to compare HTML renderings of a document
# (before and after a KumaScript or macro change, for example) you
# may need to add alphabetical_attributes=True to the serialize call
# below so that attributes are output in a deterministic order. This was
# a particular problem for <iframe> tags, presumably because of the
# iframe filtering above.
content_out = filtered.serialize()
return content_out

Expand Down
116 changes: 22 additions & 94 deletions kuma/wiki/kumascript.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import base64
import hashlib
import json
import time
import unicodedata
Expand All @@ -10,7 +9,6 @@
from constance import config
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.cache import cache
from django.utils.six.moves.urllib.parse import urljoin
from elasticsearch import TransportError

Expand Down Expand Up @@ -40,20 +38,18 @@ def should_use_rendered(doc, params, html=None):
(force_macros or (not no_macros and not show_raw)))


def post(request, content, locale=settings.LANGUAGE_CODE):
def _post(content, env_vars, timeout=config.KUMASCRIPT_TIMEOUT):
url = settings.KUMASCRIPT_URL_TEMPLATE.format(path='')
headers = {
'X-FireLogger': '1.2',
}
env_vars = {
'url': request.build_absolute_uri('/'),
'locale': locale,
}
add_env_headers(headers, env_vars)

response = requests.post(url,
timeout=config.KUMASCRIPT_TIMEOUT,
data=content.encode('utf8'),
headers=headers)
headers=headers,
timeout=timeout)

if response:
body = process_body(response)
errors = process_errors(response)
Expand All @@ -63,6 +59,13 @@ def post(request, content, locale=settings.LANGUAGE_CODE):
return content, errors


def post(request, content, locale=settings.LANGUAGE_CODE):
return _post(content, {
'url': request.build_absolute_uri('/'),
'locale': locale,
})


def _get_attachment_metadata_dict(attachment):
current_revision = attachment.current_revision
try:
Expand All @@ -80,43 +83,21 @@ def _get_attachment_metadata_dict(attachment):
}


def get(document, cache_control, base_url, timeout=None):
"""Perform a kumascript GET request for a document locale and slug."""
if not cache_control:
# Default to the configured max-age for cache control.
max_age = config.KUMASCRIPT_MAX_AGE
cache_control = 'max-age=%s' % max_age
# TODO(djf): This get() function is actually implemented on top of
# _post() and it performs an HTTP POST request. It should probably
# be renamed to render_document(), and the post() method above should
# be renamed to render_string(), maybe. For now, though, there are so
# many tests that mock kumascript.get() that I've left the name unchanged.
def get(document, base_url, timeout=config.KUMASCRIPT_TIMEOUT):
"""Request a rendered version of document.html from KumaScript."""

if not base_url:
site = Site.objects.get_current()
base_url = 'http://%s' % site.domain

if not timeout:
timeout = config.KUMASCRIPT_TIMEOUT

document_locale = document.locale
document_slug = document.slug
max_age = config.KUMASCRIPT_MAX_AGE

# 1063580 - Kumascript converts template name calls to lower case and bases
# caching keys off of that.
document_slug_for_kumascript = document_slug
body, errors = None, None

try:
url_tmpl = settings.KUMASCRIPT_URL_TEMPLATE
url = unicode(url_tmpl).format(path=u'%s/%s' %
(document_locale,
document_slug_for_kumascript))

cache_keys = build_cache_keys(document_slug, document_locale)
etag_key, modified_key, body_key, errors_key = cache_keys

headers = {
'X-FireLogger': '1.2',
'Cache-Control': cache_control,
}

# Create the file interface
files = []
for attachment in document.files.select_related('current_revision'):
Expand All @@ -127,6 +108,7 @@ def get(document, cache_control, base_url, timeout=None):
# http://developer.mindtouch.com/en/docs/DekiScript/Reference/
# Wiki_Functions_and_Variables
path = document.get_absolute_url()

# TODO: Someday merge with _get_document_for_json in views.py
# where most of this is duplicated code.
env_vars = dict(
Expand All @@ -142,51 +124,9 @@ def get(document, cache_control, base_url, timeout=None):
tags=list(document.tags.names()),
review_tags=list(document.current_revision.review_tags.names()),
modified=time.mktime(document.modified.timetuple()),
cache_control=cache_control,
)
add_env_headers(headers, env_vars)

# Set up for conditional GET, if we have the details cached.
cached_meta = cache.get_many([etag_key, modified_key])
if etag_key in cached_meta:
headers['If-None-Match'] = cached_meta[etag_key]
if modified_key in cached_meta:
headers['If-Modified-Since'] = cached_meta[modified_key]

# Finally, fire off the request.
response = requests.get(url, headers=headers, timeout=timeout)

if response.status_code == 304:
# Conditional GET was a pass, so use the cached content.
result = cache.get_many([body_key, errors_key])
body = result.get(body_key, '').decode('utf-8')
errors = result.get(errors_key, None)

elif response.status_code == 200:
body = process_body(response)
errors = process_errors(response)

# Cache the request for conditional GET, but use the max_age for
# the cache timeout here too.
headers = response.headers
cache.set(etag_key, headers.get('etag'), timeout=max_age)
cache.set(modified_key, headers.get('last-modified'), timeout=max_age)
cache.set(body_key, body.encode('utf-8'), timeout=max_age)
if errors:
cache.set(errors_key, errors, timeout=max_age)

elif response.status_code is None:
errors = KUMASCRIPT_TIMEOUT_ERROR

else:
errors = [
{
"level": "error",
"message": "Unexpected response from Kumascript service: %s" %
response.status_code,
"args": ["UnknownError"],
},
]

body, errors = _post(document.html, env_vars, timeout)

except Exception as exc:
# Last resort: Something went really haywire. Kumascript server died
Expand Down Expand Up @@ -256,18 +196,6 @@ def process_errors(response):
return errors


def build_cache_keys(document_locale, document_slug):
"""Build the cache keys used for Kumascript"""
path_hash = hashlib.md5((u'%s/%s' % (document_locale, document_slug))
.encode('utf8'))
base_key = 'kumascript:%s:%%s' % path_hash.hexdigest()
etag_key = base_key % 'etag'
modified_key = base_key % 'modified'
body_key = base_key % 'body'
errors_key = base_key % 'errors'
return (etag_key, modified_key, body_key, errors_key)


def macro_sources(force_lowercase_keys=False):
"""
Get active macros and their source paths.
Expand Down
2 changes: 1 addition & 1 deletion kuma/wiki/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ def render(self, cache_control=None, base_url=None, timeout=None):
# A timeout of 0 should shortcircuit kumascript usage.
self.rendered_html, self.rendered_errors = self.html, []
else:
self.rendered_html, errors = kumascript.get(self, cache_control,
self.rendered_html, errors = kumascript.get(self,
base_url,
timeout=timeout)
self.rendered_errors = errors and json.dumps(errors) or None
Expand Down
2 changes: 1 addition & 1 deletion kuma/wiki/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def test_long_render_sets_deferred(self, mock_kumascript_get):
config.KUMASCRIPT_TIMEOUT = 1.0
rendered_content = self.rendered_content

def my_kumascript_get(self, cache_control, base_url, timeout):
def my_kumascript_get(self, base_url, timeout):
time.sleep(1.0)
return (rendered_content, None)

Expand Down
105 changes: 0 additions & 105 deletions kuma/wiki/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

from . import (create_document_tree, document, make_translation,
new_document_data, normalize_html, revision, WikiTestCase)
from .conftest import ks_toolbox
from ..content import get_seo_description
from ..events import EditDocumentEvent, EditDocumentInTreeEvent
from ..forms import MIDAIR_COLLISION
Expand Down Expand Up @@ -421,110 +420,6 @@ def test_raw_macros(self, mock_kumascript_get):
self.client.get('%s?raw&macros' % self.url, follow=False)
assert mock_kumascript_get.called, "kumascript should have been used"

@override_config(KUMASCRIPT_TIMEOUT=1.0, KUMASCRIPT_MAX_AGE=1234)
@requests_mock.mock()
def test_ua_max_age_zero(self, mock_requests):
"""
Authenticated users can request a zero max-age for kumascript
"""
mock_requests.get(
requests_mock.ANY,
[
dict(content='HELLO WORLD'),
ks_toolbox().macros_response,
]
)

self.client.get(self.url, follow=False, HTTP_CACHE_CONTROL='no-cache')
assert ('max-age=1234' ==
mock_requests.request_history[0].headers['Cache-Control'])

self.client.login(username='admin', password='testpass')
self.client.get(self.url, follow=False,
HTTP_CACHE_CONTROL='no-cache')
assert ('no-cache' ==
mock_requests.request_history[1].headers['Cache-Control'])

@override_config(KUMASCRIPT_TIMEOUT=1.0, KUMASCRIPT_MAX_AGE=1234)
@requests_mock.mock()
def test_ua_no_cache(self, mock_requests):
"""
Authenticated users can request no-cache for kumascript
"""
mock_requests.get(
requests_mock.ANY,
[
dict(content='HELLO WORLD'),
ks_toolbox().macros_response,
]
)

self.client.get(self.url, follow=False, HTTP_CACHE_CONTROL='no-cache')
assert ('max-age=1234' ==
mock_requests.request_history[0].headers['Cache-Control'])

self.client.login(username='admin', password='testpass')
self.client.get(self.url, follow=False, HTTP_CACHE_CONTROL='no-cache')
assert ('no-cache' ==
mock_requests.request_history[1].headers['Cache-Control'])

@override_config(KUMASCRIPT_TIMEOUT=1.0, KUMASCRIPT_MAX_AGE=1234)
@requests_mock.mock()
def test_conditional_get(self, mock_requests):
"""
Ensure conditional GET in requests to kumascript work as expected
"""
expected_etag = "8675309JENNY"
expected_modified = "Wed, 14 Mar 2012 22:29:17 GMT"
expected_content = "HELLO THERE, WORLD"

mock_requests.get(
requests_mock.ANY, [
{
'content': expected_content,
'headers': {
'etag': expected_etag,
'last-modified': expected_modified,
'age': '456',
}
},
{
'content': expected_content,
'headers': {
'etag': expected_etag,
'last-modified': expected_modified,
'age': '456',
},
},
{
'content': expected_content,
'status_code': 304,
'headers': {
'etag': expected_etag,
'last-modified': expected_modified,
'age': '123',
},
}
]
)
# First request to let the view cache etag / last-modified
self.client.get(self.url)

# Clear rendered_html to force another request.
self.doc.rendered_html = ''
self.doc.save()

# Second request to verify the view sends them back
response = self.client.get(self.url)
assert (expected_etag ==
mock_requests.request_history[1].headers['If-None-Match'])
assert (expected_modified ==
mock_requests.request_history[1].headers['If-Modified-Since'])

# Third request to verify content was cached and served on a 304
response = self.client.get(self.url)
assert expected_content in response.content

@override_config(KUMASCRIPT_TIMEOUT=1.0, KUMASCRIPT_MAX_AGE=600)
@requests_mock.mock()
def test_preview_nonascii(self, mock_requests):
Expand Down
Loading

0 comments on commit 18d5d53

Please sign in to comment.